乡下人产国偷v产偷v自拍,国产午夜片在线观看,婷婷成人亚洲综合国产麻豆,久久综合给合久久狠狠狠9

  • <output id="e9wm2"></output>
    <s id="e9wm2"><nobr id="e9wm2"><ins id="e9wm2"></ins></nobr></s>

    • 分享

      Lucene 3.0 入門實例

       tinglu10 2010-12-18
      lucene3.0已于2009-11-25發布啦,但網上的入門實例都是針對lucene3.0以前的,相對于以前的版本,貌似改動不小。

      本人從零開始學習《lucene in action中文版》,并結合lucene3.0文檔寫了個入門實例,可供像我一樣直接從lucene3.0開始學習的初學者參考!(變化大的地方用藍字標出來了)

      入門實例:

      1.預處理:先把網上下載的一個《三國演義》電子書“三國演義.txt”(可用其他代替,呵呵)切割成多個小文件。
      /**
       * Preprocessing step: cuts one large text file into a series of small
       * files so that each piece can later be handled as a separate document.
       *
       * @author ht
       */
      public class FilePreprocess {

          /**
           * Creates the output directory if it is missing, then splits the
           * hard-coded source file into it.
           *
           * @param arg command-line arguments (unused)
           */
          public static void main(String[] arg) {
              String outputpath = "D:\\test\\small\\"; // directory that receives the small files
              String filename = "D:\\test\\三國演義.txt"; // location of the large source file
              File outputDir = new File(outputpath);
              if (!outputDir.exists()) {
                  outputDir.mkdirs();
              }

              splitToSmallFiles(new File(filename), outputpath);
          }

          /**
           * Splits a large text file into small files named
           * {@code output0.txt}, {@code output1.txt}, ... under {@code outputpath}.
           *
           * <p>Lines are accumulated (each terminated with CRLF) until the
           * accumulated text, encoded with the platform default charset, is at
           * least 10240 bytes; the chunk is then written out and a new one is
           * started. Whatever remains after the last line is always written as
           * a final file, even if it is empty.
           *
           * <p>I/O failures are reported on the console and are not propagated
           * to the caller.
           *
           * @param file       the large file to split
           * @param outputpath directory prefix for the small files; it is
           *                   concatenated directly with the file name, so it
           *                   must end with a path separator
           */
          public static void splitToSmallFiles(File file, String outputpath) {
              int filePointer = 0;
              int MAX_SIZE = 10240; // size threshold of one small file, in bytes
              String filename = "output"; // file-name prefix of the small files

              BufferedReader reader = null;
              try {
                  reader = new BufferedReader(new FileReader(file));
                  StringBuilder buffer = new StringBuilder();
                  String line = reader.readLine();
                  while (line != null) {
                      buffer.append(line).append("\r\n");
                      String chunk = buffer.toString();
                      // Size is measured in encoded bytes, not characters.
                      if (chunk.getBytes().length >= MAX_SIZE) {
                          writeChunk(outputpath + filename + filePointer + ".txt", chunk);
                          filePointer++;
                          buffer = new StringBuilder();
                      }
                      line = reader.readLine();
                  }
                  // Flush the remainder that never reached the threshold.
                  writeChunk(outputpath + filename + filePointer + ".txt", buffer.toString());
                  System.out.println("The file hava splited to small files !");
              } catch (FileNotFoundException e) {
                  System.out.println("file not found !");
                  e.printStackTrace();
              } catch (IOException e) {
                  e.printStackTrace();
              } finally {
                  // Always release the input file handle, even after a failure.
                  if (reader != null) {
                      try {
                          reader.close();
                      } catch (IOException e) {
                          e.printStackTrace();
                      }
                  }
              }
          }

          /**
           * Writes {@code content} to the file at {@code path}, closing the
           * writer whether or not the write succeeds.
           *
           * @param path    destination file path
           * @param content text to write
           * @throws IOException if the file cannot be opened or written
           */
          private static void writeChunk(String path, String content) throws IOException {
              BufferedWriter writer = new BufferedWriter(new FileWriter(path));
              try {
                  writer.write(content);
              } finally {
                  writer.close();
              }
          }
      }

      2.用lucene3.0生成索引類:用lucene3.0對生成的多個小文件進行索引,中文分詞用的是lucene3.0自帶的StandardAnalyzer.
      /**
      * @author ht
      * 索引生成
      *
      */
      public class Indexer {
      private static String INDEX_DIR = "D:\\test\\index";//索引存放目錄
      private static String DATA_DIR = "D:\\test\\small\\";//小文件存放的目錄

      public static void main(String[] args) throws Exception {

      long start = new Date().getTime();
      int numIndexed = index(new File(INDEX_DIR), new File(DATA_DIR));//調(diào)用index方法
      long end = new Date().getTime();
      System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
      }

      /**索引dataDir下的.txt文件,并儲存在indexDir下,返回索引的文件數(shù)量
      * @param indexDir
      * @param dataDir
      * @return int
      * @throws IOException
      */
      public static int index(File indexDir, File dataDir) throws IOException {

      if (!dataDir.exists() || !dataDir.isDirectory()) {
      throw new IOException(dataDir + " does not exist or is not a directory");
      }

      IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), new StandardAnalyzer(Version.LUCENE_CURRENT), true,   
      IndexWriter.MaxFieldLength.LIMITED);


      indexDirectory(writer, dataDir);//調(diào)用indexDirectory方法
      int numIndexed = writer.numDocs();
      writer.optimize();
      writer.close();
      return numIndexed;
      }

      /**循環(huán)遍歷目錄下的所有.txt文件并進(jìn)行索引
      * @param writer
      * @param dir
      * @throws IOException
      */
      private static void indexDirectory(IndexWriter writer, File dir)
      throws IOException {

      File[] files = dir.listFiles();

      for (int i = 0; i < files.length; i++) {
      File f = files[i];
      if (f.isDirectory()) {
      indexDirectory(writer, f); // recurse
      } else if (f.getName().endsWith(".txt")) {
      indexFile(writer, f);
      }
      }
      }

      /**對單個(gè)txt文件進(jìn)行索引
      * @param writer
      * @param f
      * @throws IOException
      */
      private static void indexFile(IndexWriter writer, File f)
      throws IOException {

      if (f.isHidden() || !f.exists() || !f.canRead()) {
      return;
      }

      System.out.println("Indexing " + f.getCanonicalPath());
      Document doc = new Document();
      doc.add(new Field("contents",new FileReader(f)));
      doc.add(new Field("filename",f.getCanonicalPath(),Field.Store.YES, Field.Index.ANALYZED));


      writer.addDocument(doc);
      }
      }


      3.查詢類:查詢“玄德”!
      /**
       * Runs a keyword query against the {@code contents} field of an
       * existing Lucene index and prints the matching documents.
       *
       * @author ht
       */
      public class Searcher {
          private static final String INDEX_DIR = "D:\\test\\index\\"; // location of the index
          private static final String KEYWORD = "玄德"; // query keyword
          private static final int TOP_NUM = 100; // show at most the top 100 results

          /**
           * Verifies that the hard-coded index directory exists and searches
           * it for the hard-coded keyword.
           *
           * @param args command-line arguments (unused)
           * @throws Exception if the index directory is invalid or the search fails
           */
          public static void main(String[] args) throws Exception {
              File indexDir = new File(INDEX_DIR);
              if (!indexDir.exists() || !indexDir.isDirectory()) {
                  throw new Exception(indexDir +
                          " does not exist or is not a directory.");
              }
              search(indexDir, KEYWORD);
          }

          /**
           * Searches the index in {@code indexDir} for {@code q}, prints the
           * number of returned hits, one line per hit (its {@code filename}
           * field and score information), and finally a summary with the
           * total hit count and elapsed time.
           *
           * @param indexDir directory containing the index
           * @param q        query string, parsed against the {@code contents} field
           * @throws Exception if the index cannot be opened, the query cannot
           *                   be parsed, or the search fails
           */
          public static void search(File indexDir, String q) throws Exception {
              IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true); // read-only
              try {
                  String field = "contents";

                  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field,
                          new StandardAnalyzer(Version.LUCENE_CURRENT));
                  Query query = parser.parse(q);

                  TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM, false);

                  long start = System.currentTimeMillis();

                  is.search(query, collector);
                  ScoreDoc[] hits = collector.topDocs().scoreDocs;

                  System.out.println(hits.length);
                  for (ScoreDoc hit : hits) {
                      Document doc = is.doc(hit.doc);
                      System.out.println(doc.getField("filename") + "   " + hit.toString() + " ");
                  }
                  // Elapsed time deliberately covers the result printing too.
                  long end = System.currentTimeMillis();

                  System.out.println("Found " + collector.getTotalHits() +
                          " document(s) (in " + (end - start) +
                          " milliseconds) that matched query '" +
                          q + "':");
              } finally {
                  // Release the searcher whether or not the search succeeded.
                  is.close();
              }
          }
      }

        本站是提供個人知識管理的網絡存儲空間,所有內容均由用戶發布,不代表本站觀點。請注意甄別內容中的聯系方式、誘導購買等信息,謹防詐騙。如發現有害或侵權內容,請點擊一鍵舉報。
        轉藏 分享 獻花(0)

        0條評論

        發表

        請遵守用戶 評論公約

        類似文章 更多