第一个搜索引擎: 为一个文本文件添加索引 以便检索
目录如下:
d:\index 存放索引文件
d:\testfolder 存放由大文件分割成的小文件(数量很多)
d:\book.txt 存放原始文件
?
import? java.io.IOException;??
?
import? org.apache.lucene.analysis.Analyzer;??
import? org.apache.lucene.analysis.standard.StandardAnalyzer;??
import? org.apache.lucene.queryParser.ParseException;??
import? org.apache.lucene.queryParser.QueryParser;??
import? org.apache.lucene.search.Hits;??
import? org.apache.lucene.search.IndexSearcher;??
import? org.apache.lucene.search.Query;??
?
?public?? class? TestQuery?? {??
???? public?? static?? void? main(String[] args)? throws? IOException, ParseException?? {??
??????? Hits hits? =?? null ;??
??????? String queryString? =?? "把原来可能是军心涣散的溃退变成一场精神抖擞的胜利进军.进军到战略要地西北去,无疑是他们大战役的第二个基本原因,他们正确地预见到这个地区要对中,日,苏的当前命运将起决定性的作用.后来的历史证明,他们强调这个原因是完" ;??
??????? Query query? =?? null ;??
??????? IndexSearcher searcher? =?? new? IndexSearcher( "d:\\index" );??
?
??????? Analyzer analyzer? =?? new? StandardAnalyzer();??
???????? try??? {??
??????????? QueryParser qp? =?? new? QueryParser( "body" , analyzer);??
??????????? query? =? qp.parse(queryString);??
??????? }?? catch? (ParseException e)?? {??
??????? }???
???????? if? (searcher? !=?? null )?? {??
??????????? hits? =? searcher.search(query);??
???????????? if? (hits.length()? >?? 0 )?? {???
??????????????? System.out.println( " 找到: "?? +? hits.length()? +?? "? 个结果! " );??
????????????? //? System.out.println(searcher.getIndexReader().document(? ));
??????????? }???
???????????? for(int i=0; i<hits.length(); i++){
??????????? ? System.out.println(hits.id(i));
???????????? }
??????? }
????????
?
??? }?
?
}??
?
?
?
?
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
?
?/** */ /**??
?* author lighter date 2006-8-7?
? */??
?public?? class?? TextFileIndexer???? {??
???? public?? static?? void? main(String[] args)? throws? Exception?? {??
???????? /**/ /*? 指明要索引文件夹的位置,这里是C盘的S文件夹下? */??
??????? File fileDir? =?? new? File( "d:\\testfolder" );??
?
???????? /**/ /*? 这里放索引文件的位置? */??
??????? File indexDir? =?? new? File( "d:\\index" );??
??????? Analyzer luceneAnalyzer? =?? new? StandardAnalyzer();??
??????? IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer, true);?
??????? File[] textFiles? =? fileDir.listFiles();??
???????? long? startTime? =?? new? Date().getTime();??
??????????
???????? // 增加document到索引去???
????????? for? ( int? i? =?? 0 ; i? <? textFiles.length; i ++ )?? {??
???????????? if? (textFiles[i].isFile()??
???????????????????? &&? textFiles[i].getName().endsWith( ".txt" ))?? {??
??????????????? System.out.println( " File? "?? +? textFiles[i].getCanonicalPath()??
???????????????????????? +?? " 正在被索引.... " );??
??????????????? String temp? =? FileReaderAll(textFiles[i].getCanonicalPath(),??
???????????????????????? "GBK" );??
??????????????? System.out.println(temp);??
??????????????? Document document? =?? new? Document();??
??????????????? Field FieldPath? =?? new? Field( "path" , textFiles[i].getPath(),??
??????????????????????? Field.Store.YES, Field.Index.NO);??
??????????????? Field FieldBody? =?? new? Field( "body" , temp, Field.Store.YES,??
??????????????????????? Field.Index.TOKENIZED,??
??????????????????????? Field.TermVector.WITH_POSITIONS_OFFSETS);??
??????????????? document.add(FieldPath);??
??????????????? document.add(FieldBody);??
??????????????? indexWriter.addDocument(document);??
??????????? }???
??????? }???
???????? // optimize()方法是对索引进行优化???
??????? indexWriter.optimize();??
??????? indexWriter.close();??
??????????
???????? // 测试一下索引的时间???
???????? long? endTime? =?? new? Date().getTime();??
??????? System.out??
??????????????? .println( " 这花费了 "??
???????????????????????? +? (endTime? -? startTime)??
???????????????????????? +?? "? 毫秒来把文档增加到索引里面去! "??
???????????????????????? +? fileDir.getPath());??
??? }???
?
???? public?? static? String FileReaderAll(String FileName, String charset)??
???????????? throws? IOException?? {??
??????? BufferedReader reader? =?? new? BufferedReader( new? InputStreamReader(??
???????????????? new? FileInputStream(FileName), charset));??
??????? String line? =?? new? String();??
??????? String temp? =?? new? String();??
??????????
???????? while? ((line? =? reader.readLine())? !=?? null )?? {??
??????????? temp? +=? line;??
??????? }???
??????? reader.close();??
???????? return? temp;??
??? }???
}??