lucene 3.0 使用片段(转)
<%@ page import = "org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer" %>
<%@ page import = "org.apache.lucene.document.*" %>
<%@ page import = "org.apache.lucene.index.*" %>
<%@ page import = "org.apache.lucene.store.*" %>
<%@ page import = "org.apache.lucene.util.*" %>
设定分词器
SmartChineseAnalyzer 是 Lucene 较新版本自带的中文分词器;当然你也可以使用第三方分词器,如 IKAnalyzer 等(下面的示例使用 IKAnalyzer)。
// Analyzer used to tokenize text; this example uses IKAnalyzer.
Analyzer analyzer = new IKAnalyzer();
// Alternative: the SmartChineseAnalyzer imported by the page directives above.
//Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
// Working references for the document being built and the field being added;
// both are reassigned inside the indexing loop.
Document doc = new Document();
Field f = null;
索引类
IndexWriter writer = new IndexWriter(FSDirectory.open( new File(request.getRealPath("网站的目录") ) ),
???????????????????????????????????? analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
for( 循环要索引的东东 )
{
??? 新文档(这里的文档,其实也可以认为相当于数据库的“行”,当然,如果索引的对象正好是个文件,那文档这种叫法就很合理了)
??? doc = new Document();
???
??? doc.add( new Field("id", rs.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED) );
??? doc.add( new Field("title", rs.getString("title"), Field.Store.YES, Field.Index.ANALYZED) );
???
??? d = new SimpleDateFormat("yyyy-MM-dd").format( new java.util.Date( rs.getLong("shadow_time") ) ).toString();
??? f = new Field("shadowtime", d, Field.Store.YES, Field.Index.NOT_ANALYZED);
??? doc.add(f);
???
??? writer.addDocument(doc);
}
索引必须严格按照上面的方式创建:如果沿用 2.4 及以前版本的写法,会出现方法已过时(deprecated)的警告,其中一些方法在 3.0 中甚至已经无法使用,具体是哪些可以查阅相关的 API 文档。
<%@ page import = "org.apache.lucene.store.*" %>
<%@ page import = "org.apache.lucene.util.Version" %>
索引目录
String indexPath = request.getRealPath("网站目录");
IndexReader reader = null;
long startTime = (new java.util.Date()).getTime();??
???
打开索引
try {
    // Second argument true opens the reader in read-only mode.
    reader = IndexReader.open(FSDirectory.open(new File(indexPath)), true);
}
catch (Exception e) {
    // Without a reader the rest of the page cannot render; record the cause and
    // stop here instead of failing later with a NullPointerException on reader.
    application.log("Unable to open Lucene index at " + indexPath, e);
    return;
}
版本:<%=reader.getVersion()%> 文档:<%=pagination.getTotalResult()%> 个<br />
// Enumerate every non-deleted document in the index. termDocs() with no
// argument returns an unpositioned enumerator; passing a null term is the
// documented way to walk all documents.
TermDocs tdocs = reader.termDocs(null);
// Running count of the documents visited so far.
int i = 0;
try
{
    while (tdocs.next())
    {
        i++;

        // Load the stored fields of the current document.
        Document doc = reader.document(tdocs.doc());

        String docid = doc.get("id");
        String title = doc.get("title");
        // Despite its name, this holds the stored "shadowtime" field value.
        String area = doc.get("shadowtime");

        ....
    }
}
finally
{
    // Release the enumerator's resources even if rendering fails.
    tdocs.close();
}
二、搜索索引
除了上面引入的包之外,还需要引入:
<%@ page import = "org.apache.lucene.search.*" %>
<%@ page import = "org.apache.lucene.queryParser.*" %>
// Index directory
// NOTE(review): ServletRequest.getRealPath is deprecated in the Servlet API in
// favour of ServletContext.getRealPath (application.getRealPath in a JSP).
String indexPath = request.getRealPath("网站目录");

// Analyzer used further down to tokenize the query string.
Analyzer analyzer = new IKAnalyzer();

IndexSearcher searcher = null;
IndexReader reader = null;
Query query = null;
TopDocs hits = null;
// Open the index directory (read-only) and wrap it in a searcher
try
{
    reader = IndexReader.open(FSDirectory.open(new File(indexPath)), true);
    searcher = new IndexSearcher(reader);
}
catch (Exception e)
{
    // Nothing can be searched without the index: record the cause, then stop.
    application.log("Unable to open Lucene index at " + indexPath, e);
    return;
}
执行查询
try
{?
??? //多字段搜索,这里要搜索多少个字段就要有多个个参数
??? BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
???
??? String[] fieldnames = new String[] { "title", "content"};
??? IKQueryParser ikquery = new IKQueryParser();
??? query = ikquery.parseMultiField(fieldnames, queryString , clauses);
???????????????????
??? //获取分词结果(这个只是便于对搜索结果输出加亮之类,并非必要选项)
??? String wd = "";
??? Reader r = new StringReader(queryString);
??? org.apache.lucene.analysis.TokenStream tsi = analyzer.tokenStream("", r);
??? org.apache.lucene.analysis.Token token;
??? while ( tsi.incrementToken() )
??? {
??????? wd = tsi.toString().replaceAll("\(\(|,(.*)", "");
??????? spword += (spword=="" ? wd : "," + wd );
??? }
??? tsi.close();
???
}
catch (Exception e)
{???????????????????????
???? return ;
}
读取搜索结果
hits = searcher.search(query, 0);
for (int i = startindex; i < hits.totalHits; i++)
{
??? if(i >= hits.totalHits) { break; }
???????????
??? Document doc = searcher.doc(hits.scoreDocs[i].doc);
???????????
??? String docid = doc.get("id");
??? String title = doc.get("title");
??
?? .....................
}