读书人

lucene4.4 索引的增删改查

发布时间: 2013-09-06 10:17:17 作者: rapoo

lucene4.4 索引的增删改查

package com.lucene.test;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

publicclass IndexUtil {

privatestaticfinal Logger LOGGER = Logger.getLogger(IndexUtil.class);

private Directorydirectory = null;

private DirectoryReaderreader = null;

private IndexWriterConfigconfig = null;

private IndexWriterwriter = null;

publicstaticfinal IndexUtilInstance =new IndexUtil();

private IndexUtil() {

try {

directory = FSDirectory.open(new File("D:/lucene/index"));

config =new IndexWriterConfig(Version.LUCENE_44,

new StandardAnalyzer(Version.LUCENE_44));

} catch (IOException e) {

e.printStackTrace();

}

}

/**

*添加索引

*@throwsIOException

*/

publicvoid index()throws IOException {

writer =new IndexWriter(directory,config);

File file = new File("D:\\lucene\\example");

Document document = null;

int id = 0;

long start =new Date().getTime();

LOGGER.info("添加索引…………………………");

for (File f : file.listFiles()) {

document = new Document();

document.add(new StringField("name",f.getName(), Store.YES));

document.add(new IntField("id", id++,Store.YES));

document.add(new StringField("path",f.getAbsolutePath(), Store.YES));

document.add(new TextField("context",new FileReader(f)));

writer.addDocument(document);

}

long end =new Date().getTime();

LOGGER.info("添加索引完成,用时:" + (end - start) / 1000.0 +"s…………………………");

writer.close();

}

/**

*查询索引

*@throwsIOException

*@throwsParseException

*/

publicvoid search()throws IOException, ParseException {

reader = DirectoryReader.open(directory);

QueryParser parser = newQueryParser(Version.LUCENE_44,"context",

new StandardAnalyzer(Version.LUCENE_44));

Query query = parser.parse("lucene");

IndexSearcher searcher =new IndexSearcher(reader);

TopDocs docs = searcher.search(query,100);

/**

*reader.maxDoc()包含索引文档的总数包含可用的和已经删除的数量

*reader.numDocs()当前可用的索引文档的数量不包含已经删除的

*reader.numDeletedDocs()删除的索引文档的数量

*/

LOGGER.info("总记录:" + docs.totalHits + " 命中文档数:" + docs.scoreDocs.length

+ " 最大的文档数maxDoc:" +reader.maxDoc() +" 删除文件数numDeletedDocs:"

+ reader.numDeletedDocs() + " numDocs" +reader.numDocs());

for (ScoreDoc doc : docs.scoreDocs) {

Document document = reader.document(doc.doc);

LOGGER.info("id:" +document.get("id") +" name:"

+ document.get("name") +" path:" + document.get("path"));

}

reader.close();

}

/**

*更新索引

*@throwsIOException

*/

publicvoid update()throws IOException {

writer =new IndexWriter(directory,config);

Document document = new Document();

document.add(new StringField("name","新文件", Store.YES));

document.add(new IntField("id", 12, Store.YES));

document.add(new StringField("path","D:\\lucene\\example\\新文件.txt", Store.YES));

writer.updateDocument(new Term("id","2"),document);

writer.commit();

writer.close();

}

/**

*删除索引删除的索引会保存到一个新的文件中(以del为结尾的文件相当于删除到回收站)

*@throwsIOException

*/

publicvoid delete()throws IOException {

writer =new IndexWriter(directory,config);

writer.deleteDocuments(new Term("name","11.txt"));

writer.close();

}

/**

*删除所有的索引删除的索引会保存到一个新的文件中(以del为结尾的文件相当于删除到回收站)

*@throwsIOException

*/

publicvoid deleteAll()throws IOException {

writer =new IndexWriter(directory,config);

writer.deleteAll();

writer.close();

}

/**

*删除已经删除的索引对应上一个删除方法删除回收站的文件

*@throwsIOException

*/

publicvoid forceMergeDeletes()throws IOException {

writer =new IndexWriter(directory,config);

writer.forceMergeDeletes();//清空回收站

writer.close();

}

/**

*显示所有的索引

*@throwsIOException

*/

publicvoid showIndex()throws IOException {

reader = DirectoryReader.open(directory);

Fields fields = MultiFields.getFields(reader);//获取directory中所有的field

for (String field : fields) {

LOGGER.info(field);

}

//显示 field中 context的所有的分词

Terms terms = fields.terms("context");

TermsEnum termsEnum = terms.iterator(null);

BytesRef term = null;

while ((term=termsEnum.next()) !=null) {

System.out.print(term.utf8ToString()+"\t");//分词的内容

System.out.print(termsEnum.docFreq()+"\t");//出现该分词的有文档的数量

System.out.print(termsEnum.totalTermFreq()+"\t");//分词的总数

DocsAndPositionsEnumdocsAndPositionsEnum = termsEnum.docsAndPositions(null,null);

//如果要查询的字段没有被分词,docsAndPositionsEnum就会为空继续循环

if(docsAndPositionsEnum==null){

continue;

}

int docId ;

while ((docId = docsAndPositionsEnum.nextDoc())!= DocIdSetIterator.NO_MORE_DOCS) {

Document document =reader.document(docId);//获取document对象

System.out.print(docId+"\t");//分词的总数

System.out.print(document.get("name")+"\t");//可以获取document中field的值

int freq = docsAndPositionsEnum.freq();//该document中该分词出现的次数

for (int i = 0; i < freq; i++) {

System.out.print(docsAndPositionsEnum.nextPosition()+":");//分词的位置

System.out.print("["+docsAndPositionsEnum.startOffset()+"");//分词起始偏移量的位置

System.out.print(docsAndPositionsEnum.endOffset()+"],");//分词结束偏移量的位置

System.out.print(docsAndPositionsEnum.getPayload()+"\t");

}

}

System.out.println();

}

reader.close();

}

}

读书人网 >编程

热点推荐