读书人

lucene indexer/searcher简略代码示例

发布时间: 2012-12-25 16:18:28 作者: rapoo

lucene indexer/searcher简单代码示例
仅供拷贝

<!--pom.xml-->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>3.0.0</version>
</dependency>
<!-- MyLuceneIndexer imports org.apache.commons.io.FileUtils, which lucene-core does not provide. -->
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.4</version>
</dependency>


package player.kent.chen.temp.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a fresh Lucene index from the regular files found directly inside a
 * content directory.
 *
 * <p>Each file becomes one document with two fields: {@code contents} (the
 * file text, read through a {@link FileReader}) and {@code filepath} (the
 * absolute path, stored and indexed without analysis).
 */
public class MyLuceneIndexer {

    /**
     * Wipes the index directory, re-creates it, indexes the content directory
     * and prints the elapsed time in milliseconds.
     *
     * @param args ignored
     * @throws Exception if the index directory cannot be reset or indexing fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene";
        File contentDir = new File(rootDir, "content");
        File indexDir = new File(rootDir, "index");

        // Start every run from an empty index directory.
        FileUtils.deleteDirectory(indexDir);
        indexDir.mkdirs();

        long begin = now();
        doIndex(contentDir, indexDir);
        System.out.println("Done in miliseconds of : " + (now() - begin));
    }

    /**
     * Indexes every regular file directly under {@code cd} into a new index
     * located at {@code id}.
     *
     * @param cd directory whose files are indexed (not traversed recursively)
     * @param id directory that receives the index
     * @throws IOException if {@code cd} cannot be listed, or if reading a file
     *         or writing the index fails
     */
    private static void doIndex(File cd, File id) throws IOException {
        // File.listFiles() returns null when the path is not a directory or
        // cannot be read; fail with a clear message instead of an NPE below.
        File[] files = cd.listFiles();
        if (files == null) {
            throw new IOException("Cannot list content directory: " + cd.getAbsolutePath());
        }

        Directory indexDir = FSDirectory.open(id);
        try {
            // Third argument 'true' asks the writer to create a new index.
            IndexWriter writer = new IndexWriter(indexDir,
                    new StandardAnalyzer(Version.LUCENE_30), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (File file : files) {
                    // Subdirectories cannot be opened with FileReader; skip them
                    // rather than aborting the whole run.
                    if (!file.isFile()) {
                        continue;
                    }
                    indexFile(writer, file);
                }
            } finally {
                // Always close the writer, even when indexing a file failed.
                writer.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /** Adds one document for {@code file} to {@code writer} and closes the file reader. */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        System.out.println("Indexing ... " + file.getAbsolutePath());

        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the content files.
        FileReader reader = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("contents", reader));
            doc.add(new Field("filepath", file.getAbsolutePath(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        } finally {
            // Release the file handle even if addDocument throws.
            reader.close();
        }
    }

    /** Returns the current wall-clock time in milliseconds. */
    private static long now() {
        return System.currentTimeMillis();
    }
}


package player.kent.chen.temp.lucene;

import java.io.File;
import java.text.MessageFormat;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches the {@code contents} field of an existing Lucene index for a fixed
 * keyword and prints, for each of the top hits, a shell command the user can
 * run to see the match in context.
 */
public class MyLuceneSearcher {

    /**
     * Opens the index, runs the query for the top 10 hits, prints the hit count
     * with the elapsed search time, then prints one suggested command per hit.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, the query cannot be
     *         parsed, or the search fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene";
        File id = new File(rootDir, "index");
        String keyword = "搜索";

        Directory indexDir = FSDirectory.open(id);
        try {
            IndexSearcher is = new IndexSearcher(indexDir);
            try {
                QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = qp.parse(keyword);

                // Only the search call itself is timed, not index opening or parsing.
                long begin = now();
                TopDocs hits = is.search(query, 10);
                System.out.println(MessageFormat.format("Found {0} matches in {1} milliseconds",
                        hits.totalHits, now() - begin));

                System.out.println("They are:");
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    String file = doc.get("filepath");
                    // The command is only printed, never executed; path and keyword
                    // are inserted without shell quoting.
                    String grepCmd = MessageFormat.format("cat {0} | grep -5 {1}", file, keyword);
                    System.out.println("Please do: " + grepCmd);
                }
            } finally {
                // Close the searcher even when parsing or searching throws.
                is.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /** Returns the current wall-clock time in milliseconds. */
    private static long now() {
        return System.currentTimeMillis();
    }
}

读书人网 >编程

热点推荐