读书人

lucene的汉语分词器

发布时间: 2012-10-28 09:54:44 作者: rapoo

lucene的中文分词器
package analyzer;

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.mira.lucene.analysis.IK_CAnalyzer;
import org.mira.lucene.analysis.MIK_CAnalyzer;

import com.sohospace.lucene.analysis.xanalyzer.XAnalyzer;
import com.sohospace.lucene.analysis.xanalyzer.XFactory;
import com.sohospace.lucene.analysis.xanalyzer.XTokenizer;
lucene的汉语分词器//中文分词使用了Paoding的分词技术,特表示感谢
lucene的汉语分词器lucene的汉语分词器public?class?TestCJKAnalyzer?lucene的汉语分词器{
lucene的汉语分词器????private?static?String?testString1?=?"中华人民共和国在1949年建立,从此开始了新中国的伟大篇章";
lucene的汉语分词器????private?static?String?testString2?=?"比尔盖茨从事餐饮业和服务业方面的工作";
lucene的汉语分词器lucene的汉语分词器????public?static?void?testStandard(String?testString)?throws?Exceptionlucene的汉语分词器{
lucene的汉语分词器????????Analyzer?analyzer?=?new?StandardAnalyzer();??????
lucene的汉语分词器????????Reader?r?=?new?StringReader(testString);??????
lucene的汉语分词器????????StopFilter?sf?=?(StopFilter)?analyzer.tokenStream("",?r);
lucene的汉语分词器????????System.err.println("=====standard?analyzer====");
lucene的汉语分词器????????System.err.println("分析方法:默认没有词只有字");
lucene的汉语分词器????????Token?t;??????
lucene的汉语分词器lucene的汉语分词器????????while?((t?=?sf.next())?!=?null)?lucene的汉语分词器{??????
lucene的汉语分词器????????????System.out.println(t.termText());??????
lucene的汉语分词器????????}?????
lucene的汉语分词器????}
lucene的汉语分词器lucene的汉语分词器????public?static?void?testCJK(String?testString)?throws?Exceptionlucene的汉语分词器{
lucene的汉语分词器????????Analyzer?analyzer?=?new?CJKAnalyzer();??????
lucene的汉语分词器????????Reader?r?=?new?StringReader(testString);??????
lucene的汉语分词器????????StopFilter?sf?=?(StopFilter)?analyzer.tokenStream("",?r);
lucene的汉语分词器????????System.err.println("=====cjk?analyzer====");
lucene的汉语分词器????????System.err.println("分析方法:交叉双字分割");
lucene的汉语分词器????????Token?t;??????
lucene的汉语分词器lucene的汉语分词器????????while?((t?=?sf.next())?!=?null)?lucene的汉语分词器{??????
lucene的汉语分词器????????????System.out.println(t.termText());??????
lucene的汉语分词器????????}?????
lucene的汉语分词器????}
lucene的汉语分词器lucene的汉语分词器????public?static?void?testChiniese(String?testString)?throws?Exceptionlucene的汉语分词器{
lucene的汉语分词器????????Analyzer?analyzer?=?new?ChineseAnalyzer();??????
lucene的汉语分词器????????Reader?r?=?new?StringReader(testString);??????
lucene的汉语分词器????????TokenFilter?tf?=?(TokenFilter)?analyzer.tokenStream("",?r);
lucene的汉语分词器????????System.err.println("=====chinese?analyzer====");
lucene的汉语分词器????????System.err.println("分析方法:基本等同StandardAnalyzer");
lucene的汉语分词器????????Token?t;??????
lucene的汉语分词器lucene的汉语分词器????????while?((t?=?tf.next())?!=?null)?lucene的汉语分词器{??????
lucene的汉语分词器????????????System.out.println(t.termText());??????
lucene的汉语分词器????????}?????
lucene的汉语分词器????}
lucene的汉语分词器lucene的汉语分词器????public?static?void?testPaoding(String?testString)?throws?Exceptionlucene的汉语分词器{
lucene的汉语分词器????????XAnalyzer?analyzer?=?XFactory.getQueryAnalyzer();???
lucene的汉语分词器????????Reader?r?=?new?StringReader(testString);???
lucene的汉语分词器????????XTokenizer?ts?=?(XTokenizer)?analyzer.tokenStream("",?r);???
lucene的汉语分词器????????System.err.println("=====paoding?analyzer====");
lucene的汉语分词器????????System.err.println("分析方法:字典分词,去掉停止词。在字典不能匹配的情况下使用CJKAnalyzer的分割发。");
lucene的汉语分词器????????Token?t;???
lucene的汉语分词器lucene的汉语分词器????????while?((t?=?ts.next())?!=?null)?lucene的汉语分词器{???
lucene的汉语分词器???????????System.out.println(t.termText());???
lucene的汉语分词器????????}???
lucene的汉语分词器????}
lucene的汉语分词器lucene的汉语分词器????public?static?void?testJe(String?testString)?throws?Exceptionlucene的汉语分词器{
lucene的汉语分词器//????????Analyzer?analyzer?=?new?MIK_CAnalyzer();
lucene的汉语分词器????????Analyzer?analyzer?=?new?IK_CAnalyzer();
lucene的汉语分词器????????Reader?r?=?new?StringReader(testString);?
lucene的汉语分词器????????TokenStream?ts?=?(TokenStream)analyzer.tokenStream("",?r);
lucene的汉语分词器????????System.err.println("=====je?analyzer====");
lucene的汉语分词器????????System.err.println("分析方法:字典分词,正反双向搜索,具体不明");
lucene的汉语分词器????????Token?t;???
lucene的汉语分词器lucene的汉语分词器????????while?((t?=?ts.next())?!=?null)?lucene的汉语分词器{???
lucene的汉语分词器???????????System.out.println(t.termText());???
lucene的汉语分词器????????}???
lucene的汉语分词器????}
lucene的汉语分词器lucene的汉语分词器????public?static?void?main(String[]?args)?throws?Exceptionlucene的汉语分词器{
lucene的汉语分词器//????????String?testString?=?testString1;
lucene的汉语分词器????????String?testString?=?testString1;
lucene的汉语分词器????????System.out.println(testString);
lucene的汉语分词器????????
lucene的汉语分词器????????testStandard(testString);
lucene的汉语分词器????????testCJK(testString);
lucene的汉语分词器????????testPaoding(testString);
lucene的汉语分词器????????
lucene的汉语分词器//????????testChiniese(testString);
lucene的汉语分词器//????????testJe(testString);
lucene的汉语分词器????}
lucene的汉语分词器
lucene的汉语分词器}
lucene的汉语分词器

?

读书人网 >编程

热点推荐