lucene Analyzer 庖丁解牛 中文分词
??? /*
??? ?*param?? 分词
??? ?*/
??? public List getname(String param) throws IOException{
??? ??? //分词(庖丁解牛分词法)
??? ??? Analyzer ika = new PaodingAnalyzer();
??? ??? List<String> keys = new ArrayList<String>();
??? ??? ??? TokenStream ts = null;
??? ??? ???
??? ??? ??? try{
??? ??? ??? ??? Reader r = new StringReader(param);
??? ??? ??? ??? ts = ika.tokenStream("TestField", r);
??? ??? ??? ??? TermAttribute termAtt = (TermAttribute) ts.getAttribute(TermAttribute.class);
??? ??? ??? ??? TypeAttribute typeAtt = (TypeAttribute) ts.getAttribute(TypeAttribute.class);
??? ??? ??? ??? String key = null;
??? ??? ??? ??? while (ts.incrementToken()) {
??? ??? ??? ??? ??? if ("word".equals(typeAtt.type())) {
??? ??? ??? ??? ??? ??? key = termAtt.term();
??? ??? ??? ??? ??? ??? if (key.length() >= 2) {
??? ??? ??? ??? ??? ??? ??? keys.add(key);
??? ??? ??? ??? ??? ??? }
??? ??? ??? ??? ??? }
??? ??? ??? ??? }
??? ??? ??? }catch(IOException e){
??? ??? ??? ??? e.printStackTrace();
??? ??? ??? } finally {
??? ??? ??? ??? if (ts != null) {
??? ??? ??? ??? ??? ts.close();
??? ??? ??? ??? }
??? ??? ??? }
??? ??? ???
??? ??? ??? Map<String, Integer> keyMap = new HashMap<String, Integer>();
??? ??? ??? Integer $ = null;
??? ??? ??? //计算每个词出现的次数
??? ??? ??? for (String key : keys) {
??? ??? ??? ??? keyMap.put(key, ($ = keyMap.get(key)) == null ? 1 : $ + 1);
??? ??? ??? }
??? ??? ??? List<Map.Entry<String, Integer>> keyList = new ArrayList<Map.Entry<String, Integer>>(keyMap.entrySet());
??? ??? ??? //进行排序
??? ??? ??? Collections.sort(keyList, new Comparator<Map.Entry<String, Integer>>() {
??? ??? ??? ??? public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
??? ??? ??? ??? ??? return (o2.getValue() - o1.getValue());
??? ??? ??? ??? }
??? ??? ??? });
??? ??? ??? //取出关键词
??? ??? ??? String id = null;
??? ??? ??? String str = "";
??? ??? ??? List list = new ArrayList();
??? ??? ??? if(keyList.size() >0){
??? ??? ??? ??? for (int i = 0;i < keyList.size(); i++) {
??? ??? ??? ??? ??? ?id = keyList.get(i).toString();
??? ??? ??? ??? ??? ?String[] strs = id.split("\\=");
??? ??? ??? ??? ??? ?str = strs[0];
??? ??? ??? ??? ??? ?list.add(strs[0]);
??? ??? ??? ??? ??? ?System.out.println("id:"+id);
??? ??? ??? ??? }
??? ??? ??? }
??? ??? ??? return list;
??? }