读书人

利用lucene对整个数据库建立目录(luce

发布时间: 2012-12-25 16:18:28 作者: rapoo

利用lucene对整个数据库建立索引(lucene,SQL,JDBC)(

导言:

如果要对整个数据库做精确查询或模糊查询,我们怎么才可以做到?还是通过SQL查询吗?答案是否定的。因为,通过SQL对整个数据库做精确查询或模糊查询,速度将非常的慢;

lucene解决了这个问题。通过对表或者文本文件预先建立索引,可以很快的实现全文检索。

思路:

1、通过SQL得到所有表名的集合---->2、遍历所有的表,分别为每个表的每个记录建立索引;同时添加表的中文名以及表的说明的索引---->按Writer\analyzer\document\field的循序建索引。

package com.jrj.datamart.tree;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Writer;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
//import com.jrj.datamart.model.ApiIndicator;
//import com.jrj.datamart.model.ApiInfo;
//import com.jrj.datamart.model.ApiInfoQuery;
//import com.jrj.datamart.service.ApiInfoService;


//对整个数据库的建立索引;并给每个表添加,表的说明和字段的中文名;方便查询
//索引 Lucene 3.0.2
public class IndexerDB {

?// 保存索引文件的地方
?private static? String INDEX_DIR = "F:\\MyLuceneDB2\\LuceneFileIndexDir";
?private String index_dir;
?private File file=new File(INDEX_DIR);
?// 将要搜索TXT文件的地方
?private String data_dir;
?private String DATA_DIR = "F:\\Lucene";
?private String entityName;
//?private ApiInfo apiInfo;
//?private ApiInfoQuery apiInfoQuery = new ApiInfoQuery();
//?private ApiInfoService apiInfoService;
//?private List<ApiIndicator> apiIndicators = new ArrayList<ApiIndicator>();
//?private ApiIndicator apiIndicator;
?private StringBuilder newsb = new StringBuilder();
?private ResultSet rs = null;
?private ResultSet tempRs = null;
?private ResultSetMetaData rsmd = null;
//?private SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
//???Version.LUCENE_30, true);
?private Document doc;
?private String tableName;

?public static void main(String[] args) throws Exception {
??IndexerDB indexDB=new IndexerDB();
??????? File file=new File(INDEX_DIR);
??//1
??indexDB.execute(file);
?}
?

//执行对数据库的索引化
//@param file
//@return
//?@throws Exception
//??
?public String execute(File file) throws Exception {
??long start = new Date().getTime();
??//2
??int numIndexed=getAllTableNameFromDBAndIndexing(file);
??long end = new Date().getTime();
??System.out.println("Indexing " + numIndexed + " files took "
?? + (end - start) + " milliseconds");
??return "success";
?}
?
// 查询所有的表,并遍历所有的表;
//调用getDataFromTable(tableName)获取表的记录;
//调用indexData(writer, rs, tableName)对单张表做索引
?public int getAllTableNameFromDBAndIndexing(File file)throws Exception {
??ResultSet rs=null;
??String sql1 = null;
??String sql2 = null;
??sql1 = "select [name] from [sysobjects] where [type] = 'u' order by [name]";
??sql2 = "show tables";
??SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
????Version.LUCENE_30, true);
??IndexWriter writer = new IndexWriter(FSDirectory.open(file),
????analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
??try {
???rs = JDBCUtil.execute(sql1);
??} catch (Exception e) {
???rs = JDBCUtil.execute(sql2);
??}
??while (rs.next()) {
???
???tableName=rs.getString(1);
???System.out.println("tableName: "+rs.getString(1));
???//ResultSetMetaData rsmd = rs.getMetaData();?
???ResultSet tempRs=getDataFromTable(tableName);
???System.out.println("already get data of table");
???indexData(writer, tempRs, tableName);
??}
??int numIndexed = writer.numDocs();
??writer.optimize();
??writer.close();
??return numIndexed;
?}
???
?// 取得表的数据
?public ResultSet getDataFromTable(String tableName) throws Exception {
??String sql = "select * from " + tableName;
??System.out.println(""+sql);
??//String sql = "select * from " + "HK_STKCODE";
??return JDBCUtil.execute(sql);
?}
?
//对特定表的记录,采用特定writer,索引
//该方法是lucene的indexer的关键方法
//writer---->document---->field
?private void indexData(IndexWriter writer, ResultSet rs, String tableName)
???throws Exception{
??if (rs == null) {
???return;
??}
??// 取得实体名
??//System.out.println("tableName: " + tableName);
??entityName = getEntityName(tableName);
??System.out.println(" .entityName: " + entityName);
??while (rs.next()) {
???doc = new Document();
???rsmd = rs.getMetaData();
???int colsNum = rsmd.getColumnCount();
???//System.out.println("colsNum: "+colsNum);
???for (int i = 1; i < colsNum + 1; i++) {
???String ?columnName=rsmd.getColumnName(i);??
???//System.out.println("columnName: "+columnName);
???//System.out.println(" rs.getString(i): "+ rs.getString(i));
???// 1、对该条记录的第一个字段进行索引
????doc.add(new Field(columnName, (rs.getString(i)==null?"":rs.getString(i)),
??????Field.Store.YES, Field.Index.ANALYZED));
???}
// 2、在此处添加,表的说明(description);来至于apiinfo;根据表对应的实体名,找到表对应的apiinfo实体
//???apiInfoQuery.setEntityname(entityName);
//???apiInfo = apiInfoService.gainApiInfoByEntityName(entityName);
//???if (apiInfo != null) {
//????doc.add(new Field("apiDesc", apiInfo.toString(), Field.Store.YES,
//??????Field.Index.ANALYZED));
//???}
// 3、在此处添加,表的中英文字段:
// 来自于apiindicator;根据表对应的实体名,去APIINFO中取找apiid,再去apiindicator中找apiindicator的实体
//???List<ApiIndicator> apiIndicators = apiInfoService
//?????.gainApiIndicatorsByApiId(apiInfo.getId());
//???// 输出指标和描述结合为字符串
//???for (int i = 0; i < apiIndicators.size(); i++) {
//????apiIndicator = apiIndicators.get(i);
//????newsb.append(apiIndicator.getCnname() + " ")
//??????.append(apiIndicator.getEnname() + " ")
//??????.append(apiIndicator.getDescription() + "");
//????newsb.append("\n");
//???}
//???System.out.println("newsb.toString(): " + newsb.toString());
//???doc.add(new Field("outputFields", newsb.toString(),
//?????Field.Store.YES, Field.Index.ANALYZED));
// 4、文件名
???doc.add(new Field("apiName", entityName,
?????Field.Store.YES, Field.Index.ANALYZED));
???writer.addDocument(doc);
??}
?}

?

? //将表名通过字符串处理成为实体名
?public String getEntityName(String tableName) {
??// tableName = tableName.substring(0, tableName.indexOf(".txt"));
??StringBuilder sb = new StringBuilder();
??// 将"_"替换掉,如果有的话,以便处理成与API对应的实体一样的字符串。如:PUB_SEITGC.txt,RSH_RSHRPT_INDFO.txt
??if (tableName.indexOf("_") != -1) {
???String[] subStrings = tableName.split("_");
???for (int i = 0; i < subStrings.length; i++) {
????sb.append(subStrings[i]);
???}
???return sb.toString();
??} else {
???return tableName;
??}
?}

?public String getIndex_dir() {
??return index_dir;
?}

?public void setIndex_dir(String index_dir) {
??this.index_dir = index_dir;
?}

?public String getData_dir() {
??return data_dir;
?}

?public void setData_dir(String data_dir) {
??this.data_dir = data_dir;
?}

//?public ApiInfoService getApiInfoService() {
//??return apiInfoService;
//?}
//
//?public void setApiInfoService(ApiInfoService apiInfoService) {
//??this.apiInfoService = apiInfoService;
//?}

}

?

// 数据库查询类

//JDBCUtil用于取得连接;JDBCUtil.execute(sql)用于执行SQL,并返回resultset.

package com.jrj.datamart.tree;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class JDBCUtil {

?public static Connection conn = null;
?// 建立连接
?static void getConntion() {
??try {
???String driver_class = "com.microsoft.sqlserver.jdbc.SQLServerDriver";
???// ?useUnicode=true&characterEncoding=utf-8
???String connection_url = "jdbc:sqlserver://localhost:1433;DatabaseName=UDM";
???String user_name = "sa";
???String db_password = "sa";
???Class.forName(driver_class);
???conn = DriverManager.getConnection(connection_url, user_name,
?????db_password);
???conn.setAutoCommit(false);
??} catch (Exception e) {
???e.printStackTrace();
??}
?}

?// 查询后,得到数据
?public static ResultSet execute(String sql) throws Exception {
??// 取得连接
??getConntion();
??// 写SQL
??// 得到一个statment对象
??Statement stmt = conn.createStatement();
??// 得到一个结果集
??return stmt.executeQuery(sql);
?}

?static void close() {
??if (conn != null) {
???try {
????conn.close();
???} catch (SQLException e) {
????e.printStackTrace();
???}
??}
?}

?public static Connection getConn() {
??return conn;
?}

?public static void setConn(Connection conn) {
??JDBCWriteExcel.conn = conn;
?}
}

?

?

文章出自:http://blog.sina.com.cn/s/blog_4f9ce8f30100nt5r.html

读书人网 >其他数据库

热点推荐