全文检索(二)-基于lucene4.10的增删改查(一)

2015-01-27 10:09:03 · 作者: · 浏览: 45

今天用 Lucene 完成了一个简单的 Web 应用,并从中提取了早期编写的一个测试类。首先简单介绍一下 Lucene 的几个常用包:

Lucene 包的组成结构:对于外部应用来说,索引模块(index)和检索模块(search)是主要的入口。

org.apache.lucene.search/ 搜索入口
org.apache.lucene.index/ 索引入口
org.apache.lucene.analysis/ 语言分析器
org.apache.lucene.queryparser/ 查询分析器
org.apache.lucene.document/ 存储结构
org.apache.lucene.store/ 底层IO/存储结构
org.apache.lucene.util/ 一些公用的数据结构


话不多说,直接上代码(这是早期封装的一个测试类,封装得还算比较完善,有兴趣的朋友可以在此基础上继续完善):

package com.lucene.util;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.message.base.search.SearchBean;

/**
 * lucene 4.10.1
 * 
 * @creatTime 2014-10-28
 * @author 胡慧超
 * 
 */
public class HhcIndexTools {

	/** log4j logger for this class. */
	private static final Logger logger = Logger.getLogger(HhcIndexTools.class);

	/**
	 * Hard-coded filesystem location; presumably the Lucene index directory
	 * (the code that reads it is outside this excerpt). Left non-final because
	 * code elsewhere in the class may reassign it.
	 */
	private static String indexPath = "E://lucene//index";

	/**
	 * Manual test driver: calls {@code forceDeleteIndex()}, {@code query()} and
	 * {@code highlighterSearch()} in that order. Any exception raised by those
	 * calls is caught here and reported through the class logger, so the
	 * method itself never propagates a failure.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		try {
			// Alternative steps, disabled for this run; uncomment to exercise them.
//			createIndex();
//			searchIndex("码农");
//			query();
//			deleteIndex(null);
			forceDeleteIndex();
			query();
			highlighterSearch();
		} catch (Exception e) {
			// Log with the full stack trace via the class logger instead of
			// printing straight to stderr, so the failure reaches the configured appenders.
			logger.error("Lucene index test run failed", e);
		}
	}

	/**
	 * 创建索引
	 */
	public static void createIndex() {
		// 最细粒切分算法--true的话是 智能切分
		Analyzer analyzer = new IKAnalyzer(false);
		Document doc = null;
		IndexWriter indexWriter = null;
		try {
			indexWriter = getIndexWriter(analyzer);
			// 添加索引
			doc = new Document();
			doc.add(new StringField("id", "1", Store.YES));
			doc.add(new TextField("title", "标题:开始", Store.YES));
			d