设为首页 加入收藏

TOP

Lucene给文本索引和搜索功能的应用(二)
2014-11-23 22:10:02 来源: 作者: 【 】 浏览:29
Tags:Lucene 文本 索引 搜索 功能 应用
xWriterConfig(Version.LUCENE_47, analyzer);
return config;
}
/**
 * Indexes the files that sit directly inside {@code dataDir}.
 *
 * <p>An entry is passed to {@code indexFile} only when it is not a directory, is not
 * hidden, exists, is readable and is accepted by {@code filter}. Subdirectories are
 * not descended into.
 *
 * @param dataDir path of the directory whose files are indexed
 * @param filter  optional file filter; {@code null} accepts every file
 * @return the value reported by {@code writer.numDocs()} after the loop finishes
 * @throws Exception if {@code dataDir} cannot be listed or a file fails to index
 */
public int index(String dataDir, FileFilter filter)
        throws Exception {

    // listFiles() returns null (not an empty array) when the path is not a directory
    // or an I/O error occurs; report that clearly instead of failing with an NPE below.
    File[] files = new File(dataDir).listFiles();
    if (files == null) {
        throw new IOException("Cannot list files in directory: " + dataDir);
    }

    for (File f : files) {
        if (!f.isDirectory()
                && !f.isHidden()
                && f.exists()
                && f.canRead()
                && (filter == null || filter.accept(f))) {
            indexFile(f);
        }
    }

    return writer.numDocs();
}



/**
 * File filter that accepts a path when its name, lower-cased with the default
 * locale, ends with ".txt".
 */
private static class TextFilesFilter implements FileFilter {
    @Override
    public boolean accept(File path) {
        String lowerCasedName = path.getName().toLowerCase();
        return lowerCasedName.endsWith(".txt");
    }
}

/**
 * Indexes one file line by line: every distinct line returned by
 * {@code readFileNoDup} becomes its own document with a single stored
 * "contents" field.
 *
 * @param f the file to index
 * @throws Exception if the file cannot be read or a document cannot be added
 */
private void indexFile(File f) throws Exception {
    System.out.println("Indexing " + f.getCanonicalPath());

    // Typed as List<String>: iterating a raw List with a String loop variable
    // does not compile.
    @SuppressWarnings("unchecked")
    List<String> lines = readFileNoDup(f);

    for (String line : lines) {
        Document doc = new Document();
        doc.add(new Field("contents", line, TextField.TYPE_STORED));
        writer.addDocument(doc);
    }
}
/**
 * Reads a file as UTF-8 text and returns its lines in file order, including duplicates.
 * Lines shorter than two characters are skipped.
 *
 * @param filePathAndName the file to read
 * @return the retained lines, in the order they appear in the file
 * @throws IOException if the file cannot be opened or read
 */
private List<String> readFile(File filePathAndName) throws IOException {
    List<String> lines = new ArrayList<String>();

    FileInputStream fis = new FileInputStream(filePathAndName);
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.length() >= 2) {
                lines.add(line);
            }
        }
    } finally {
        // Closing the underlying stream releases the file handle even when reading fails.
        fis.close();
    }

    return lines;
}
/**
 * Reads a file as UTF-8 text and returns its distinct lines.
 *
 * <p>Each line is trimmed first; lines that are shorter than two characters after
 * trimming are skipped, so blank or whitespace-only lines never reach the result.
 * Duplicates are removed through a {@link HashSet}, so the order of the returned
 * list is unspecified.
 *
 * @param filePathAndName the file to read
 * @return a new list holding each distinct trimmed line once
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> readFileNoDup(File filePathAndName)
        throws IOException {

    Set<String> uniqueLines = new HashSet<String>();

    FileInputStream fis = new FileInputStream(filePathAndName);
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
        String line;
        while ((line = reader.readLine()) != null) {
            // Trim before the length check so padding cannot let an empty or
            // one-character line through.
            String trimmed = line.trim();
            if (trimmed.length() < 2) {
                continue;
            }
            uniqueLines.add(trimmed);
        }
    } finally {
        // Closing the underlying stream releases the file handle even when reading fails.
        fis.close();
    }

    return new ArrayList<String>(uniqueLines);
}
}


//对刚才已经建好的索引进行搜索


package lucene.home.clq;





/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/



import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;



import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import o

首页 上一页 1 2 3 下一页 尾页 2/3/3
】【打印繁体】【投稿】【收藏】 【推荐】【举报】【评论】 【关闭】 【返回顶部
分享到: 
上一篇Java中IO写文件工具类 下一篇Jfinal学习之路---Controller使用

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: