Quantcast
Channel: IT社区推荐资讯 - ITIndex.net
Viewing all articles
Browse latest Browse all 15843

Lucene 4.3 简单创建和查询索引实例

$
0
0

1.创建索引实例代码

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.swing.filechooser.FileFilter;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Minimal Lucene 4.3 indexing example: walks a data directory recursively and
 * adds one {@link Document} per accepted file to an on-disk index.
 *
 * <p>Instances hold an open {@link IndexWriter}; the caller is responsible for
 * closing it (see {@link #main(String[])}).
 */
public class Indexer {

	/** Writer through which all documents are added to the index. */
	private IndexWriter writer;

	/** Analyzer handed to the writer configuration for analyzed fields. */
	private Analyzer analyzer;

	/** Documents collected by the most recent traversal in {@link #indexFile}. */
	List<Document> documents = new ArrayList<Document>();

	/**
	 * Indexes the sample data directory, commits, prints the number of
	 * documents in the index and closes the writer.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		String dataDir = "E:/lucene/data";
		String indexDir = "E:/lucene/index";
		try {
			Indexer indexer = new Indexer(indexDir);
			try {
				indexer.index(dataDir, new TextFilesFilter());
				indexer.writer.commit();
				System.out.println(indexer.writer.numDocs());
			} finally {
				// Always close the writer, otherwise the index write lock
				// stays held when indexing or committing fails.
				indexer.writer.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Opens (or creates) the index stored in {@code indexDir} and configures
	 * the writer for bulk indexing.
	 *
	 * @param indexDir path of the directory holding the index
	 * @throws IOException if the directory or the index writer cannot be opened
	 */
	public Indexer(String indexDir) throws IOException{
		Directory dir = FSDirectory.open(new File(indexDir));
		analyzer = new IKAnalyzer();
		LogMergePolicy mergePolicy = new LogDocMergePolicy();
		// Basic index configuration.
		// Merge factor: how often segments are merged while documents are added.
		// Smaller values make indexing slower; larger values make it faster,
		// and values above 10 suit batch indexing.
		mergePolicy.setMergeFactor(30);
		// Maximum number of documents in a segment that may still be merged.
		// Smaller values favor incremental indexing speed; larger values suit
		// batch indexing and faster searches.
		mergePolicy.setMaxMergeDocs(5000);
		IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
		indexWriterConfig.setMaxBufferedDocs(10000);
		indexWriterConfig.setMergePolicy(mergePolicy);
		indexWriterConfig.setRAMBufferSizeMB(64);
		// Open mode: create a new index, or append to an existing one.
		indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
		writer = new IndexWriter(dir, indexWriterConfig);
	}

	/**
	 * Builds the index document describing a single file.
	 *
	 * @param f file to describe
	 * @return document with the fields name, content, fullpath and updateTime
	 * @throws IOException if the canonical path of the file cannot be resolved
	 */
	private Document getDocument(File f) throws IOException{
		Document document = new Document();
		document.add(new StringField("name", f.getName(), Store.YES));
		// NOTE(review): the content is a fixed sample sentence, not the text of
		// the file; replace it with the real file content for actual use.
		document.add(new TextField("content", "我爱你中国", Store.YES));
		document.add(new StringField("fullpath", f.getCanonicalPath(),Store.YES));
		document.add(new StringField("updateTime", String.valueOf(f.lastModified()),Store.YES));
		return document;
	}

	/**
	 * Recursively collects documents for every visible, readable file accepted
	 * by the filter and appends them to {@link #documents}.
	 *
	 * @param files       entries to inspect; {@code null} is treated as empty
	 * @param filesFilter filter deciding which files are indexed; when
	 *                    {@code null} no file is accepted
	 * @return the shared {@link #documents} list
	 * @throws IOException if a document cannot be built for a file
	 */
	private List<Document> getDocuments(File [] files, FileFilter filesFilter) throws IOException{
		if(files == null){
			// File.listFiles() yields null for unreadable directories or on I/O errors.
			return documents;
		}
		for(File f : files){
			if(f.isDirectory()){
				File [] children = f.listFiles();
				if(children != null){
					getDocuments(children, filesFilter);
				}
			}else{
				if(!f.isHidden() && f.canRead() && (filesFilter != null && filesFilter.accept(f))){
					documents.add(getDocument(f));
				}
			}
		}
		return documents;
	}

	/**
	 * Collects the documents for the given files and writes them to the index.
	 *
	 * @param files       entries to index
	 * @param filesFilter filter deciding which files are indexed
	 * @throws IOException if collecting or writing the documents fails
	 */
	private void indexFile(File [] files, FileFilter filesFilter) throws IOException{
		// Start from an empty list so a repeated call does not add the
		// documents of an earlier pass a second time.
		documents.clear();
		List<Document> collected = getDocuments(files, filesFilter);
		if(!collected.isEmpty()){
			writer.addDocuments(collected);
		}
	}

	/**
	 * Indexes every accepted file below the given data directory.
	 *
	 * @param dataDri     path of the directory containing the files to index
	 * @param filesFilter filter deciding which files are indexed
	 * @throws IOException if the directory cannot be listed or indexing fails
	 */
	private void index(String dataDri, TextFilesFilter filesFilter) throws IOException{
		File [] files = new File(dataDri).listFiles();
		if(files == null){
			throw new IOException("Cannot list data directory: " + dataDri);
		}
		// Use the filter supplied by the caller instead of creating a new one.
		indexFile(files, filesFilter);
	}

	/**
	 * File filter that accepts only files whose name ends with ".txt"
	 * (case-insensitive).
	 *
	 * @author ANWJ
	 */
	private static class TextFilesFilter extends FileFilter{

		@Override
		public boolean accept(File f) {
			return f.getName().toLowerCase().endsWith(".txt");
		}

		@Override
		public String getDescription() {
			return null;
		}

	}

}

2.检索索引实例代码

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Minimal Lucene 4.3 search example: parses a query against the
 * {@code content} field and prints the stored content of the top hits.
 */
public class Searcher {
	
	/**
	 * Searches the index in {@code indexDir} for {@code key} and prints the
	 * stored {@code content} field of up to ten matching documents.
	 *
	 * @param indexDir path of the directory holding the index
	 * @param key      query string, parsed with the classic query parser
	 * @throws IOException    if the index cannot be opened or read
	 * @throws ParseException if {@code key} is not a valid query
	 */
	public static void search(String indexDir, String key) throws IOException, ParseException{
		
		Directory directory = FSDirectory.open(new File(indexDir));
		try {
			SearcherManager sm = new SearcherManager(directory,new  SearcherFactory());
			try {
				IndexSearcher searcher = sm.acquire();
				try {
					Analyzer  analyzer = new IKAnalyzer();
					QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer);
					Query query = parser.parse(key);
					TopDocs hits = searcher.search(query, 10);
					for(ScoreDoc doc : hits.scoreDocs){
						Document document = searcher.doc(doc.doc);
						System.out.println(document.get("content"));
					}
				} finally {
					// Every acquire() must be paired with a release(), otherwise
					// the underlying reader is never freed.
					sm.release(searcher);
				}
			} finally {
				sm.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Runs a sample search against the example index.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String indexDir = "E:/lucene/index";
		String key = "中国";
		try {
			search(indexDir, key);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

}


作者:awj3584 发表于2013-11-29 16:25:24 原文链接
阅读:84 评论:2 查看评论

Viewing all articles
Browse latest Browse all 15843

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>