1.创建索引实例代码
import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.swing.filechooser.FileFilter; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public class Indexer { private IndexWriter writer; private Analyzer analyzer; List<Document> documents = new ArrayList<Document>(); public static void main(String[] args) { String dataDir = "E:/lucene/data"; String indexDir = "E:/lucene/index"; try { Indexer indexer = new Indexer(indexDir); indexer.index(dataDir, new TextFilesFilter()); indexer.writer.commit(); System.out.println(indexer.writer.numDocs()); indexer.writer.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public Indexer(String indexDir) throws IOException{ Directory dir = FSDirectory.open(new File(indexDir)); analyzer = new IKAnalyzer(); LogMergePolicy mergePolicy = new LogDocMergePolicy(); // 索引基本配置 // 设置segment添加文档(Document)时的合并频率 // 值较小,建立索引的速度就较慢 // 值较大,建立索引的速度就较快,>10适合批量建立索引 mergePolicy.setMergeFactor(30); // 设置segment最大合并文档(Document)数 // 值较小有利于追加索引的速度 // 值较大,适合批量建立索引和更快的搜索 mergePolicy.setMaxMergeDocs(5000); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer); //IndexWriterConfig indexWriterConfig=new IndexWriterConfig(Version.LUCENE_43,new StandardAnalyzer(Version.LUCENE_43)); indexWriterConfig.setMaxBufferedDocs(10000); indexWriterConfig.setMergePolicy(mergePolicy); indexWriterConfig.setRAMBufferSizeMB(64); // /设置索引的打开模式 创建或者添加索引 indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(dir, indexWriterConfig); } //将File信息写入document private Document getDocument(File f) throws IOException{ Document document = new Document(); document.add(new StringField("name", f.getName(), Store.YES)); document.add(new TextField("content", "我爱你中国", Store.YES)); document.add(new StringField("fullpath", f.getCanonicalPath(),Store.YES)); document.add(new StringField("updateTime", String.valueOf(f.lastModified()),Store.YES)); return document; } private List<Document> getDocuments(File [] files, FileFilter filesFilter) throws IOException{ for(File f : files){ if(f.isDirectory()){ getDocuments(f.listFiles(),filesFilter); }else{ if(!f.isHidden() && f.canRead() && (filesFilter != null && filesFilter.accept(f))){ documents.add(getDocument(f)); } } } return documents; } //写入索引 private void indexFile(File [] files, FileFilter filesFilter) throws IOException{ List<Document> documents = getDocuments(files, filesFilter); writer.addDocuments(documents); } private void index(String dataDri, TextFilesFilter filesFilter){ File [] files = new File(dataDri).listFiles(); try { indexFile(files, new TextFilesFilter()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 过滤器,只索引txt格式文件 * @author ANWJ * */ private static class TextFilesFilter extends FileFilter{ @Override public boolean accept(File f) { // TODO Auto-generated method stub return f.getName().toLowerCase().endsWith(".txt"); } @Override public String getDescription() { // TODO Auto-generated method stub return null; } } }
2.检索索引实例代码
import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; public class Searcher { public static void search(String indexDir, String key) throws IOException, ParseException{ Directory directory = FSDirectory.open(new File(indexDir)); SearcherManager sm = new SearcherManager(directory,new SearcherFactory()); IndexSearcher searcher = sm.acquire(); Analyzer analyzer = new IKAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer); Query query = parser.parse(key); TopDocs hits = searcher.search(query, 10); for(ScoreDoc doc : hits.scoreDocs){ Document document = searcher.doc(doc.doc); System.out.println(document.get("content")); } } public static void main(String[] args) { String indexDir = "E:/lucene/index"; String key = "中国"; try { search(indexDir, key); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } } }
作者:awj3584 发表于2013-11-29 16:25:24 原文链接
阅读:84 评论:2 查看评论