Lucene
Posted by Bruce Tsai
05/25/2016
開發人員在開發上遇到問題時,往往會求助於 Google 大神,透過搜尋可以找到許多網路上的資源。那麼,如果想讓自己開發的系統也具備搜尋功能,Lucene 會是各位的好朋友。Lucene 是一套用於全文檢索和搜尋的開放原始碼程式庫,由 Apache 軟體基金會支持和提供。Lucene 提供了一個簡單卻強大的應用程式介面,能夠做全文索引和搜尋;在 Java 開發環境裡,Lucene 是一個成熟的免費開放原始碼工具。就其本身而論,Lucene 不論是現在或是近幾年來,都是最受歡迎的免費 Java 資訊檢索程式庫。
使用範例
package avajava;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
/**
 * Small command-line demo of full-text indexing and searching with the
 * legacy Lucene API ({@code Hits}, {@code FSDirectory.getDirectory}, ...).
 *
 * <p>{@link #main(String[])} rebuilds an index from the files found in
 * {@link #FILES_TO_INDEX_DIRECTORY} and then runs a handful of sample queries
 * against it, printing the path of every matching file.
 */
public class LuceneDemo {

    /** Directory (relative to the working directory) whose files get indexed. */
    public static final String FILES_TO_INDEX_DIRECTORY = "filesToIndex";

    /** Directory (relative to the working directory) where the index is stored. */
    public static final String INDEX_DIRECTORY = "indexDirectory";

    /** Name of the stored, untokenized field holding each file's canonical path. */
    public static final String FIELD_PATH = "path";

    /** Name of the field holding each file's analyzed text contents. */
    public static final String FIELD_CONTENTS = "contents";

    /**
     * Rebuilds the index and runs the sample searches.
     *
     * @param args ignored
     * @throws Exception if indexing, query parsing, or searching fails
     */
    public static void main(String[] args) throws Exception {
        createIndex();
        searchIndex("mushrooms");
        searchIndex("steak");
        searchIndex("steak AND cheese");
        // Same terms with a lowercase "and"; kept alongside the uppercase form
        // so the two outputs can be compared.
        searchIndex("steak and cheese");
        searchIndex("bacon OR cheese");
    }

    /**
     * Creates (or recreates) the index in {@link #INDEX_DIRECTORY} from every
     * regular file directly inside {@link #FILES_TO_INDEX_DIRECTORY}.
     *
     * @throws CorruptIndexException     if the existing index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException               if the source directory cannot be listed,
     *                                   or on any other low-level I/O failure
     */
    public static void createIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
        File dir = new File(FILES_TO_INDEX_DIRECTORY);
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null (not an empty array) when the path is
            // missing, is not a directory, or cannot be read.
            throw new IOException("Cannot list files in directory: " + dir.getAbsolutePath());
        }

        Analyzer analyzer = new StandardAnalyzer();
        boolean recreateIndexIfExists = true;
        IndexWriter indexWriter = new IndexWriter(INDEX_DIRECTORY, analyzer, recreateIndexIfExists);
        try {
            for (File file : files) {
                if (!file.isFile()) {
                    // Subdirectories and special files cannot be opened with
                    // FileReader; skip them instead of aborting the whole run.
                    continue;
                }
                addFileToIndex(indexWriter, file);
            }
            indexWriter.optimize();
        } finally {
            // Always close the writer so it is not left open after a failure.
            indexWriter.close();
        }
    }

    /** Adds one file to the index as a document with a path field and a contents field. */
    private static void addFileToIndex(IndexWriter indexWriter, File file) throws IOException {
        Document document = new Document();
        String path = file.getCanonicalPath();
        document.add(new Field(FIELD_PATH, path, Field.Store.YES, Field.Index.UN_TOKENIZED));

        Reader reader = new FileReader(file);
        try {
            document.add(new Field(FIELD_CONTENTS, reader));
            indexWriter.addDocument(document);
        } finally {
            // NOTE(review): Lucene may already have closed the reader while
            // consuming it; closing a FileReader twice is harmless, and this
            // guarantees the handle is released if addDocument fails early.
            reader.close();
        }
    }

    /**
     * Parses {@code searchString} against the {@link #FIELD_CONTENTS} field,
     * runs it on the index in {@link #INDEX_DIRECTORY}, and prints the number
     * of hits followed by the stored path of each hit.
     *
     * @param searchString query text in Lucene query-parser syntax
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code searchString} is not a valid query
     */
    public static void searchIndex(String searchString) throws IOException, ParseException {
        System.out.println("Searching for '" + searchString + "'");
        Directory directory = FSDirectory.getDirectory(INDEX_DIRECTORY);
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    Analyzer analyzer = new StandardAnalyzer();
                    QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
                    Query query = queryParser.parse(searchString);
                    Hits hits = indexSearcher.search(query);
                    System.out.println("Number of hits: " + hits.length());

                    Iterator<Hit> it = hits.iterator();
                    while (it.hasNext()) {
                        Hit hit = it.next();
                        Document document = hit.getDocument();
                        String path = document.get(FIELD_PATH);
                        System.out.println("Hit: " + path);
                    }
                } finally {
                    indexSearcher.close();
                }
            } finally {
                // Closed explicitly here rather than relying on the searcher
                // to close a reader that was handed to it.
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }
}