Lucene

Posted by Bruce Tsai
05/25/2016

開發人員在開發上遇到問題時,往往會求助 Google 大神,透過搜尋找到網路上的許多資源。那如果想讓自己開發的系統也具備搜尋功能,Lucene 會是各位的好朋友。Lucene 是一套用於全文檢索和搜尋的開放原始碼程式庫,由 Apache 軟體基金會支持和提供。Lucene 提供了一個簡單卻強大的應用程式介面,能夠做全文索引和搜尋;在 Java 開發環境裡,Lucene 是一個成熟的免費開放原始碼工具。就其本身而論,Lucene 是目前、也是近幾年來最受歡迎的免費 Java 資訊檢索程式庫。

使用範例

package avajava;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;

/**
 * Small end-to-end Lucene demonstration: indexes every regular file found in
 * {@link #FILES_TO_INDEX_DIRECTORY} into an on-disk index at
 * {@link #INDEX_DIRECTORY}, then runs a handful of sample queries against it
 * and prints the matching file paths to standard output.
 *
 * <p>NOTE(review): the API used here ({@code Hits}, {@code Hit},
 * {@code Field.Index.UN_TOKENIZED}, {@code IndexWriter.optimize()}) looks like
 * the legacy Lucene 2.x API; confirm the Lucene version on the classpath
 * before upgrading.
 */
public class LuceneDemo {

    /** Directory (relative to the working directory) whose files are indexed. */
    public static final String FILES_TO_INDEX_DIRECTORY = "filesToIndex";
    /** Directory (relative to the working directory) where the index is stored. */
    public static final String INDEX_DIRECTORY = "indexDirectory";

    /** Name of the stored, untokenized field holding each file's canonical path. */
    public static final String FIELD_PATH = "path";
    /** Name of the field built from each file's contents; also the default search field. */
    public static final String FIELD_CONTENTS = "contents";

    /**
     * Rebuilds the index and runs the sample searches.
     *
     * @param args ignored
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {

        createIndex();
        searchIndex("mushrooms");
        searchIndex("steak");
        searchIndex("steak AND cheese");
        // Lower-case "and" on purpose, to contrast with the upper-case AND above;
        // presumably it is not treated as a boolean operator - verify against the
        // query parser syntax of the Lucene version in use.
        searchIndex("steak and cheese");
        searchIndex("bacon OR cheese");

    }

    /**
     * Creates the index from scratch (an existing index at
     * {@link #INDEX_DIRECTORY} is replaced) and adds one document per regular
     * file in {@link #FILES_TO_INDEX_DIRECTORY}.
     *
     * @throws CorruptIndexException     if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException               if the source directory cannot be listed, or on
     *                                   any other low-level I/O error
     */
    public static void createIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
        File dir = new File(FILES_TO_INDEX_DIRECTORY);
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be
            // read; fail with a clear message before touching the existing index.
            throw new IOException("Cannot list files in directory: " + dir.getAbsolutePath());
        }

        Analyzer analyzer = new StandardAnalyzer();
        boolean recreateIndexIfExists = true;
        IndexWriter indexWriter = new IndexWriter(INDEX_DIRECTORY, analyzer, recreateIndexIfExists);
        try {
            for (File file : files) {
                if (!file.isFile()) {
                    // Sub-directories cannot be opened with a FileReader; skip them.
                    continue;
                }
                addFile(indexWriter, file);
            }
            indexWriter.optimize();
        } finally {
            // Always close the writer, even on failure, so the index lock is not left behind.
            indexWriter.close();
        }
    }

    /** Adds a single file to the index as a document with a path field and a contents field. */
    private static void addFile(IndexWriter indexWriter, File file) throws IOException {
        Document document = new Document();

        String path = file.getCanonicalPath();
        document.add(new Field(FIELD_PATH, path, Field.Store.YES, Field.Index.UN_TOKENIZED));

        // FileReader decodes with the platform default charset, as the original demo did.
        Reader reader = new FileReader(file);
        try {
            document.add(new Field(FIELD_CONTENTS, reader));
            indexWriter.addDocument(document);
        } finally {
            // The reader is only needed until addDocument() has returned; closing it
            // here guarantees the file handle is released even if indexing fails.
            reader.close();
        }
    }

    /**
     * Parses {@code searchString} against the {@link #FIELD_CONTENTS} field, runs
     * it on the index in {@link #INDEX_DIRECTORY}, and prints the hit count
     * followed by the stored path of every hit.
     *
     * @param searchString query text in the query parser's syntax
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code searchString} is not a valid query
     */
    public static void searchIndex(String searchString) throws IOException, ParseException {
        System.out.println("Searching for '" + searchString + "'");
        Directory directory = FSDirectory.getDirectory(INDEX_DIRECTORY);
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    Analyzer analyzer = new StandardAnalyzer();
                    QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
                    Query query = queryParser.parse(searchString);
                    Hits hits = indexSearcher.search(query);
                    System.out.println("Number of hits: " + hits.length());

                    // Hits are read while the searcher and reader are still open.
                    Iterator<Hit> it = hits.iterator();
                    while (it.hasNext()) {
                        Hit hit = it.next();
                        Document document = hit.getDocument();
                        String path = document.get(FIELD_PATH);
                        System.out.println("Hit: " + path);
                    }
                } finally {
                    indexSearcher.close();
                }
            } finally {
                // Closed explicitly as well: the reader was opened here, not by the searcher.
                indexReader.close();
            }
        } finally {
            directory.close();
        }

    }

}

results matching ""

    No results matching ""