![]() |
[Index code]
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Command-line example that builds a Lucene index from the files in a data directory.
 *
 * <p>Every regular file directly inside the data directory becomes one document with
 * three fields:
 * <ul>
 *   <li>{@code contents} - the file text, supplied to Lucene through a {@link FileReader};</li>
 *   <li>{@code filename} - the file's simple name, stored and not analyzed;</li>
 *   <li>{@code indexDate} - the indexing date at day resolution, stored and not analyzed.</li>
 * </ul>
 */
public class Indexer
{
    /**
     * Rebuilds the index under {@code C:\test\Idx} from the files in {@code C:\test\Data}
     * and prints the resulting document count.
     *
     * @param args ignored
     * @throws IOException if the data directory cannot be listed, a file cannot be read,
     *                     or the index cannot be written
     */
    public static void main(String[] args) throws IOException
    {
        String indexPath = "C:\\test\\Idx";
        String dataPath = "C:\\test\\Data";

        // listFiles() returns null when the path is missing, is not a directory, or cannot
        // be read. Check before opening the writer so a bad data path does not wipe an
        // existing index and then die with a NullPointerException.
        File[] files = new File(dataPath).listFiles();
        if (files == null)
        {
            throw new IOException("Cannot list data directory: " + dataPath);
        }

        Directory dir = new SimpleFSDirectory(new File(indexPath));
        try
        {
            // Third argument (create = true): start a fresh index instead of appending.
            IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
            try
            {
                for (File file : files)
                {
                    // Sub-directories cannot be opened by FileReader; skip them.
                    if (!file.isFile())
                    {
                        continue;
                    }

                    // NOTE(review): FileReader decodes with the platform default charset;
                    // confirm that it matches the encoding of the data files.
                    FileReader reader = new FileReader(file);
                    try
                    {
                        Document doc = new Document();
                        doc.add(new Field("contents", reader));
                        doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                        doc.add(new Field("indexDate", DateTools.dateToString(new Date(), DateTools.Resolution.DAY), Field.Store.YES, Field.Index.NOT_ANALYZED));
                        indexWriter.addDocument(doc);
                    }
                    finally
                    {
                        // Guarantees the file handle is released even when addDocument
                        // fails; closing an already-closed reader has no effect.
                        reader.close();
                    }
                }
                System.out.println("numDocs"+indexWriter.numDocs());
            }
            finally
            {
                indexWriter.close();
            }
        }
        finally
        {
            dir.close();
        }
    }
}
[END]
[Searcher code]
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Command-line example that searches the {@code contents} field of an existing Lucene
 * index and prints the file name of every matching document.
 */
public class Searcher
{
    /**
     * Runs one hard-coded query against the index under {@code C:\test\Idx}, prints the
     * total hit count, and then prints the stored {@code filename} of up to 500 hits.
     *
     * @param args ignored
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException
    {
        String indexPath = "C:\\test\\Idx";
        Directory dir = new SimpleFSDirectory(new File(indexPath));
        try
        {
            IndexSearcher indexSearch = new IndexSearcher(dir);
            try
            {
                QueryParser queryParser = new QueryParser(Version.LUCENE_30, "contents", new StandardAnalyzer(Version.LUCENE_30));
                Query query = queryParser.parse("鼻涕"); // key query term
                TopDocs hits = indexSearch.search(query, 500);
                System.out.println("找到"+hits.totalHits+"個");

                // scoreDocs holds at most the 500 requested hits, while totalHits counts
                // every match, so the two numbers can differ.
                for (ScoreDoc sdoc : hits.scoreDocs)
                {
                    Document doc = indexSearch.doc(sdoc.doc);
                    System.out.println(doc.get("filename"));
                }
            }
            finally
            {
                // Release the searcher even when parsing or searching throws.
                indexSearch.close();
            }
        }
        finally
        {
            dir.close();
        }
    }
}
[END]
檔案名稱 | 1.txt |
描述 | 內容含有鼻涕的測試用文章 |
檔案大小 | 4 Kbytes |
下載次數 | 6 次 |
![]() |
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Command-line example that searches an existing Lucene index and, for every hit, prints
 * the file name followed by the best-matching fragment of the original file.
 *
 * <p>The fragment is computed by re-reading the original file from the data directory and
 * running it through the Lucene {@link Highlighter}.
 */
public class Searcher {

    /**
     * Runs one hard-coded query against the index under {@code C:\test\Idx} and prints,
     * for up to 500 hits, the stored {@code filename} and a highlighted fragment taken
     * from the matching file in {@code C:\test\Data}.
     *
     * @param args ignored
     * @throws IOException                  if the index or a data file cannot be read
     * @throws ParseException               if the query string cannot be parsed
     * @throws InvalidTokenOffsetsException if the highlighter meets token offsets that
     *                                      fall outside the supplied text
     */
    public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
        String indexPath = "C:\\test\\Idx";
        String dataDir = "C:\\test\\Data";
        Directory dir = new SimpleFSDirectory(new File(indexPath));
        try {
            IndexSearcher indexSearch = new IndexSearcher(dir);
            try {
                QueryParser queryParser = new QueryParser(Version.LUCENE_30,
                        "contents", new StandardAnalyzer(Version.LUCENE_30));
                Query query = queryParser.parse("鼻涕"); // key query term
                TopDocs hits = indexSearch.search(query, 500);
                System.out.println("找到" + hits.totalHits + "個");

                // The highlighter depends only on the query, so build it once, not per hit.
                // NOTE(review): both formatter tags are empty strings, so matched terms are
                // emitted without any visible markup - confirm this is intended.
                Scorer scorer = new QueryScorer(query);
                SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("", "");
                Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);

                for (ScoreDoc sdoc : hits.scoreDocs) {
                    Document doc = indexSearch.doc(sdoc.doc);
                    System.out.println(doc.get("filename"));

                    String content = readFileAsString(dataDir + File.separator + doc.get("filename"));
                    TokenStream tokenStream = queryParser.getAnalyzer().tokenStream(
                            "contents", new StringReader(content));
                    String fragment = highlighter.getBestFragment(tokenStream, content);
                    // Fall back to the full text when no fragment is returned.
                    System.out.println(fragment != null ? fragment : content);
                }
            } finally {
                // Release the searcher even when parsing, searching or highlighting throws.
                indexSearch.close();
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Reads a whole file into a string using the platform default charset.
     *
     * <p>NOTE(review): the default charset is kept so decoding stays as it was; confirm it
     * matches the encoding of the data files.
     *
     * @param filePath path of the file to read
     * @return the decoded file content; only the bytes actually read are decoded
     * @throws java.io.IOException if the file cannot be opened or read, or is too large to
     *                             fit in a single byte array
     */
    private static String readFileAsString(String filePath) throws java.io.IOException {
        long length = new File(filePath).length();
        if (length > Integer.MAX_VALUE) {
            // A plain (int) cast would silently truncate the size.
            throw new IOException("File too large to read into memory: " + filePath);
        }
        byte[] buffer = new byte[(int) length];
        int filled = 0;
        BufferedInputStream in = null;
        try {
            in = new BufferedInputStream(new FileInputStream(filePath));
            // A single read() may return fewer bytes than requested; loop until the buffer
            // is full or the stream ends.
            while (filled < buffer.length) {
                int count = in.read(buffer, filled, buffer.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
        } finally {
            // Best-effort close: a failure here must not hide the data or a read error.
            if (in != null) try { in.close(); } catch (IOException ignored) { }
        }
        return new String(buffer, 0, filled);
    }
}
檔案名稱 | Searcher.java |
描述 | 加上 Highlighter |
檔案大小 | 3 Kbytes |
下載次數 | 6 次 |
![]() |