- Beranda
- Komunitas
- Tech
- Computer Stuff
[minta saran] Lucene Indexing


TS
derlaz
[minta saran] Lucene Indexing
agan-agan para master. nih ane buat coding-an buat indexing pake library Lucene. ane dapet coding ini dari buku Lucene in Action 2nd edition dan udah gw modif. mau minta saran dan masukan dari para agan-agan biar index yang dihasilkan tambah sip
thanks before

Spoiler for coding:
PHP Code:
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.stjhi.model.TagReader;
public class Indexer {
private String indexDir = "E:IRindexLucene3";
private String dataDir = "E:IRcorpus";
private IndexWriter writer;
public Indexer(){
}
public Indexer(String indexDir, String dataDir){
this.indexDir = indexDir;
this.dataDir = dataDir;
}
public Indexer(String indexDir) throws IOException{
File file = new File(indexDir);
Directory dir = FSDirectory.open(file);
writer = new IndexWriter(dir,
new IndonesianAnalyzer(Version.LUCENE_CURRENT),
true,
IndexWriter.MaxFieldLength.UNLIMITED);
}
public void close() throws IOException{
writer.close();
}
public int index(String dataDir, FileFilter filter) throws Exception{
File[] files = new File(dataDir).listFiles();
for(File f: files){
if(!f.isDirectory() &&
!f.isHidden() &&
f.exists() &&
f.canRead() &&
(filter == null || filter.accept(f))){
indexFile(f);
}
}
return writer.numDocs();
}
private void indexFile(File f) throws Exception{
System.out.println("Indexing "+f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc); //add document to Lucene's index
}
protected Document getDocument(File f) throws Exception{
Document doc = new Document();
TagReader tag = new TagReader(f);
tag.tagReader(f);
doc.add(new Field("filename",
f.getName(),
Field.Store.YES,
Field.Index.NOT_ANALYZED));
doc.add(new Field("fullpath",
f.getCanonicalPath(),
Field.Store.YES,
Field.Index.NOT_ANALYZED));
doc.add(new Field("title",
tag.getTitle(),
Field.Store.YES,
Field.Index.NOT_ANALYZED));
doc.add(new Field("content",
tag.getText(),
Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("source",
tag.getSource(),
Field.Store.YES,
Field.Index.NOT_ANALYZED));
//doc.add(new Field("contents", new FileReader(f)));
return doc;
}
public static void main(String[] args) throws Exception{
long start = System.currentTimeMillis();
Indexer indexer = new Indexer(new Indexer().indexDir);
int numIndex;
try{
numIndex = indexer.index(new Indexer().dataDir, new TextFilesFilter());
} finally {
indexer.close();
}
long end = System.currentTimeMillis();
System.out.print("Indexing "+numIndex+" files took "+(end-start) +" ms");
private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return path.getName().toLowerCase().endsWith(".txt");
}
}
}
thanks before
0
1.3K
Kutip
6
Balasan


Komentar yang asik ya
Urutan
Terbaru
Terlama


Komentar yang asik ya
Komunitas Pilihan