Agan-agan para master, nih ane buat coding-an untuk indexing pake library Lucene. Ane dapet coding ini dari buku Lucene in Action 2nd edition dan udah ane modif. Mau minta saran dan masukan dari para agan biar index yang dihasilkan tambah sip.
Spoiler for coding :
Java Code:
import java . io . File ; import java . io . FileFilter ; import java . io . FileReader ; import java . io . IOException ; import java . io . Reader ; import java . util . Set ; import org . apache . lucene . analysis . Analyzer ; import org . apache . lucene . analysis . id . IndonesianAnalyzer ; import org . apache . lucene . analysis . standard . StandardAnalyzer ; import org . apache . lucene . document . Document ; import org . apache . lucene . document . Field ; import org . apache . lucene . index . IndexWriter ; import org . apache . lucene . store . Directory ; import org . apache . lucene . store . FSDirectory ; import org . apache . lucene . store . SimpleFSDirectory ; import org . apache . lucene . util . Version ; import org . stjhi . model . TagReader ; public class Indexer { private String indexDir = "E:IRindexLucene3" ; private String dataDir = "E:IRcorpus" ; private IndexWriter writer ; public Indexer (){ } public Indexer ( String indexDir , String dataDir ){ this . indexDir = indexDir ; this . dataDir = dataDir ; } public Indexer ( String indexDir ) throws IOException { File file = new File ( indexDir ); Directory dir = FSDirectory . open ( file ); writer = new IndexWriter ( dir , new IndonesianAnalyzer ( Version . LUCENE_CURRENT ), true , IndexWriter . MaxFieldLength . UNLIMITED ); } public void close () throws IOException { writer . close (); } public int index ( String dataDir , FileFilter filter ) throws Exception { File [] files = new File ( dataDir ). listFiles (); for ( File f : files ){ if (! f . isDirectory () && ! f . isHidden () && f . exists () && f . canRead () && ( filter == null || filter . accept ( f ))){ indexFile ( f ); } } return writer . numDocs (); } private void indexFile ( File f ) throws Exception { System . out . println ( "Indexing " + f . getCanonicalPath ()); Document doc = getDocument ( f ); writer . 
addDocument ( doc ); //add document to Lucene's index } protected Document getDocument ( File f ) throws Exception { Document doc = new Document (); TagReader tag = new TagReader ( f ); tag . tagReader ( f ); doc . add (new Field ( "filename" , f . getName (), Field . Store . YES , Field . Index . NOT_ANALYZED )); doc . add (new Field ( "fullpath" , f . getCanonicalPath (), Field . Store . YES , Field . Index . NOT_ANALYZED )); doc . add (new Field ( "title" , tag . getTitle (), Field . Store . YES , Field . Index . NOT_ANALYZED )); doc . add (new Field ( "content" , tag . getText (), Field . Store . YES , Field . Index . ANALYZED )); doc . add (new Field ( "source" , tag . getSource (), Field . Store . YES , Field . Index . NOT_ANALYZED )); //doc.add(new Field("contents", new FileReader(f))); return doc ; } public static void main ( String [] args ) throws Exception { long start = System . currentTimeMillis (); Indexer indexer = new Indexer (new Indexer (). indexDir ); int numIndex ; try { numIndex = indexer . index (new Indexer (). dataDir , new TextFilesFilter ()); } finally { indexer . close (); } long end = System . currentTimeMillis (); System . out .print( "Indexing " + numIndex + " files took " +( end - start ) + " ms" ); private static class TextFilesFilter implements FileFilter { public boolean accept ( File path ) { return path . getName (). toLowerCase (). endsWith ( ".txt" ); } } }
Thanks in advance.