/**
 * Configures and runs the "index-lucene" job, writing its output to
 * {@code luceneDir}.
 *
 * <p>The job is built from this object's configuration, initialised through
 * {@code IndexerMapReduce.initMRJob} with the given crawl db, link db and
 * segments, and registered to use {@code LuceneWriter}. The call returns only
 * after {@code JobClient.runJob} has returned.
 *
 * @param luceneDir output path set on the job
 * @param crawlDb   crawl db path handed to {@code IndexerMapReduce.initMRJob}
 * @param linkDb    link db path handed to {@code IndexerMapReduce.initMRJob}
 * @param segments  segments handed to {@code IndexerMapReduce.initMRJob}
 *                  (presumably segment paths; confirm against callers)
 * @throws IOException if job setup or execution fails
 */
public void index(Path luceneDir,
                  Path crawlDb,
                  Path linkDb,
                  List segments) throws IOException {
  LOG.info("Indexer: starting");

  // Build the job and wire up its inputs and output location.
  JobConf indexJob = new NutchJob(getConf());
  indexJob.setJobName("index-lucene " + luceneDir);
  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, indexJob);
  FileOutputFormat.setOutputPath(indexJob, luceneDir);

  // These fields are registered as stored but not indexed; the registration
  // order matches the original explicit calls.
  String[] storedOnlyFields = {"segment", "digest", "boost"};
  for (String fieldName : storedOnlyFields) {
    LuceneWriter.addFieldOptions(
        fieldName, LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, indexJob);
  }

  NutchIndexWriterFactory.addClassToConf(indexJob, LuceneWriter.class);

  // Submit the job; logging "done" happens only once runJob has returned.
  JobClient.runJob(indexJob);
  LOG.info("Indexer: done");
}
|