/**
 * Merges all input indexes into a single output index.
 *
 * <p>The merge is written to a local path obtained from
 * {@code startLocalOutput} on the configured file system and handed back to
 * that file system with {@code completeLocalOutput} once the index writer has
 * been closed successfully.
 *
 * <p>Writer tuning is read from the configuration keys
 * {@code indexer.mergeFactor}, {@code indexer.minMergeDocs},
 * {@code indexer.maxMergeDocs} and {@code indexer.termIndexInterval}, each
 * falling back to the corresponding {@code IndexWriter} default constant.
 *
 * @param indexes         paths of the indexes to merge
 * @param outputIndex     destination of the merged index; must not exist yet
 * @param localWorkingDir local scratch directory; if it already exists it is
 *                        deleted recursively and re-created. This method does
 *                        not remove it afterwards.
 * @throws IOException if any file system or index operation fails; a
 *                     {@code FileAlreadyExistsException} is thrown when
 *                     {@code outputIndex} already exists
 */
public void merge(Path[] indexes,
                  Path outputIndex,
                  Path localWorkingDir) throws IOException {
  LOG.info("merging indexes to: " + outputIndex);

  // Start from an empty local working directory.
  FileSystem localFs = FileSystem.getLocal(getConf());
  if (localFs.exists(localWorkingDir)) {
    localFs.delete(localWorkingDir, true);
  }
  localFs.mkdirs(localWorkingDir);

  //
  // Get local output target
  //
  FileSystem fs = FileSystem.get(getConf());
  if (fs.exists(outputIndex)) {
    throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!");
  }

  Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
  Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);

  Directory[] dirs = new Directory[indexes.length];
  for (int i = 0; i < indexes.length; i++) {
    if (LOG.isInfoEnabled()) { LOG.info("Adding " + indexes[i]); }
    dirs[i] = new FsDirectory(fs, indexes[i], false, getConf());
  }

  //
  // Merge indices
  //
  IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
  boolean merged = false;
  try {
    writer.setMergeFactor(getConf().getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR));
    writer.setMaxBufferedDocs(getConf().getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS));
    writer.setMaxMergeDocs(getConf().getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS));
    writer.setTermIndexInterval(getConf().getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL));
    writer.setInfoStream(LogUtil.getDebugStream(LOG));
    writer.setUseCompoundFile(false);
    writer.setSimilarity(new NutchSimilarity());
    writer.addIndexes(dirs);
    merged = true;
  } finally {
    // Always close the writer so it is not left open when the merge fails.
    if (merged) {
      // Success path: a failing close must surface to the caller.
      writer.close();
    } else {
      // Failure path: do not let a close error hide the original exception.
      try {
        writer.close();
      } catch (IOException e) {
        LOG.warn("Failed to close index writer after merge error", e);
      }
    }
  }

  //
  // Put target back (only reached when the merge and close succeeded)
  //
  fs.completeLocalOutput(outputIndex, tmpLocalOutput);
  LOG.info("done merging");
}
Merges all input indexes into a single output index.