/**
 * Command-line entry point: prints the terms of an index that occur in the
 * most documents, one term per line, in the order the queue pops them
 * (lowest document frequency first).
 *
 * <p>Usage: {@code HighFreqTerms [-count <n>] [-nofreqs] <index dir>}
 * <ul>
 *   <li>{@code -count n} - number of terms to report (default 100, must be
 *       at least 1);</li>
 *   <li>{@code -nofreqs} - print only the terms, without their document
 *       frequencies;</li>
 *   <li>any other argument is taken as the index directory; if several are
 *       given, the last one is used.</li>
 * </ul>
 * On a malformed command line (no index directory, missing or non-numeric
 * {@code -count} value, count below 1) the usage string is written to
 * {@code System.err} and the JVM exits with status -1.
 *
 * @param args command-line arguments as described above
 * @throws Exception if opening, reading or closing the index fails, or if
 *         writing the report fails
 */
public static void main(String[] args) throws Exception {
  String usage = "HighFreqTerms [-count < n >] [-nofreqs] < index dir >";
  boolean noFreqs = false;
  int count = 100;
  String indexDir = null;
  boolean badArgs = false;

  // Parse the whole command line before touching the index, so that a bad
  // argument can never leave an opened reader behind.
  for (int i = 0; i < args.length && !badArgs; i++) {
    if (args[i].equals("-count")) {
      if (i + 1 < args.length) {
        try {
          count = Integer.parseInt(args[++i]);
          badArgs = count < 1;          // zero or negative makes no sense
        } catch (NumberFormatException e) {
          badArgs = true;               // value is not an integer
        }
      } else {
        badArgs = true;                 // -count given without a value
      }
    } else if (args[i].equals("-nofreqs")) {
      noFreqs = true;
    } else {
      indexDir = args[i];               // last non-option argument wins
    }
  }

  if (badArgs || indexDir == null) {
    System.err.println(usage);
    System.exit(-1);
    return;                             // unreachable; keeps flow analysis simple
  }

  IndexReader reader = IndexReader.open(indexDir);
  try {
    // One spare slot: the queue briefly holds count + 1 entries between the
    // put() and the pop() that evicts the lowest one.
    TermFreqQueue tiq = new TermFreqQueue(count + 1);
    TermEnum terms = reader.terms();
    int minFreq = 0;                    // lowest docFreq still worth queuing
    while (terms.next()) {
      int docFreq = terms.docFreq();
      if (docFreq > minFreq) {
        tiq.put(new TermFreq(terms.term(), docFreq));
        if (tiq.size() > count) {       // queue is over the requested size
          tiq.pop();                    // remove lowest in tiq
          // The queue now holds exactly count (>= 1) entries, so top() has
          // an element to return.
          minFreq = ((TermFreq) tiq.top()).docFreq;
        }
      }
    }

    // System.out is only flushed, never closed, so the stream stays usable.
    OutputStreamWriter out = new OutputStreamWriter(System.out, "UTF-8");
    while (tiq.size() != 0) {
      TermFreq termInfo = (TermFreq) tiq.pop();
      out.write(termInfo.term.toString());
      if (!noFreqs) {
        out.write(" ");
        out.write(Integer.toString(termInfo.docFreq));
      }
      out.write("\n");
    }
    out.flush();
  } finally {
    reader.close();                     // release the index even on failure
  }
}
|