public final Token next(Token result) throws IOException {
Token t = input.next(result);
if (t == null)
return null;
char[] buffer = t.termBuffer();
final int bufferLength = t.termLength();
final String type = t.type();
if (type == APOSTROPHE_TYPE && // remove 's
bufferLength >= 2 &&
buffer[bufferLength-2] == '\'" &&
(buffer[bufferLength-1] == 's" || buffer[bufferLength-1] == 'S")) {
// Strip last 2 characters off
t.setTermLength(bufferLength - 2);
} else if (type == ACRONYM_TYPE) { // remove dots
int upto = 0;
for(int i=0;i< bufferLength;i++) {
char c = buffer[i];
if (c != '.")
buffer[upto++] = c;
}
t.setTermLength(upto);
}
return t;
}
Returns the next token in the stream, or null at end of stream (EOS).
Removes a trailing 's (or 'S) from the end of apostrophe-typed words.
Removes dots from acronyms.