/**
 * Checks the KOI8 variant of {@code RussianAnalyzer} against a reference file.
 *
 * <p>Tokens produced by the analyzer from {@code testKOI8.txt} are compared,
 * one by one and in order, with tokens produced by a plain
 * {@code RussianLetterTokenizer} from {@code resKOI8.htm}.
 *
 * <p>Both files are opened with the "iso-8859-1" decoder; presumably this
 * passes every byte through unchanged so the {@code RussianCharsets.KOI8}
 * table can interpret it (NOTE(review): confirm against RussianCharsets).
 *
 * <p>The loop ends when the analyzer stream is exhausted, so extra trailing
 * tokens in the reference file are not detected. A reference stream that
 * ends early is detected, because {@code null} is then compared with the
 * analyzer's token text.
 *
 * @throws IOException if a data file cannot be opened, read, or closed
 */
public void testKOI8() throws IOException {
    RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.KOI8);

    // Reset the shared reader fields so the finally block below only closes
    // readers that were opened by this invocation.
    inWordsKOI8 = null;
    sampleKOI8 = null;
    try {
        // KOI8
        inWordsKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testKOI8.txt")), "iso-8859-1");
        sampleKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resKOI8.htm")), "iso-8859-1");

        TokenStream in = ra.tokenStream("all", inWordsKOI8);
        RussianLetterTokenizer sample =
            new RussianLetterTokenizer(
                sampleKOI8,
                RussianCharsets.KOI8);

        for (;;) {
            Token token = in.next();
            if (token == null) {
                break;
            }

            Token sampleToken = sample.next();
            assertEquals(
                "KOI8",
                token.termText(),
                sampleToken == null
                    ? null
                    : sampleToken.termText());
        }
    } finally {
        // Close both readers even when an assertion fails or reading throws;
        // the nested finally guarantees the second close is still attempted
        // if the first one throws.
        try {
            if (inWordsKOI8 != null) {
                inWordsKOI8.close();
            }
        } finally {
            if (sampleKOI8 != null) {
                sampleKOI8.close();
            }
        }
    }
}
|