| Constructor: |
/**
 * Builds an analyzer with the default stop words by delegating to another
 * constructor of this class with {@link #STOP_WORDS}.
 */
public StandardAnalyzer() {
this(STOP_WORDS);
}
Builds an analyzer with the default stop words (see #STOP_WORDS). |
/**
 * Builds an analyzer with the given stop words. The supplied set is kept
 * by reference; no copy is made here.
 *
 * @param stopWords the stop words to use
 */
public StandardAnalyzer(Set stopWords) {
  this.stopSet = stopWords;
}
Builds an analyzer with the given stop words. |
/**
 * Builds an analyzer with the given stop words. The array is converted to
 * a stop set through {@code StopFilter.makeStopSet}.
 *
 * @param stopWords the stop words to use
 */
public StandardAnalyzer(String[] stopWords) {
  this.stopSet = StopFilter.makeStopSet(stopWords);
}
Builds an analyzer with the given stop words. |
/**
 * Builds an analyzer with the stop words from the given file, loaded via
 * {@code WordlistLoader.getWordSet(File)}.
 *
 * @param stopwords file to read the stop words from
 * @throws IOException propagated from {@code WordlistLoader.getWordSet}
 */
public StandardAnalyzer(File stopwords) throws IOException {
  this.stopSet = WordlistLoader.getWordSet(stopwords);
}
Builds an analyzer with the stop words from the given file. Also see:
- WordlistLoader#getWordSet(File)
|
/**
 * Builds an analyzer with the stop words from the given reader, loaded via
 * {@code WordlistLoader.getWordSet(Reader)}.
 *
 * @param stopwords reader to read the stop words from
 * @throws IOException propagated from {@code WordlistLoader.getWordSet}
 */
public StandardAnalyzer(Reader stopwords) throws IOException {
  this.stopSet = WordlistLoader.getWordSet(stopwords);
}
Builds an analyzer with the stop words from the given reader. Also see:
- WordlistLoader#getWordSet(Reader)
|
/**
 * Builds an analyzer with the default stop words ({@link #STOP_WORDS}) and
 * the given acronym-handling flag.
 *
 * @param replaceInvalidAcronym set to true if this analyzer should replace
 *        mischaracterized acronyms in the StandardTokenizer; see
 *        https://issues.apache.org/jira/browse/LUCENE-1068
 */
public StandardAnalyzer(boolean replaceInvalidAcronym) {
// Delegate first to set up the stop words, then record the flag.
this(STOP_WORDS);
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
Parameters:
replaceInvalidAcronym - Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
See https://issues.apache.org/jira/browse/LUCENE-1068
|
/**
 * Builds an analyzer with the stop words from the given reader and the
 * given acronym-handling flag. Stop-word loading is delegated to the
 * single-argument {@code Reader} constructor.
 *
 * @param stopwords the stopwords to use
 * @param replaceInvalidAcronym set to true if this analyzer should replace
 *        mischaracterized acronyms in the StandardTokenizer; see
 *        https://issues.apache.org/jira/browse/LUCENE-1068
 * @throws IOException propagated from the delegated constructor
 */
public StandardAnalyzer(Reader stopwords,
boolean replaceInvalidAcronym) throws IOException {
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
Parameters:
stopwords - The stopwords to use
replaceInvalidAcronym - Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
See https://issues.apache.org/jira/browse/LUCENE-1068
|
/**
 * Builds an analyzer with the stop words from the given file and the given
 * acronym-handling flag. Stop-word loading is delegated to the
 * single-argument {@code File} constructor.
 *
 * @param stopwords the stopwords to use
 * @param replaceInvalidAcronym set to true if this analyzer should replace
 *        mischaracterized acronyms in the StandardTokenizer; see
 *        https://issues.apache.org/jira/browse/LUCENE-1068
 * @throws IOException propagated from the delegated constructor
 */
public StandardAnalyzer(File stopwords,
boolean replaceInvalidAcronym) throws IOException {
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
Parameters:
stopwords - The stopwords to use
replaceInvalidAcronym - Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
See https://issues.apache.org/jira/browse/LUCENE-1068
|
/**
 * Builds an analyzer with the given stop words and the given
 * acronym-handling flag. Stop-set construction is delegated to the
 * single-argument {@code String[]} constructor.
 *
 * @param stopwords the stopwords to use
 * @param replaceInvalidAcronym set to true if this analyzer should replace
 *        mischaracterized acronyms in the StandardTokenizer; see
 *        https://issues.apache.org/jira/browse/LUCENE-1068
 * @throws IOException declared by the signature.
 *         NOTE(review): neither this body nor the String[] constructor it
 *         delegates to declares IOException, so the clause looks
 *         unnecessary - confirm before removing, since existing callers
 *         may catch it.
 */
public StandardAnalyzer(String[] stopwords,
boolean replaceInvalidAcronym) throws IOException {
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
Parameters:
stopwords - The stopwords to use
replaceInvalidAcronym - Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
See https://issues.apache.org/jira/browse/LUCENE-1068
|
/**
 * Builds an analyzer with the given stop words and the given
 * acronym-handling flag. The stop set is installed by the single-argument
 * {@code Set} constructor.
 *
 * @param stopwords the stopwords to use
 * @param replaceInvalidAcronym set to true if this analyzer should replace
 *        mischaracterized acronyms in the StandardTokenizer; see
 *        https://issues.apache.org/jira/browse/LUCENE-1068
 * @throws IOException declared by the signature.
 *         NOTE(review): neither this body nor the Set constructor it
 *         delegates to declares IOException, so the clause looks
 *         unnecessary - confirm before removing, since existing callers
 *         may catch it.
 */
public StandardAnalyzer(Set stopwords,
boolean replaceInvalidAcronym) throws IOException {
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
Parameters:
stopwords - The stopwords to use
replaceInvalidAcronym - Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
See https://issues.apache.org/jira/browse/LUCENE-1068
|
| Method from org.apache.lucene.analysis.standard.StandardAnalyzer Detail: |
/**
 * Returns the maximum token length currently configured on this analyzer.
 *
 * @return the value last stored by {@code setMaxTokenLength}, or the
 *         field's initial value if it was never set
 */
public int getMaxTokenLength() {
  return this.maxTokenLength;
}
|
/**
 * Returns the current value of the replaceInvalidAcronym flag.
 *
 * @return true if mischaracterized acronyms are to be replaced in the
 *         StandardTokenizer
 */
public boolean isReplaceInvalidAcronym() {
  return this.replaceInvalidAcronym;
}
|
/**
 * Returns a token stream over {@code reader}, reusing a previously built
 * analysis chain when one is available.
 *
 * If {@code getPreviousTokenStream()} yields a saved chain, its tokenizer
 * is reset onto the new reader. Otherwise a new chain (StandardTokenizer,
 * StandardFilter, LowerCaseFilter, StopFilter) is built and registered via
 * {@code setPreviousTokenStream}. In both cases the analyzer's current
 * maxTokenLength and replaceInvalidAcronym settings are applied to the
 * tokenizer before returning.
 *
 * @param fieldName name of the field being analyzed; not used by this
 *        implementation
 * @param reader source of the text to tokenize
 * @return the filtered end of the analysis chain
 * @throws IOException declared by the signature (presumably raised when
 *         the tokenizer is reset - confirm against StandardTokenizer)
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  SavedStreams saved = (SavedStreams) getPreviousTokenStream();
  if (saved != null) {
    // A chain already exists: just point its tokenizer at the new input.
    saved.tokenStream.reset(reader);
  } else {
    // First use: register the holder, then build the chain step by step.
    saved = new SavedStreams();
    setPreviousTokenStream(saved);
    saved.tokenStream = new StandardTokenizer(reader);
    saved.filteredTokenStream = new StandardFilter(saved.tokenStream);
    saved.filteredTokenStream = new LowerCaseFilter(saved.filteredTokenStream);
    saved.filteredTokenStream = new StopFilter(saved.filteredTokenStream, stopSet);
  }

  // Settings may have changed since the chain was built, so re-apply them on every call.
  saved.tokenStream.setMaxTokenLength(maxTokenLength);
  saved.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
  return saved.filteredTokenStream;
}
|
/**
 * Sets the maximum allowed token length. The value is only stored here; it
 * is handed to the tokenizer the next time {@code tokenStream} or
 * {@code reusableTokenStream} is called.
 *
 * @param length the new maximum token length (not validated here)
 */
public void setMaxTokenLength(int length) {
  this.maxTokenLength = length;
}
Set maximum allowed token length. If a token is seen
that exceeds this length then it is discarded. This
setting only takes effect the next time tokenStream or
reusableTokenStream is called. |
/**
 * Sets the replaceInvalidAcronym flag. The value is only stored here; it
 * is passed to the StandardTokenizer the next time {@code tokenStream} or
 * {@code reusableTokenStream} is called.
 *
 * @param replaceInvalidAcronym set to true if this analyzer should replace
 *        mischaracterized acronyms in the StandardTokenizer; see
 *        https://issues.apache.org/jira/browse/LUCENE-1068
 */
public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
|
/**
 * Builds a new analysis chain over {@code reader}: a StandardTokenizer
 * wrapped, in order, by StandardFilter, LowerCaseFilter and a StopFilter
 * using this analyzer's stop set.
 *
 * @param fieldName name of the field being analyzed; not used by this
 *        implementation
 * @param reader source of the text to tokenize
 * @return the outermost filter (the StopFilter) of the newly built chain
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
  StandardTokenizer tokenizer = new StandardTokenizer(reader, replaceInvalidAcronym);
  tokenizer.setMaxTokenLength(maxTokenLength);
  // Filters are applied innermost-first: standard -> lower-case -> stop words.
  return new StopFilter(
      new LowerCaseFilter(new StandardFilter(tokenizer)),
      stopSet);
}
|