| Method from org.apache.lucene.analysis.Token Detail: |
/**
 * Resets the payload, term length, term text, position increment and flags
 * to their default values. The termBuffer array itself is kept so that it
 * can be re-used; startOffset, endOffset and type are deliberately left
 * untouched.
 */
public void clear() {
  // Scalar state goes back to its defaults.
  flags = 0;
  positionIncrement = 1;
  termLength = 0;
  // Drop object references; termBuffer is intentionally retained for re-use.
  termText = null;
  payload = null;
  // startOffset, endOffset and type are not reset here.
}
Resets the term text, payload, flags, and positionIncrement to default.
Other fields such as startOffset, endOffset and the token type are
not reset since they are normally overwritten by the tokenizer. |
/**
 * Returns a deep copy of this token: the termBuffer array and the payload
 * (when present) are cloned as well, so the copy does not share them with
 * this instance.
 *
 * @return the cloned Token
 * @throws RuntimeException wrapping a CloneNotSupportedException, which is
 *         not expected to occur
 */
public Object clone() {
  try {
    final Token copy = (Token) super.clone();
    // super.clone() is shallow, so duplicate the mutable members explicitly.
    if (termBuffer != null) {
      copy.termBuffer = (char[]) termBuffer.clone();
    }
    if (payload != null) {
      copy.setPayload((Payload) payload.clone());
    }
    return copy;
  } catch (CloneNotSupportedException cnse) {
    throw new RuntimeException(cnse); // shouldn't happen
  }
}
|
/**
 * Builds a new Token from the supplied term buffer slice and offsets, then
 * copies this token's type, flags and position increment onto it. A
 * non-null payload is cloned rather than shared.
 *
 * @param newTermBuffer  characters for the new token's term
 * @param newTermOffset  start index within newTermBuffer
 * @param newTermLength  number of characters to use
 * @param newStartOffset start offset for the new token
 * @param newEndOffset   end offset for the new token
 * @return the newly constructed Token
 */
public Token clone(char[] newTermBuffer,
                   int newTermOffset,
                   int newTermLength,
                   int newStartOffset,
                   int newEndOffset) {
  final Token copy =
      new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
  copy.type = type;
  copy.flags = flags;
  copy.positionIncrement = positionIncrement;
  if (payload != null) {
    copy.payload = (Payload) payload.clone();
  }
  return copy;
}
Makes a clone, but replaces the term buffer &
start/end offset in the process. This is more
efficient than doing a full clone (and then calling
setTermBuffer) because it saves a wasted copy of the old
termBuffer. |
/**
 * Returns the value of the endOffset field.
 *
 * @return this token's end offset
 */
public final int endOffset() {
  return this.endOffset;
}
Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). |
/**
 * Compares this token with another object. Two tokens are equal when their
 * term length, offsets, flags, position increment, type and payload match
 * and the first termLength characters of their term buffers are identical.
 * initTermBuffer() is invoked on both tokens before the comparison.
 *
 * @param obj the object to compare against
 * @return true if obj is a Token equal to this one
 */
public boolean equals(Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof Token)) {
    return false;
  }

  final Token that = (Token) obj;
  initTermBuffer();
  that.initTermBuffer();

  final boolean sameAttributes =
      termLength == that.termLength
      && startOffset == that.startOffset
      && endOffset == that.endOffset
      && flags == that.flags
      && positionIncrement == that.positionIncrement
      && subEqual(type, that.type)
      && subEqual(payload, that.payload);
  if (!sameAttributes) {
    return false;
  }

  // Attributes agree; compare the valid portion of the term character by character.
  for (int idx = 0; idx < termLength; idx++) {
    if (termBuffer[idx] != that.termBuffer[idx]) {
      return false;
    }
  }
  return true;
}
|
/**
 * Returns the value of the flags field.
 *
 * @return this token's flags
 */
public int getFlags() {
  return this.flags;
}
EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
Get the bitset for any bits that have been set. This is completely distinct from #type(), although they do share similar purposes.
The flags can be used to encode information about the token for use by other org.apache.lucene.analysis.TokenFilters. |
/**
 * Returns the payload currently held by this token.
 *
 * @return the payload reference, which may be null
 */
public Payload getPayload() {
  return payload;
}
Returns this Token's payload. |
/**
 * Returns the value of the positionIncrement field.
 *
 * @return this token's position increment
 */
public int getPositionIncrement() {
  return this.positionIncrement;
}
Returns the position increment of this Token. |
/**
 * Computes a hash from the term length, offsets, flags, position increment,
 * type, payload and the valid portion of the term buffer, combining them
 * with a multiplier of 31. initTermBuffer() is invoked first.
 *
 * A null type is skipped instead of dereferenced: setType(String) stores
 * whatever it is given, and equals() compares type through subEqual rather
 * than calling a method on it, so hashCode() must not throw for a token
 * that equals() can handle. The result for a non-null type is unchanged.
 *
 * @return the hash code for this token
 */
public int hashCode() {
  initTermBuffer();
  int code = termLength;
  code = code * 31 + startOffset;
  code = code * 31 + endOffset;
  code = code * 31 + flags;
  code = code * 31 + positionIncrement;
  // Guard against a null type, mirroring the null handling already used for payload.
  if (type != null) {
    code = code * 31 + type.hashCode();
  }
  code = (payload == null ? code : code * 31 + payload.hashCode());
  code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
  return code;
}
|
/**
 * Copies the prototype's term, offsets, type, flags, position increment and
 * payload into this token. The term characters are copied via
 * setTermBuffer; the payload reference is assigned directly, so it is
 * shared with the prototype.
 *
 * @param prototype the token to copy from
 */
public void reinit(Token prototype) {
  // Make sure the prototype's term is available as a char[] before copying it.
  prototype.initTermBuffer();
  setTermBuffer(prototype.termBuffer, 0, prototype.termLength);

  startOffset = prototype.startOffset;
  endOffset = prototype.endOffset;
  type = prototype.type;
  flags = prototype.flags;
  positionIncrement = prototype.positionIncrement;
  payload = prototype.payload;
}
Copy the prototype token's fields into this one. Note: Payloads are shared. |
/**
 * Sets this token's term to newTerm and copies the offsets, type, flags,
 * position increment and payload from the prototype. The payload reference
 * is assigned directly, so it is shared with the prototype.
 *
 * @param prototype the token supplying every field except the term
 * @param newTerm   the term text for this token
 */
public void reinit(Token prototype,
                   String newTerm) {
  setTermBuffer(newTerm);

  startOffset = prototype.startOffset;
  endOffset = prototype.endOffset;
  type = prototype.type;
  flags = prototype.flags;
  positionIncrement = prototype.positionIncrement;
  payload = prototype.payload;
}
Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. |
/**
 * Re-initializes this token with a new term and offsets, and sets the type
 * to DEFAULT_TYPE. clearNoTermBuffer() is called before the new values are
 * applied.
 *
 * @param newTerm        the new term text
 * @param newStartOffset the new start offset
 * @param newEndOffset   the new end offset
 * @return this token
 */
public Token reinit(String newTerm,
                    int newStartOffset,
                    int newEndOffset) {
  clearNoTermBuffer();
  setTermBuffer(newTerm);

  type = DEFAULT_TYPE;
  endOffset = newEndOffset;
  startOffset = newStartOffset;
  return this;
}
|
/**
 * Re-initializes this token with a new term, offsets and type.
 * clearNoTermBuffer() is called before the new values are applied.
 *
 * @param newTerm        the new term text
 * @param newStartOffset the new start offset
 * @param newEndOffset   the new end offset
 * @param newType        the new type
 * @return this token
 */
public Token reinit(String newTerm,
                    int newStartOffset,
                    int newEndOffset,
                    String newType) {
  clearNoTermBuffer();
  setTermBuffer(newTerm);

  type = newType;
  endOffset = newEndOffset;
  startOffset = newStartOffset;
  return this;
}
|
/**
 * Sets this token's term from a slice of newTermBuffer and copies the
 * offsets, type, flags, position increment and payload from the prototype.
 * The payload reference is assigned directly, so it is shared with the
 * prototype.
 *
 * @param prototype     the token supplying every field except the term
 * @param newTermBuffer source characters for the term
 * @param offset        start index within newTermBuffer
 * @param length        number of characters to copy
 */
public void reinit(Token prototype,
                   char[] newTermBuffer,
                   int offset,
                   int length) {
  setTermBuffer(newTermBuffer, offset, length);

  startOffset = prototype.startOffset;
  endOffset = prototype.endOffset;
  type = prototype.type;
  flags = prototype.flags;
  positionIncrement = prototype.positionIncrement;
  payload = prototype.payload;
}
Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. |
/**
 * Re-initializes this token from a slice of newTermBuffer with new offsets,
 * and sets the type to DEFAULT_TYPE. clearNoTermBuffer() is called before
 * the new values are applied.
 *
 * @param newTermBuffer  source characters for the term
 * @param newTermOffset  start index within newTermBuffer
 * @param newTermLength  number of characters to copy
 * @param newStartOffset the new start offset
 * @param newEndOffset   the new end offset
 * @return this token
 */
public Token reinit(char[] newTermBuffer,
                    int newTermOffset,
                    int newTermLength,
                    int newStartOffset,
                    int newEndOffset) {
  clearNoTermBuffer();
  setTermBuffer(newTermBuffer, newTermOffset, newTermLength);

  type = DEFAULT_TYPE;
  endOffset = newEndOffset;
  startOffset = newStartOffset;
  return this;
}
|
/**
 * Re-initializes this token from a substring of newTerm with new offsets,
 * and sets the type to DEFAULT_TYPE. clearNoTermBuffer() is called before
 * the new values are applied.
 *
 * @param newTerm        string supplying the term characters
 * @param newTermOffset  start index within newTerm
 * @param newTermLength  number of characters to copy
 * @param newStartOffset the new start offset
 * @param newEndOffset   the new end offset
 * @return this token
 */
public Token reinit(String newTerm,
                    int newTermOffset,
                    int newTermLength,
                    int newStartOffset,
                    int newEndOffset) {
  clearNoTermBuffer();
  setTermBuffer(newTerm, newTermOffset, newTermLength);

  type = DEFAULT_TYPE;
  endOffset = newEndOffset;
  startOffset = newStartOffset;
  return this;
}
|
/**
 * Re-initializes this token from a slice of newTermBuffer with new offsets
 * and type. clearNoTermBuffer() is called first; the payload is then set to
 * null and the position increment to 1 explicitly before the new term is
 * copied in.
 *
 * @param newTermBuffer  source characters for the term
 * @param newTermOffset  start index within newTermBuffer
 * @param newTermLength  number of characters to copy
 * @param newStartOffset the new start offset
 * @param newEndOffset   the new end offset
 * @param newType        the new type
 * @return this token
 */
public Token reinit(char[] newTermBuffer,
                    int newTermOffset,
                    int newTermLength,
                    int newStartOffset,
                    int newEndOffset,
                    String newType) {
  clearNoTermBuffer();
  // NOTE(review): these two resets may duplicate what clearNoTermBuffer() does - confirm before removing.
  positionIncrement = 1;
  payload = null;
  setTermBuffer(newTermBuffer, newTermOffset, newTermLength);

  type = newType;
  endOffset = newEndOffset;
  startOffset = newStartOffset;
  return this;
}
|
/**
 * Re-initializes this token from a substring of newTerm with new offsets
 * and type. clearNoTermBuffer() is called before the new values are
 * applied.
 *
 * @param newTerm        string supplying the term characters
 * @param newTermOffset  start index within newTerm
 * @param newTermLength  number of characters to copy
 * @param newStartOffset the new start offset
 * @param newEndOffset   the new end offset
 * @param newType        the new type
 * @return this token
 */
public Token reinit(String newTerm,
                    int newTermOffset,
                    int newTermLength,
                    int newStartOffset,
                    int newEndOffset,
                    String newType) {
  clearNoTermBuffer();
  setTermBuffer(newTerm, newTermOffset, newTermLength);

  type = newType;
  endOffset = newEndOffset;
  startOffset = newStartOffset;
  return this;
}
|
/**
 * Ensures the term buffer can hold newSize characters while preserving the
 * current term content, and returns the (possibly replaced) buffer.
 * Existing content is taken from termText when there is no termBuffer yet,
 * otherwise from the old termBuffer. termText is always cleared afterwards.
 *
 * @param newSize the requested size, passed to growTermBuffer
 * @return the termBuffer field after any replacement
 */
public char[] resizeTermBuffer(int newSize) {
// growTermBuffer is defined outside this view; per the notes below it yields
// a new array when growth is needed and null otherwise - TODO confirm.
char[] newCharBuffer = growTermBuffer(newSize);
if (termBuffer == null) {
// If there were termText, then preserve it.
// note that if termBuffer is null then newCharBuffer cannot be null
assert newCharBuffer != null;
if (termText != null) {
// Copy the whole string into the start of the fresh array.
termText.getChars(0, termText.length(), newCharBuffer, 0);
}
termBuffer = newCharBuffer;
} else if (newCharBuffer != null) {
// Note: if newCharBuffer != null then termBuffer needs to grow.
// If there were a termBuffer, then preserve it
// The entire old array is copied, not just the first termLength chars.
System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
termBuffer = newCharBuffer;
}
// The char[] is now the authoritative copy of the term.
termText = null;
return termBuffer;
}
|
/**
 * Stores the given value in the endOffset field.
 *
 * @param offset the new end offset
 */
public void setEndOffset(int offset) {
  endOffset = offset;
}
|
/**
 * Stores the given value in the flags field.
 *
 * @param flags the new flags value
 */
public void setFlags(int flags) {
  // The parameter shadows the field, so the qualifier is required.
  this.flags = flags;
}
|
/**
 * Stores the given payload reference on this token; no copy is made.
 *
 * @param payload the payload to attach (stored as given)
 */
public void setPayload(Payload payload) {
  // The parameter shadows the field, so the qualifier is required.
  this.payload = payload;
}
Sets this Token's payload. |
/**
 * Sets the position increment after checking that it is not negative.
 *
 * @param positionIncrement the new increment; must be zero or greater
 * @throws IllegalArgumentException if positionIncrement is negative
 */
public void setPositionIncrement(int positionIncrement) {
  final boolean negative = positionIncrement < 0;
  if (negative) {
    throw new IllegalArgumentException(
        "Increment must be zero or greater: " + positionIncrement);
  }
  this.positionIncrement = positionIncrement;
}
Set the position increment. This determines the position of this token
relative to the previous Token in a TokenStream, used in phrase
searching.
The default value is one.
Some common uses for this are:
- Set it to zero to put multiple terms in the same position. This is
useful if, e.g., a word has multiple stems. Searches for phrases
including either stem will match. In this case, all but the first stem's
increment should be set to zero: the increment of the first instance
should be one. Repeating a token with an increment of zero can also be
used to boost the scores of matches on that token.
- Set it to values greater than one to inhibit exact phrase matches.
If, for example, one does not want phrases to match across removed stop
words, then one could build a stop word filter that removes stop words and
also sets the increment to the number of stop words removed before each
non-stop word. Then exact phrase queries will only match when the terms
occur with no intervening stop words.
|
/**
 * Stores the given value in the startOffset field.
 *
 * @param offset the new start offset
 */
public void setStartOffset(int offset) {
  startOffset = offset;
}
|
/**
 * Copies every character of the given string into termBuffer and sets
 * termLength to the string's length. termText is cleared. If
 * growTermBuffer returns a non-null array, that array becomes the new
 * termBuffer before the copy.
 *
 * @param buffer the string supplying the term characters
 */
public final void setTermBuffer(String buffer) {
  termText = null;
  final int len = buffer.length();
  final char[] grown = growTermBuffer(len);
  if (grown != null) {
    termBuffer = grown;
  }
  buffer.getChars(0, len, termBuffer, 0);
  termLength = len;
}
Copies the contents of buffer into the termBuffer array. |
/**
 * Copies length characters from buffer, starting at offset, into the start
 * of termBuffer and sets termLength to length. termText is cleared. If
 * growTermBuffer returns a non-null array, that array becomes the new
 * termBuffer before the copy.
 *
 * @param buffer source character array
 * @param offset index of the first character to copy
 * @param length number of characters to copy
 */
public final void setTermBuffer(char[] buffer,
                                int offset,
                                int length) {
  termText = null;
  final char[] grown = growTermBuffer(length);
  if (grown != null) {
    termBuffer = grown;
  }
  System.arraycopy(buffer, offset, termBuffer, 0, length);
  termLength = length;
}
Copies the contents of buffer, starting at offset for
length characters, into the termBuffer array. |
public final void setTermBuffer(String buffer,
int offset,
int length) {
assert offset < = buffer.length();
assert offset + length < = buffer.length();
termText = null;
char[] newCharBuffer = growTermBuffer(length);
if (newCharBuffer != null) {
termBuffer = newCharBuffer;
}
buffer.getChars(offset, offset + length, termBuffer, 0);
termLength = length;
}
Copies the contents of buffer, starting at offset and continuing
for length characters, into the termBuffer array. |
/**
 * Sets termLength to the given value after calling initTermBuffer() and
 * checking that the value does not exceed the termBuffer array's size.
 *
 * @param length the new number of valid characters in termBuffer
 * @throws IllegalArgumentException if length is greater than termBuffer.length
 */
public final void setTermLength(int length) {
  initTermBuffer();
  final int capacity = termBuffer.length;
  if (length > capacity) {
    throw new IllegalArgumentException(
        "length " + length + " exceeds the size of the termBuffer (" + capacity + ")");
  }
  termLength = length;
}
Set number of valid characters (length of the term) in
the termBuffer array. Use this to truncate the termBuffer
or to synchronize with external manipulation of the termBuffer.
Note: to grow the size of the array,
use #resizeTermBuffer(int) first. |
/**
 * Stores the term as a String and discards the char[] termBuffer.
 *
 * @param text the term text to store
 */
public void setTermText(String text) {
  // Discard the char[] form and keep only the String form.
  termBuffer = null;
  termText = text;
} Deprecated! use - #setTermBuffer(char[], int, int) or
#setTermBuffer(String) or
#setTermBuffer(String, int, int) .
Sets the Token's term text. NOTE: for better
indexing speed you should instead use the char[]
termBuffer methods to set the term text. |
/**
 * Stores the given value in the type field; the value is not validated.
 *
 * @param type the new type
 */
public final void setType(String type) {
  // The parameter shadows the field, so the qualifier is required.
  this.type = type;
}
|
/**
 * Returns the value of the startOffset field.
 *
 * @return this token's start offset
 */
public final int startOffset() {
  return this.startOffset;
}
Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to termText.length(), as the term text may have been altered by a
stemmer or some other filter. |
/**
 * Returns the term as a String. If a String form (termText) is already
 * held it is returned as is; otherwise initTermBuffer() is called and a new
 * String is built from the first termLength characters of termBuffer.
 *
 * @return the term text
 */
public final String term() {
  if (termText == null) {
    initTermBuffer();
    return new String(termBuffer, 0, termLength);
  }
  return termText;
}
Returns the Token's term text.
This method has a performance penalty
because the text is stored internally in a char[]. If
possible, use #termBuffer() and #termLength() directly instead. If you really need a
String, use this method, which is nothing more than
a convenience call to new String(token.termBuffer(), 0, token.termLength()) |
/**
 * Calls initTermBuffer() and then returns the internal termBuffer array
 * itself (not a copy).
 *
 * @return the internal term character array
 */
public final char[] termBuffer() {
  initTermBuffer();
  return this.termBuffer;
}
Returns the internal termBuffer character array which
you can then directly alter. If the array is too
small for your token, use #resizeTermBuffer(int) to increase it. After
altering the buffer be sure to call #setTermLength to record the number of valid
characters that were placed into the termBuffer. |
/**
 * Calls initTermBuffer() and then returns the termLength field.
 *
 * @return the number of valid characters in termBuffer
 */
public final int termLength() {
  initTermBuffer();
  return this.termLength;
}
Return number of valid characters (length of the term)
in the termBuffer array. |
/**
 * Returns the term as a String, building and caching it in termText from
 * the first termLength characters of termBuffer when no String form is
 * held yet. Returns null when neither termText nor termBuffer is set.
 *
 * @return the term text, possibly null
 */
public final String termText() {
  final boolean mustBuild = termText == null && termBuffer != null;
  if (mustBuild) {
    termText = new String(termBuffer, 0, termLength);
  }
  return termText;
} Deprecated! This - method now has a performance penalty
because the text is stored internally in a char[]. If
possible, use #termBuffer() and #termLength() directly instead. If you really need a
String, use #term()
Returns the Token's term text. |
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('(");
initTermBuffer();
if (termBuffer == null)
sb.append("null");
else
sb.append(termBuffer, 0, termLength);
sb.append(',").append(startOffset).append(',").append(endOffset);
if (!type.equals("word"))
sb.append(",type=").append(type);
if (positionIncrement != 1)
sb.append(",posIncr=").append(positionIncrement);
sb.append(')");
return sb.toString();
}
|
/**
 * Returns the value of the type field.
 *
 * @return this token's type
 */
public final String type() {
  return this.type;
}
Returns this Token's lexical type. Defaults to "word". |