1 package org.apache.lucene.analysis;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.index.Payload;
21 import org.apache.lucene.index.TermPositions; // for javadoc
22 import org.apache.lucene.util.ArrayUtil;
23
24 /** A Token is an occurrence of a term from the text of a field. It consists of
25 a term's text, the start and end offset of the term in the text of the field,
26 and a type string.
27 <p>
28 The start and end offsets permit applications to re-associate a token with
29 its source text, e.g., to display highlighted query terms in a document
30 browser, or to show matching text fragments in a KWIC (KeyWord In Context)
31 display, etc.
32 <p>
33 The type is a string, assigned by a lexical analyzer
34 (a.k.a. tokenizer), naming the lexical or syntactic class that the token
35 belongs to. For example an end of sentence marker token might be implemented
36 with type "eos". The default token type is "word".
37 <p>
38 A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
39 length byte array. Use {@link TermPositions#getPayloadLength()} and
40 {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads from the index.
41
42 <br><br>
43 <p><font color="#FF0000">
44 WARNING: The status of the <b>Payloads</b> feature is experimental.
45 The APIs introduced here might change in the future and will not be
46 supported anymore in such a case.</font>
47
48 <br><br>
49
50 <p><b>NOTE:</b> As of 2.3, Token stores the term text
51 internally as a malleable char[] termBuffer instead of
52 String termText. The indexing code and core tokenizers
53 have been changed to re-use a single Token instance, changing
54 its buffer and other fields in-place as the Token is
55 processed. This provides substantially better indexing
56 performance as it saves the GC cost of new'ing a Token and
57 String for every term. The APIs that accept String
58 termText are still available but a warning about the
59 associated performance cost has been added (below). The
60 {@link #termText()} method has been deprecated.</p>
61
62 <p>Tokenizers and filters should try to re-use a Token
63 instance when possible for best performance, by
64 implementing the {@link TokenStream#next(Token)} API.
65 Failing that, to create a new Token you should first use
66 one of the constructors that starts with null text. To load
67 the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
68 To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}.
69 Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()},
70 if you know that your text is shorter than the capacity of the termBuffer
71 or {@link #resizeTermBuffer(int)}, if there is any possibility
72 that you may need to grow the buffer. Fill in the characters of your term into this
73 buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
74 or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to
75 set the length of the term text. See <a target="_top"
76 href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
77 for details.</p>
78 <p>Typical reuse patterns:
79 <ul>
80 <li> Copying text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
81 <pre>
82 return reusableToken.reinit(string, startOffset, endOffset[, type]);
83 </pre>
84 </li>
85 <li> Copying some text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
86 <pre>
87 return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
88 </pre>
</li>
91 <li> Copying text from char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
92 <pre>
93 return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
94 </pre>
95 </li>
96 <li> Copying some text from a char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
97 <pre>
98 return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
99 </pre>
100 </li>
<li> Copying from one Token to another (type is reset to #DEFAULT_TYPE if not specified):<br/>
102 <pre>
103 return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
104 </pre>
105 </li>
106 </ul>
107 A few things to note:
108 <ul>
109 <li>clear() initializes most of the fields to default values, but not startOffset, endOffset and type.</li>
110 <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
<li>The startOffset and endOffset represent the start and end offset in the source text. So be careful in adjusting them.</li>
112 <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
113 </ul>
114 </p>
115
116 @see org.apache.lucene.index.Payload
117 */
118 public class Token implements Cloneable {
119
120 public static final String DEFAULT_TYPE = "word";
121
122 private static int MIN_BUFFER_SIZE = 10;
123
124 /** @deprecated We will remove this when we remove the
125 * deprecated APIs */
126 private String termText;
127
128 /**
129 * Characters for the term text.
130 * @deprecated This will be made private. Instead, use:
131 * {@link termBuffer()},
132 * {@link #setTermBuffer(char[], int, int)},
133 * {@link #setTermBuffer(String)}, or
134 * {@link #setTermBuffer(String, int, int)}
135 */
136 char[] termBuffer;
137
138 /**
139 * Length of term text in the buffer.
140 * @deprecated This will be made private. Instead, use:
141 * {@link termLength()}, or @{link setTermLength(int)}.
142 */
143 int termLength;
144
145 /**
146 * Start in source text.
147 * @deprecated This will be made private. Instead, use:
148 * {@link startOffset()}, or @{link setStartOffset(int)}.
149 */
150 int startOffset;
151
152 /**
153 * End in source text.
154 * @deprecated This will be made private. Instead, use:
155 * {@link endOffset()}, or @{link setEndOffset(int)}.
156 */
157 int endOffset;
158
159 /**
160 * The lexical type of the token.
161 * @deprecated This will be made private. Instead, use:
162 * {@link type()}, or @{link setType(String)}.
163 */
164 String type = DEFAULT_TYPE;
165
166 private int flags;
167
168 /**
169 * @deprecated This will be made private. Instead, use:
170 * {@link getPayload()}, or @{link setPayload(Payload)}.
171 */
172 Payload payload;
173
174 /**
175 * @deprecated This will be made private. Instead, use:
176 * {@link getPositionIncrement()}, or @{link setPositionIncrement(String)}.
177 */
178 int positionIncrement = 1;
179
180 /** Constructs a Token will null text. */
181 public Token() {
182 }
183
184 /** Constructs a Token with null text and start & end
185 * offsets.
186 * @param start start offset in the source text
187 * @param end end offset in the source text */
188 public Token(int start, int end) {
189 startOffset = start;
190 endOffset = end;
191 }
192
193 /** Constructs a Token with null text and start & end
194 * offsets plus the Token type.
195 * @param start start offset in the source text
196 * @param end end offset in the source text
197 * @param typ the lexical type of this Token */
198 public Token(int start, int end, String typ) {
199 startOffset = start;
200 endOffset = end;
201 type = typ;
202 }
203
204 /**
205 * Constructs a Token with null text and start & end
206 * offsets plus flags. NOTE: flags is EXPERIMENTAL.
207 * @param start start offset in the source text
208 * @param end end offset in the source text
209 * @param flags The bits to set for this token
210 */
211 public Token(int start, int end, int flags) {
212 startOffset = start;
213 endOffset = end;
214 this.flags = flags;
215 }
216
217 /** Constructs a Token with the given term text, and start
218 * & end offsets. The type defaults to "word."
219 * <b>NOTE:</b> for better indexing speed you should
220 * instead use the char[] termBuffer methods to set the
221 * term text.
222 * @param text term text
223 * @param start start offset
224 * @param end end offset
225 * @deprecated
226 */
227 public Token(String text, int start, int end) {
228 termText = text;
229 startOffset = start;
230 endOffset = end;
231 }
232
233 /** Constructs a Token with the given text, start and end
234 * offsets, & type. <b>NOTE:</b> for better indexing
235 * speed you should instead use the char[] termBuffer
236 * methods to set the term text.
237 * @param text term text
238 * @param start start offset
239 * @param end end offset
240 * @param typ token type
241 * @deprecated
242 */
243 public Token(String text, int start, int end, String typ) {
244 termText = text;
245 startOffset = start;
246 endOffset = end;
247 type = typ;
248 }
249
250 /**
251 * Constructs a Token with the given text, start and end
252 * offsets, & type. <b>NOTE:</b> for better indexing
253 * speed you should instead use the char[] termBuffer
254 * methods to set the term text.
255 * @param text
256 * @param start
257 * @param end
258 * @param flags token type bits
259 * @deprecated
260 */
261 public Token(String text, int start, int end, int flags) {
262 termText = text;
263 startOffset = start;
264 endOffset = end;
265 this.flags = flags;
266 }
267
268 /**
269 * Constructs a Token with the given term buffer (offset
270 * & length), start and end
271 * offsets
272 * @param startTermBuffer
273 * @param termBufferOffset
274 * @param termBufferLength
275 * @param start
276 * @param end
277 */
278 public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
279 setTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
280 startOffset = start;
281 endOffset = end;
282 }
283
284 /** Set the position increment. This determines the position of this token
285 * relative to the previous Token in a {@link TokenStream}, used in phrase
286 * searching.
287 *
288 * <p>The default value is one.
289 *
290 * <p>Some common uses for this are:<ul>
291 *
292 * <li>Set it to zero to put multiple terms in the same position. This is
293 * useful if, e.g., a word has multiple stems. Searches for phrases
294 * including either stem will match. In this case, all but the first stem's
295 * increment should be set to zero: the increment of the first instance
296 * should be one. Repeating a token with an increment of zero can also be
297 * used to boost the scores of matches on that token.
298 *
299 * <li>Set it to values greater than one to inhibit exact phrase matches.
300 * If, for example, one does not want phrases to match across removed stop
301 * words, then one could build a stop word filter that removes stop words and
302 * also sets the increment to the number of stop words removed before each
303 * non-stop word. Then exact phrase queries will only match when the terms
304 * occur with no intervening stop words.
305 *
306 * </ul>
307 * @param positionIncrement the distance from the prior term
308 * @see org.apache.lucene.index.TermPositions
309 */
310 public void setPositionIncrement(int positionIncrement) {
311 if (positionIncrement < 0)
312 throw new IllegalArgumentException
313 ("Increment must be zero or greater: " + positionIncrement);
314 this.positionIncrement = positionIncrement;
315 }
316
317 /** Returns the position increment of this Token.
318 * @see #setPositionIncrement
319 */
320 public int getPositionIncrement() {
321 return positionIncrement;
322 }
323
324 /** Sets the Token's term text. <b>NOTE:</b> for better
325 * indexing speed you should instead use the char[]
326 * termBuffer methods to set the term text.
327 * @deprecated use {@link #setTermBuffer(char[], int, int)} or
328 * {@link #setTermBuffer(String)} or
329 * {@link #setTermBuffer(String, int, int)}.
330 */
331 public void setTermText(String text) {
332 termText = text;
333 termBuffer = null;
334 }
335
336 /** Returns the Token's term text.
337 *
338 * @deprecated This method now has a performance penalty
339 * because the text is stored internally in a char[]. If
340 * possible, use {@link #termBuffer()} and {@link
341 * #termLength()} directly instead. If you really need a
342 * String, use {@link #term()}</b>
343 */
344 public final String termText() {
345 if (termText == null && termBuffer != null)
346 termText = new String(termBuffer, 0, termLength);
347 return termText;
348 }
349
350 /** Returns the Token's term text.
351 *
352 * This method has a performance penalty
353 * because the text is stored internally in a char[]. If
354 * possible, use {@link #termBuffer()} and {@link
355 * #termLength()} directly instead. If you really need a
356 * String, use this method, which is nothing more than
357 * a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
358 */
359 public final String term() {
360 if (termText != null)
361 return termText;
362 initTermBuffer();
363 return new String(termBuffer, 0, termLength);
364 }
365
366 /** Copies the contents of buffer, starting at offset for
367 * length characters, into the termBuffer array.
368 * @param buffer the buffer to copy
369 * @param offset the index in the buffer of the first character to copy
370 * @param length the number of characters to copy
371 */
372 public final void setTermBuffer(char[] buffer, int offset, int length) {
373 termText = null;
374 char[] newCharBuffer = growTermBuffer(length);
375 if (newCharBuffer != null) {
376 termBuffer = newCharBuffer;
377 }
378 System.arraycopy(buffer, offset, termBuffer, 0, length);
379 termLength = length;
380 }
381
382 /** Copies the contents of buffer into the termBuffer array.
383 * @param buffer the buffer to copy
384 */
385 public final void setTermBuffer(String buffer) {
386 termText = null;
387 int length = buffer.length();
388 char[] newCharBuffer = growTermBuffer(length);
389 if (newCharBuffer != null) {
390 termBuffer = newCharBuffer;
391 }
392 buffer.getChars(0, length, termBuffer, 0);
393 termLength = length;
394 }
395
396 /** Copies the contents of buffer, starting at offset and continuing
397 * for length characters, into the termBuffer array.
398 * @param buffer the buffer to copy
399 * @param offset the index in the buffer of the first character to copy
400 * @param length the number of characters to copy
401 */
402 public final void setTermBuffer(String buffer, int offset, int length) {
403 assert offset <= buffer.length();
404 assert offset + length <= buffer.length();
405 termText = null;
406 char[] newCharBuffer = growTermBuffer(length);
407 if (newCharBuffer != null) {
408 termBuffer = newCharBuffer;
409 }
410 buffer.getChars(offset, offset + length, termBuffer, 0);
411 termLength = length;
412 }
413
414 /** Returns the internal termBuffer character array which
415 * you can then directly alter. If the array is too
416 * small for your token, use {@link
417 * #resizeTermBuffer(int)} to increase it. After
418 * altering the buffer be sure to call {@link
419 * #setTermLength} to record the number of valid
420 * characters that were placed into the termBuffer. */
421 public final char[] termBuffer() {
422 initTermBuffer();
423 return termBuffer;
424 }
425
426 /** Grows the termBuffer to at least size newSize, preserving the
427 * existing content. Note: If the next operation is to change
428 * the contents of the term buffer use
429 * {@link #setTermBuffer(char[], int, int)},
430 * {@link #setTermBuffer(String)}, or
431 * {@link #setTermBuffer(String, int, int)}
432 * to optimally combine the resize with the setting of the termBuffer.
433 * @param newSize minimum size of the new termBuffer
434 * @return newly created termBuffer with length >= newSize
435 */
436 public char[] resizeTermBuffer(int newSize) {
437 char[] newCharBuffer = growTermBuffer(newSize);
438 if (termBuffer == null) {
439 // If there were termText, then preserve it.
440 // note that if termBuffer is null then newCharBuffer cannot be null
441 assert newCharBuffer != null;
442 if (termText != null) {
443 termText.getChars(0, termText.length(), newCharBuffer, 0);
444 }
445 termBuffer = newCharBuffer;
446 } else if (newCharBuffer != null) {
447 // Note: if newCharBuffer != null then termBuffer needs to grow.
448 // If there were a termBuffer, then preserve it
449 System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
450 termBuffer = newCharBuffer;
451 }
452 termText = null;
453 return termBuffer;
454 }
455
456 /** Allocates a buffer char[] of at least newSize
457 * @param newSize minimum size of the buffer
458 * @return newly created buffer with length >= newSize or null if the current termBuffer is big enough
459 */
460 private char[] growTermBuffer(int newSize) {
461 if (termBuffer != null) {
462 if (termBuffer.length >= newSize)
463 // Already big enough
464 return null;
465 else
466 // Not big enough; create a new array with slight
467 // over allocation:
468 return new char[ArrayUtil.getNextSize(newSize)];
469 } else {
470
471 // determine the best size
472 // The buffer is always at least MIN_BUFFER_SIZE
473 if (newSize < MIN_BUFFER_SIZE) {
474 newSize = MIN_BUFFER_SIZE;
475 }
476
477 // If there is already a termText, then the size has to be at least that big
478 if (termText != null) {
479 int ttLength = termText.length();
480 if (newSize < ttLength) {
481 newSize = ttLength;
482 }
483 }
484
485 return new char[newSize];
486 }
487 }
488
489 // TODO: once we remove the deprecated termText() method
490 // and switch entirely to char[] termBuffer we don't need
491 // to use this method anymore
492 private void initTermBuffer() {
493 if (termBuffer == null) {
494 if (termText == null) {
495 termBuffer = new char[MIN_BUFFER_SIZE];
496 termLength = 0;
497 } else {
498 int length = termText.length();
499 if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE;
500 termBuffer = new char[length];
501 termLength = termText.length();
502 termText.getChars(0, termText.length(), termBuffer, 0);
503 termText = null;
504 }
505 } else if (termText != null)
506 termText = null;
507 }
508
509 /** Return number of valid characters (length of the term)
510 * in the termBuffer array. */
511 public final int termLength() {
512 initTermBuffer();
513 return termLength;
514 }
515
516 /** Set number of valid characters (length of the term) in
517 * the termBuffer array. Use this to truncate the termBuffer
518 * or to synchronize with external manipulation of the termBuffer.
519 * Note: to grow the size of the array,
520 * use {@link #resizeTermBuffer(int)} first.
521 * @param length the truncated length
522 */
523 public final void setTermLength(int length) {
524 initTermBuffer();
525 if (length > termBuffer.length)
526 throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
527 termLength = length;
528 }
529
530 /** Returns this Token's starting offset, the position of the first character
531 corresponding to this token in the source text.
532
533 Note that the difference between endOffset() and startOffset() may not be
534 equal to termText.length(), as the term text may have been altered by a
535 stemmer or some other filter. */
536 public final int startOffset() {
537 return startOffset;
538 }
539
540 /** Set the starting offset.
541 @see #startOffset() */
542 public void setStartOffset(int offset) {
543 this.startOffset = offset;
544 }
545
546 /** Returns this Token's ending offset, one greater than the position of the
547 last character corresponding to this token in the source text. The length
548 of the token in the source text is (endOffset - startOffset). */
549 public final int endOffset() {
550 return endOffset;
551 }
552
553 /** Set the ending offset.
554 @see #endOffset() */
555 public void setEndOffset(int offset) {
556 this.endOffset = offset;
557 }
558
559 /** Returns this Token's lexical type. Defaults to "word". */
560 public final String type() {
561 return type;
562 }
563
564 /** Set the lexical type.
565 @see #type() */
566 public final void setType(String type) {
567 this.type = type;
568 }
569
570 /**
571 * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
572 * <p/>
573 *
574 * Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although they do share similar purposes.
575 * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
576 *
577 *
578 * @return The bits
579 */
580 public int getFlags() {
581 return flags;
582 }
583
584 /**
585 * @see #getFlags()
586 */
587 public void setFlags(int flags) {
588 this.flags = flags;
589 }
590
591 /**
592 * Returns this Token's payload.
593 */
594 public Payload getPayload() {
595 return this.payload;
596 }
597
598 /**
599 * Sets this Token's payload.
600 */
601 public void setPayload(Payload payload) {
602 this.payload = payload;
603 }
604
605 public String toString() {
606 StringBuffer sb = new StringBuffer();
607 sb.append('(');
608 initTermBuffer();
609 if (termBuffer == null)
610 sb.append("null");
611 else
612 sb.append(termBuffer, 0, termLength);
613 sb.append(',').append(startOffset).append(',').append(endOffset);
614 if (!type.equals("word"))
615 sb.append(",type=").append(type);
616 if (positionIncrement != 1)
617 sb.append(",posIncr=").append(positionIncrement);
618 sb.append(')');
619 return sb.toString();
620 }
621
622 /** Resets the term text, payload, flags, and positionIncrement to default.
623 * Other fields such as startOffset, endOffset and the token type are
624 * not reset since they are normally overwritten by the tokenizer. */
625 public void clear() {
626 payload = null;
627 // Leave termBuffer to allow re-use
628 termLength = 0;
629 termText = null;
630 positionIncrement = 1;
631 flags = 0;
632 // startOffset = endOffset = 0;
633 // type = DEFAULT_TYPE;
634 }
635
636 public Object clone() {
637 try {
638 Token t = (Token)super.clone();
639 // Do a deep clone
640 if (termBuffer != null) {
641 t.termBuffer = (char[]) termBuffer.clone();
642 }
643 if (payload != null) {
644 t.setPayload((Payload) payload.clone());
645 }
646 return t;
647 } catch (CloneNotSupportedException e) {
648 throw new RuntimeException(e); // shouldn't happen
649 }
650 }
651
652 /** Makes a clone, but replaces the term buffer &
653 * start/end offset in the process. This is more
654 * efficient than doing a full clone (and then calling
655 * setTermBuffer) because it saves a wasted copy of the old
656 * termBuffer. */
657 public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
658 final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
659 t.positionIncrement = positionIncrement;
660 t.flags = flags;
661 t.type = type;
662 if (payload != null)
663 t.payload = (Payload) payload.clone();
664 return t;
665 }
666
667 public boolean equals(Object obj) {
668 if (obj == this)
669 return true;
670
671 if (obj instanceof Token) {
672 Token other = (Token) obj;
673
674 initTermBuffer();
675 other.initTermBuffer();
676
677 if (termLength == other.termLength &&
678 startOffset == other.startOffset &&
679 endOffset == other.endOffset &&
680 flags == other.flags &&
681 positionIncrement == other.positionIncrement &&
682 subEqual(type, other.type) &&
683 subEqual(payload, other.payload)) {
684 for(int i=0;i<termLength;i++)
685 if (termBuffer[i] != other.termBuffer[i])
686 return false;
687 return true;
688 } else
689 return false;
690 } else
691 return false;
692 }
693
694 private boolean subEqual(Object o1, Object o2) {
695 if (o1 == null)
696 return o2 == null;
697 else
698 return o1.equals(o2);
699 }
700
701 public int hashCode() {
702 initTermBuffer();
703 int code = termLength;
704 code = code * 31 + startOffset;
705 code = code * 31 + endOffset;
706 code = code * 31 + flags;
707 code = code * 31 + positionIncrement;
708 code = code * 31 + type.hashCode();
709 code = (payload == null ? code : code * 31 + payload.hashCode());
710 code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
711 return code;
712 }
713
714 // like clear() but doesn't clear termBuffer/text
715 private void clearNoTermBuffer() {
716 payload = null;
717 positionIncrement = 1;
718 flags = 0;
719 }
720
721 /** Shorthand for calling {@link #clear},
722 * {@link #setTermBuffer(char[], int, int)},
723 * {@link #setStartOffset},
724 * {@link #setEndOffset},
725 * {@link #setType}
726 * @return this Token instance */
727 public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
728 clearNoTermBuffer();
729 payload = null;
730 positionIncrement = 1;
731 setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
732 startOffset = newStartOffset;
733 endOffset = newEndOffset;
734 type = newType;
735 return this;
736 }
737
738 /** Shorthand for calling {@link #clear},
739 * {@link #setTermBuffer(char[], int, int)},
740 * {@link #setStartOffset},
741 * {@link #setEndOffset}
742 * {@link #setType} on Token.DEFAULT_TYPE
743 * @return this Token instance */
744 public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
745 clearNoTermBuffer();
746 setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
747 startOffset = newStartOffset;
748 endOffset = newEndOffset;
749 type = DEFAULT_TYPE;
750 return this;
751 }
752
753 /** Shorthand for calling {@link #clear},
754 * {@link #setTermBuffer(String)},
755 * {@link #setStartOffset},
756 * {@link #setEndOffset}
757 * {@link #setType}
758 * @return this Token instance */
759 public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
760 clearNoTermBuffer();
761 setTermBuffer(newTerm);
762 startOffset = newStartOffset;
763 endOffset = newEndOffset;
764 type = newType;
765 return this;
766 }
767
768 /** Shorthand for calling {@link #clear},
769 * {@link #setTermBuffer(String, int, int)},
770 * {@link #setStartOffset},
771 * {@link #setEndOffset}
772 * {@link #setType}
773 * @return this Token instance */
774 public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
775 clearNoTermBuffer();
776 setTermBuffer(newTerm, newTermOffset, newTermLength);
777 startOffset = newStartOffset;
778 endOffset = newEndOffset;
779 type = newType;
780 return this;
781 }
782
783 /** Shorthand for calling {@link #clear},
784 * {@link #setTermBuffer(String)},
785 * {@link #setStartOffset},
786 * {@link #setEndOffset}
787 * {@link #setType} on Token.DEFAULT_TYPE
788 * @return this Token instance */
789 public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
790 clearNoTermBuffer();
791 setTermBuffer(newTerm);
792 startOffset = newStartOffset;
793 endOffset = newEndOffset;
794 type = DEFAULT_TYPE;
795 return this;
796 }
797
798 /** Shorthand for calling {@link #clear},
799 * {@link #setTermBuffer(String, int, int)},
800 * {@link #setStartOffset},
801 * {@link #setEndOffset}
802 * {@link #setType} on Token.DEFAULT_TYPE
803 * @return this Token instance */
804 public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
805 clearNoTermBuffer();
806 setTermBuffer(newTerm, newTermOffset, newTermLength);
807 startOffset = newStartOffset;
808 endOffset = newEndOffset;
809 type = DEFAULT_TYPE;
810 return this;
811 }
812
813 /**
814 * Copy the prototype token's fields into this one. Note: Payloads are shared.
815 * @param prototype
816 */
817 public void reinit(Token prototype) {
818 prototype.initTermBuffer();
819 setTermBuffer(prototype.termBuffer, 0, prototype.termLength);
820 positionIncrement = prototype.positionIncrement;
821 flags = prototype.flags;
822 startOffset = prototype.startOffset;
823 endOffset = prototype.endOffset;
824 type = prototype.type;
825 payload = prototype.payload;
826 }
827
828 /**
829 * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
830 * @param prototype
831 * @param newTerm
832 */
833 public void reinit(Token prototype, String newTerm) {
834 setTermBuffer(newTerm);
835 positionIncrement = prototype.positionIncrement;
836 flags = prototype.flags;
837 startOffset = prototype.startOffset;
838 endOffset = prototype.endOffset;
839 type = prototype.type;
840 payload = prototype.payload;
841 }
842
843 /**
844 * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
845 * @param prototype
846 * @param newTermBuffer
847 * @param offset
848 * @param length
849 */
850 public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
851 setTermBuffer(newTermBuffer, offset, length);
852 positionIncrement = prototype.positionIncrement;
853 flags = prototype.flags;
854 startOffset = prototype.startOffset;
855 endOffset = prototype.endOffset;
856 type = prototype.type;
857 payload = prototype.payload;
858 }
859 }