Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   package org.apache.lucene.analysis;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.index.Payload;
   21   
   22   import java.io.IOException;
   23   
   24   /** A TokenStream enumerates the sequence of tokens, either from
   25     fields of a document or from query text.
   26     <p>
   27     This is an abstract class.  Concrete subclasses are:
   28     <ul>
   29     <li>{@link Tokenizer}, a TokenStream
   30     whose input is a Reader; and
   31     <li>{@link TokenFilter}, a TokenStream
   32     whose input is another TokenStream.
   33     </ul>
   34     NOTE: subclasses must override {@link #next(Token)}.  It's
   35     also OK to instead override {@link #next()} but that
   36     method is now deprecated in favor of {@link #next(Token)}.
   37     */
   38   
   39   public abstract class TokenStream {
   40   
   41     /** Returns the next token in the stream, or null at EOS.
   42      *  @deprecated The returned Token is a "full private copy" (not
   43      *  re-used across calls to next()) but will be slower
   44      *  than calling {@link #next(Token)} instead.. */
   45     public Token next() throws IOException {
   46       final Token reusableToken = new Token();
   47       Token nextToken = next(reusableToken);
   48   
   49       if (nextToken != null) {
   50         Payload p = nextToken.getPayload();
   51         if (p != null) {
   52           nextToken.setPayload((Payload) p.clone());
   53         }
   54       }
   55   
   56       return nextToken;
   57     }
   58   
   59     /** Returns the next token in the stream, or null at EOS.
   60      *  When possible, the input Token should be used as the
   61      *  returned Token (this gives fastest tokenization
   62      *  performance), but this is not required and a new Token
   63      *  may be returned. Callers may re-use a single Token
   64      *  instance for successive calls to this method.
   65      *  <p>
   66      *  This implicitly defines a "contract" between 
   67      *  consumers (callers of this method) and 
   68      *  producers (implementations of this method 
   69      *  that are the source for tokens):
   70      *  <ul>
   71      *   <li>A consumer must fully consume the previously 
   72      *       returned Token before calling this method again.</li>
   73      *   <li>A producer must call {@link Token#clear()}
   74      *       before setting the fields in it & returning it</li>
   75      *  </ul>
   76      *  Also, the producer must make no assumptions about a
   77      *  Token after it has been returned: the caller may
   78      *  arbitrarily change it.  If the producer needs to hold
   79      *  onto the token for subsequent calls, it must clone()
   80      *  it before storing it.
   81      *  Note that a {@link TokenFilter} is considered a consumer.
   82      *  @param reusableToken a Token that may or may not be used to
   83      *  return; this parameter should never be null (the callee
   84      *  is not required to check for null before using it, but it is a
   85      *  good idea to assert that it is not null.)
   86      *  @return next token in the stream or null if end-of-stream was hit
   87      */
   88     public Token next(final Token reusableToken) throws IOException {
   89       // We don't actually use inputToken, but still add this assert
   90       assert reusableToken != null;
   91       return next();
   92     }
   93   
   94     /** Resets this stream to the beginning. This is an
   95      *  optional operation, so subclasses may or may not
   96      *  implement this method. Reset() is not needed for
   97      *  the standard indexing process. However, if the Tokens 
   98      *  of a TokenStream are intended to be consumed more than 
   99      *  once, it is necessary to implement reset().  Note that
  100      *  if your TokenStream caches tokens and feeds them back
  101      *  again after a reset, it is imperative that you
  102      *  clone the tokens when you store them away (on the
  103      *  first pass) as well as when you return them (on future
  104      *  passes after reset()).
  105      */
  106     public void reset() throws IOException {}
  107     
  108     /** Releases resources associated with this stream. */
  109     public void close() throws IOException {}
  110   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » analysis » [javadoc | source]