Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   package org.apache.lucene.analysis;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.Reader;
   21   import java.io.IOException;
   22   
   23   import org.apache.lucene.util.CloseableThreadLocal;
   24   import org.apache.lucene.store.AlreadyClosedException;
   25   
   26   /** An Analyzer builds TokenStreams, which analyze text.  It thus represents a
   27    *  policy for extracting index terms from text.
   28    *  <p>
   29    *  Typical implementations first build a Tokenizer, which breaks the stream of
   30    *  characters from the Reader into raw Tokens.  One or more TokenFilters may
   31    *  then be applied to the output of the Tokenizer.
   32    */
   33   public abstract class Analyzer {
   34     /** Creates a TokenStream which tokenizes all the text in the provided
   35      * Reader.  Must be able to handle null field name for backward compatibility.
   36      */
   37     public abstract TokenStream tokenStream(String fieldName, Reader reader);
   38   
   39     /** Creates a TokenStream that is allowed to be re-used
   40      *  from the previous time that the same thread called
   41      *  this method.  Callers that do not need to use more
   42      *  than one TokenStream at the same time from this
   43      *  analyzer should use this method for better
   44      *  performance.
   45      */
   46     public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
   47       return tokenStream(fieldName, reader);
   48     }
   49   
   50     private CloseableThreadLocal tokenStreams = new CloseableThreadLocal();
   51   
   52     /** Used by Analyzers that implement reusableTokenStream
   53      *  to retrieve previously saved TokenStreams for re-use
   54      *  by the same thread. */
   55     protected Object getPreviousTokenStream() {
   56       try {
   57         return tokenStreams.get();
   58       } catch (NullPointerException npe) {
   59         if (tokenStreams == null) {
   60           throw new AlreadyClosedException("this Analyzer is closed");
   61         } else {
   62           throw npe;
   63         }
   64       }
   65     }
   66   
   67     /** Used by Analyzers that implement reusableTokenStream
   68      *  to save a TokenStream for later re-use by the same
   69      *  thread. */
   70     protected void setPreviousTokenStream(Object obj) {
   71       try {
   72         tokenStreams.set(obj);
   73       } catch (NullPointerException npe) {
   74         if (tokenStreams == null) {
   75           throw new AlreadyClosedException("this Analyzer is closed");
   76         } else {
   77           throw npe;
   78         }
   79       }
   80     }
   81   
   82   
   83     /**
   84      * Invoked before indexing a Fieldable instance if
   85      * terms have already been added to that field.  This allows custom
   86      * analyzers to place an automatic position increment gap between
   87      * Fieldable instances using the same field name.  The default value
   88      * position increment gap is 0.  With a 0 position increment gap and
   89      * the typical default token position increment of 1, all terms in a field,
   90      * including across Fieldable instances, are in successive positions, allowing
   91      * exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
   92      *
   93      * @param fieldName Fieldable name being indexed.
   94      * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
   95      */
   96     public int getPositionIncrementGap(String fieldName)
   97     {
   98       return 0;
   99     }
  100   
  101     /** Frees persistent resources used by this Analyzer */
  102     public void close() {
  103       tokenStreams.close();
  104       tokenStreams = null;
  105     }
  106   }

Save This Page
Home » lucene-2.4.1-src » org.apache » lucene » analysis » [javadoc | source]