1 package org.apache.lucene.document;
2 /**
3 * Copyright 2006 The Apache Software Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18
19 /**
20 *
21 *
22 **/
23 public abstract class AbstractField implements Fieldable {
24
25 protected String name = "body";
26 protected boolean storeTermVector = false;
27 protected boolean storeOffsetWithTermVector = false;
28 protected boolean storePositionWithTermVector = false;
29 protected boolean omitNorms = false;
30 protected boolean isStored = false;
31 protected boolean isIndexed = true;
32 protected boolean isTokenized = true;
33 protected boolean isBinary = false;
34 protected boolean isCompressed = false;
35 protected boolean lazy = false;
36 protected float boost = 1.0f;
37 // the one and only data object for all different kind of field values
38 protected Object fieldsData = null;
39
40 protected AbstractField()
41 {
42
43 }
44
45 protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
46 if (name == null)
47 throw new NullPointerException("name cannot be null");
48 this.name = name.intern(); // field names are interned
49
50 if (store == Field.Store.YES){
51 this.isStored = true;
52 this.isCompressed = false;
53 }
54 else if (store == Field.Store.COMPRESS) {
55 this.isStored = true;
56 this.isCompressed = true;
57 }
58 else if (store == Field.Store.NO){
59 this.isStored = false;
60 this.isCompressed = false;
61 }
62 else
63 throw new IllegalArgumentException("unknown store parameter " + store);
64
65 if (index == Field.Index.NO) {
66 this.isIndexed = false;
67 this.isTokenized = false;
68 } else if (index == Field.Index.TOKENIZED) {
69 this.isIndexed = true;
70 this.isTokenized = true;
71 } else if (index == Field.Index.UN_TOKENIZED) {
72 this.isIndexed = true;
73 this.isTokenized = false;
74 } else if (index == Field.Index.NO_NORMS) {
75 this.isIndexed = true;
76 this.isTokenized = false;
77 this.omitNorms = true;
78 } else {
79 throw new IllegalArgumentException("unknown index parameter " + index);
80 }
81
82 this.isBinary = false;
83
84 setStoreTermVector(termVector);
85 }
86
87 /** Sets the boost factor hits on this field. This value will be
88 * multiplied into the score of all hits on this this field of this
89 * document.
90 *
91 * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
92 * containing this field. If a document has multiple fields with the same
93 * name, all such values are multiplied together. This product is then
94 * multipled by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
95 * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
96 * index. One should attempt to ensure that this product does not overflow
97 * the range of that encoding.
98 *
99 * @see org.apache.lucene.document.Document#setBoost(float)
100 * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
101 * @see org.apache.lucene.search.Similarity#encodeNorm(float)
102 */
103 public void setBoost(float boost) {
104 this.boost = boost;
105 }
106
107 /** Returns the boost factor for hits for this field.
108 *
109 * <p>The default value is 1.0.
110 *
111 * <p>Note: this value is not stored directly with the document in the index.
112 * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
113 * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
114 * this field was indexed.
115 *
116 * @see #setBoost(float)
117 */
118 public float getBoost() {
119 return boost;
120 }
121
122 /** Returns the name of the field as an interned string.
123 * For example "date", "title", "body", ...
124 */
125 public String name() { return name; }
126
127 protected void setStoreTermVector(Field.TermVector termVector) {
128 if (termVector == Field.TermVector.NO) {
129 this.storeTermVector = false;
130 this.storePositionWithTermVector = false;
131 this.storeOffsetWithTermVector = false;
132 }
133 else if (termVector == Field.TermVector.YES) {
134 this.storeTermVector = true;
135 this.storePositionWithTermVector = false;
136 this.storeOffsetWithTermVector = false;
137 }
138 else if (termVector == Field.TermVector.WITH_POSITIONS) {
139 this.storeTermVector = true;
140 this.storePositionWithTermVector = true;
141 this.storeOffsetWithTermVector = false;
142 }
143 else if (termVector == Field.TermVector.WITH_OFFSETS) {
144 this.storeTermVector = true;
145 this.storePositionWithTermVector = false;
146 this.storeOffsetWithTermVector = true;
147 }
148 else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) {
149 this.storeTermVector = true;
150 this.storePositionWithTermVector = true;
151 this.storeOffsetWithTermVector = true;
152 }
153 else {
154 throw new IllegalArgumentException("unknown termVector parameter " + termVector);
155 }
156 }
157
158 /** True iff the value of the field is to be stored in the index for return
159 with search hits. It is an error for this to be true if a field is
160 Reader-valued. */
161 public final boolean isStored() { return isStored; }
162
163 /** True iff the value of the field is to be indexed, so that it may be
164 searched on. */
165 public final boolean isIndexed() { return isIndexed; }
166
167 /** True iff the value of the field should be tokenized as text prior to
168 indexing. Un-tokenized fields are indexed as a single word and may not be
169 Reader-valued. */
170 public final boolean isTokenized() { return isTokenized; }
171
172 /** True if the value of the field is stored and compressed within the index */
173 public final boolean isCompressed() { return isCompressed; }
174
175 /** True iff the term or terms used to index this field are stored as a term
176 * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
177 * These methods do not provide access to the original content of the field,
178 * only to terms used to index it. If the original content must be
179 * preserved, use the <code>stored</code> attribute instead.
180 *
181 * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
182 */
183 public final boolean isTermVectorStored() { return storeTermVector; }
184
185 /**
186 * True iff terms are stored as term vector together with their offsets
187 * (start and end positon in source text).
188 */
189 public boolean isStoreOffsetWithTermVector(){
190 return storeOffsetWithTermVector;
191 }
192
193 /**
194 * True iff terms are stored as term vector together with their token positions.
195 */
196 public boolean isStorePositionWithTermVector(){
197 return storePositionWithTermVector;
198 }
199
200 /** True iff the value of the filed is stored as binary */
201 public final boolean isBinary() { return isBinary; }
202
203 /** True if norms are omitted for this indexed field */
204 public boolean getOmitNorms() { return omitNorms; }
205
206 /** Expert:
207 *
208 * If set, omit normalization factors associated with this indexed field.
209 * This effectively disables indexing boosts and length normalization for this field.
210 */
211 public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
212
213 public boolean isLazy() {
214 return lazy;
215 }
216
217 /** Prints a Field for human consumption. */
218 public final String toString() {
219 StringBuffer result = new StringBuffer();
220 if (isStored) {
221 result.append("stored");
222 if (isCompressed)
223 result.append("/compressed");
224 else
225 result.append("/uncompressed");
226 }
227 if (isIndexed) {
228 if (result.length() > 0)
229 result.append(",");
230 result.append("indexed");
231 }
232 if (isTokenized) {
233 if (result.length() > 0)
234 result.append(",");
235 result.append("tokenized");
236 }
237 if (storeTermVector) {
238 if (result.length() > 0)
239 result.append(",");
240 result.append("termVector");
241 }
242 if (storeOffsetWithTermVector) {
243 if (result.length() > 0)
244 result.append(",");
245 result.append("termVectorOffsets");
246 }
247 if (storePositionWithTermVector) {
248 if (result.length() > 0)
249 result.append(",");
250 result.append("termVectorPosition");
251 }
252 if (isBinary) {
253 if (result.length() > 0)
254 result.append(",");
255 result.append("binary");
256 }
257 if (omitNorms) {
258 result.append(",omitNorms");
259 }
260 if (lazy){
261 result.append(",lazy");
262 }
263 result.append('<');
264 result.append(name);
265 result.append(':');
266
267 if (fieldsData != null && lazy == false) {
268 result.append(fieldsData);
269 }
270
271 result.append('>');
272 return result.toString();
273 }
274 }