1 package org.apache.lucene.document;
2
3 /**
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.search.Hits;
22 import org.apache.lucene.search.Searcher;
23
24 import java.util.*; // for javadoc
25
26 /** Documents are the unit of indexing and search.
27 *
28 * A Document is a set of fields. Each field has a name and a textual value.
29 * A field may be {@link Fieldable#isStored() stored} with the document, in which
30 * case it is returned with search hits on the document. Thus each document
31 * should typically contain one or more stored fields which uniquely identify
32 * it.
33 *
34 * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
35 * <i>not</i> available in documents retrieved from the index, e.g. with {@link
36 * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link
37 * IndexReader#document(int)}.
38 */
39
40 public final class Document implements java.io.Serializable {
41 List fields = new ArrayList();
42 private float boost = 1.0f;
43
44 /** Constructs a new document with no fields. */
45 public Document() {}
46
47
48 /** Sets a boost factor for hits on any field of this document. This value
49 * will be multiplied into the score of all hits on this document.
50 *
51 * <p>The default value is 1.0.
52 *
53 * <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
54 * each field in this document. Thus, this method in effect sets a default
55 * boost for the fields of this document.
56 *
57 * @see Fieldable#setBoost(float)
58 */
59 public void setBoost(float boost) {
60 this.boost = boost;
61 }
62
63 /** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
64 *
65 * <p>Note that once a document is indexed this value is no longer available
66 * from the index. At search time, for retrieved documents, this method always
67 * returns 1. This however does not mean that the boost value set at indexing
68 * time was ignored - it was just combined with other indexing time factors and
69 * stored elsewhere, for better indexing and search performance. (For more
70 * information see the "norm(t,d)" part of the scoring formula in
71 * {@link org.apache.lucene.search.Similarity Similarity}.)
72 *
73 * @see #setBoost(float)
74 */
75 public float getBoost() {
76 return boost;
77 }
78
79 /**
80 * <p>Adds a field to a document. Several fields may be added with
81 * the same name. In this case, if the fields are indexed, their text is
82 * treated as though appended for the purposes of search.</p>
83 * <p> Note that add like the removeField(s) methods only makes sense
84 * prior to adding a document to an index. These methods cannot
85 * be used to change the content of an existing index! In order to achieve this,
86 * a document has to be deleted from an index and a new changed version of that
87 * document has to be added.</p>
88 */
89 public final void add(Fieldable field) {
90 fields.add(field);
91 }
92
93 /**
94 * <p>Removes field with the specified name from the document.
95 * If multiple fields exist with this name, this method removes the first field that has been added.
96 * If there is no field with the specified name, the document remains unchanged.</p>
97 * <p> Note that the removeField(s) methods like the add method only make sense
98 * prior to adding a document to an index. These methods cannot
99 * be used to change the content of an existing index! In order to achieve this,
100 * a document has to be deleted from an index and a new changed version of that
101 * document has to be added.</p>
102 */
103 public final void removeField(String name) {
104 Iterator it = fields.iterator();
105 while (it.hasNext()) {
106 Fieldable field = (Fieldable)it.next();
107 if (field.name().equals(name)) {
108 it.remove();
109 return;
110 }
111 }
112 }
113
114 /**
115 * <p>Removes all fields with the given name from the document.
116 * If there is no field with the specified name, the document remains unchanged.</p>
117 * <p> Note that the removeField(s) methods like the add method only make sense
118 * prior to adding a document to an index. These methods cannot
119 * be used to change the content of an existing index! In order to achieve this,
120 * a document has to be deleted from an index and a new changed version of that
121 * document has to be added.</p>
122 */
123 public final void removeFields(String name) {
124 Iterator it = fields.iterator();
125 while (it.hasNext()) {
126 Fieldable field = (Fieldable)it.next();
127 if (field.name().equals(name)) {
128 it.remove();
129 }
130 }
131 }
132
133 /** Returns a field with the given name if any exist in this document, or
134 * null. If multiple fields exists with this name, this method returns the
135 * first value added.
136 * Do not use this method with lazy loaded fields.
137 */
138 public final Field getField(String name) {
139 for (int i = 0; i < fields.size(); i++) {
140 Field field = (Field)fields.get(i);
141 if (field.name().equals(name))
142 return field;
143 }
144 return null;
145 }
146
147
148 /** Returns a field with the given name if any exist in this document, or
149 * null. If multiple fields exists with this name, this method returns the
150 * first value added.
151 */
152 public Fieldable getFieldable(String name) {
153 for (int i = 0; i < fields.size(); i++) {
154 Fieldable field = (Fieldable)fields.get(i);
155 if (field.name().equals(name))
156 return field;
157 }
158 return null;
159 }
160
161 /** Returns the string value of the field with the given name if any exist in
162 * this document, or null. If multiple fields exist with this name, this
163 * method returns the first value added. If only binary fields with this name
164 * exist, returns null.
165 */
166 public final String get(String name) {
167 for (int i = 0; i < fields.size(); i++) {
168 Fieldable field = (Fieldable)fields.get(i);
169 if (field.name().equals(name) && (!field.isBinary()))
170 return field.stringValue();
171 }
172 return null;
173 }
174
175 /** Returns an Enumeration of all the fields in a document.
176 * @deprecated use {@link #getFields()} instead
177 */
178 public final Enumeration fields() {
179 return new Enumeration() {
180 final Iterator iter = fields.iterator();
181 public boolean hasMoreElements() {
182 return iter.hasNext();
183 }
184 public Object nextElement() {
185 return iter.next();
186 }
187 };
188 }
189
190 /** Returns a List of all the fields in a document.
191 * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
192 * <i>not</i> available in documents retrieved from the index, e.g. with {@link
193 * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link IndexReader#document(int)}.
194 */
195 public final List getFields() {
196 return fields;
197 }
198
199 /**
200 * Returns an array of {@link Field}s with the given name.
201 * This method can return <code>null</code>.
202 * Do not use with lazy loaded fields.
203 *
204 * @param name the name of the field
205 * @return a <code>Field[]</code> array
206 */
207 public final Field[] getFields(String name) {
208 List result = new ArrayList();
209 for (int i = 0; i < fields.size(); i++) {
210 Field field = (Field)fields.get(i);
211 if (field.name().equals(name)) {
212 result.add(field);
213 }
214 }
215
216 if (result.size() == 0)
217 return null;
218
219 return (Field[])result.toArray(new Field[result.size()]);
220 }
221
222
223 /**
224 * Returns an array of {@link Fieldable}s with the given name.
225 * This method can return <code>null</code>.
226 *
227 * @param name the name of the field
228 * @return a <code>Fieldable[]</code> array or <code>null</code>
229 */
230 public Fieldable[] getFieldables(String name) {
231 List result = new ArrayList();
232 for (int i = 0; i < fields.size(); i++) {
233 Fieldable field = (Fieldable)fields.get(i);
234 if (field.name().equals(name)) {
235 result.add(field);
236 }
237 }
238
239 if (result.size() == 0)
240 return null;
241
242 return (Fieldable[])result.toArray(new Fieldable[result.size()]);
243 }
244
245
246 /**
247 * Returns an array of values of the field specified as the method parameter.
248 * This method can return <code>null</code>.
249 *
250 * @param name the name of the field
251 * @return a <code>String[]</code> of field values or <code>null</code>
252 */
253 public final String[] getValues(String name) {
254 List result = new ArrayList();
255 for (int i = 0; i < fields.size(); i++) {
256 Fieldable field = (Fieldable)fields.get(i);
257 if (field.name().equals(name) && (!field.isBinary()))
258 result.add(field.stringValue());
259 }
260
261 if (result.size() == 0)
262 return null;
263
264 return (String[])result.toArray(new String[result.size()]);
265 }
266
267 /**
268 * Returns an array of byte arrays for of the fields that have the name specified
269 * as the method parameter. This method will return <code>null</code> if no
270 * binary fields with the specified name are available.
271 *
272 * @param name the name of the field
273 * @return a <code>byte[][]</code> of binary field values or <code>null</code>
274 */
275 public final byte[][] getBinaryValues(String name) {
276 List result = new ArrayList();
277 for (int i = 0; i < fields.size(); i++) {
278 Fieldable field = (Fieldable)fields.get(i);
279 if (field.name().equals(name) && (field.isBinary()))
280 result.add(field.binaryValue());
281 }
282
283 if (result.size() == 0)
284 return null;
285
286 return (byte[][])result.toArray(new byte[result.size()][]);
287 }
288
289 /**
290 * Returns an array of bytes for the first (or only) field that has the name
291 * specified as the method parameter. This method will return <code>null</code>
292 * if no binary fields with the specified name are available.
293 * There may be non-binary fields with the same name.
294 *
295 * @param name the name of the field.
296 * @return a <code>byte[]</code> containing the binary field value or <code>null</code>
297 */
298 public final byte[] getBinaryValue(String name) {
299 for (int i=0; i < fields.size(); i++) {
300 Fieldable field = (Fieldable)fields.get(i);
301 if (field.name().equals(name) && (field.isBinary()))
302 return field.binaryValue();
303 }
304 return null;
305 }
306
307 /** Prints the fields of a document for human consumption. */
308 public final String toString() {
309 StringBuffer buffer = new StringBuffer();
310 buffer.append("Document<");
311 for (int i = 0; i < fields.size(); i++) {
312 Fieldable field = (Fieldable)fields.get(i);
313 buffer.append(field.toString());
314 if (i != fields.size()-1)
315 buffer.append(" ");
316 }
317 buffer.append(">");
318 return buffer.toString();
319 }
320 }