Source code: com/RuntimeCollective/search/bean/SearchCriterion.java
1 /* $Header: /home/CVS/rjp/src/com/RuntimeCollective/search/bean/SearchCriterion.java,v 1.16 2003/09/30 15:12:57 joe Exp $
2 * $Revision: 1.16 $
3 * $Date: 2003/09/30 15:12:57 $
4 *
5 * ====================================================================
6 *
7 * Josephine : http://www.runtime-collective.com/josephine/index.html
8 *
9 * Copyright (C) 2003 Runtime Collective
10 *
11 * This product includes software developed by the
12 * Apache Software Foundation (http://www.apache.org/).
13 *
14 * This library is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU Lesser General Public
16 * License as published by the Free Software Foundation; either
17 * version 2.1 of the License, or (at your option) any later version.
18 *
19 * This library is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * Lesser General Public License for more details.
23 *
24 * You should have received a copy of the GNU Lesser General Public
25 * License along with this library; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 *
28 */
29
30 package com.RuntimeCollective.search.bean;
31
32 import java.util.StringTokenizer;
33
34 import com.RuntimeCollective.webapps.RuntimeParameters;
35
36 import org.apache.oro.text.regex.Util;
37 import org.apache.oro.text.regex.Perl5Matcher;
38 import org.apache.oro.text.regex.Perl5Compiler;
39 import org.apache.oro.text.regex.Perl5Pattern;
40 import org.apache.oro.text.regex.Pattern;
41 import org.apache.oro.text.regex.StringSubstitution;
42 import org.apache.oro.text.regex.MalformedPatternException;
43
44 import org.apache.log4j.*;
45 import org.apache.log4j.xml.*;
46 import org.apache.log4j.net.*;
47
48 /**
49 * One criterion to search by.
50 * Construct a new <code>Criterion<code>, giving the string to search for, e.g. "fish rubbish goat". Enclose sentences in quotes.
51 * You add criteria to a <code>Search</code>, before calling Search's <code>getResults()</code> method.
52 *
53 * @author Joe Holmberg
54 * @version $Id: SearchCriterion.java,v 1.16 2003/09/30 15:12:57 joe Exp $
55 */
56 public class SearchCriterion {
57
58 /**
59 * These are the current reserved characters used by Lucene. Any search term
60 * containing these will cause a problem. Therefore every character listed
61 * below gets escaped ('\' is placed infront of it). It is important that
62 * '\' comes at the beginning of the array otherwise the '\' used to escaped
63 * the other characters will themselves get escaped, which is not the desired
64 * behaviour.
65 */
66 public static final char[] SPECIAL_CHARACTERS =
67 new char[]{'\\', '+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']',
68 '^', '~', '*', '?', ':'};
69 // FR: '"' was in the special characters, that messed up phrases, so removed
70
71 protected String criterion;
72 protected boolean findAllWords;
73 protected String field;
74 protected int weight = DEFAULT_WEIGHT;
75
76 protected static final int DEFAULT_WEIGHT = 1;
77
78 /**
79 * Make a new SearchCriterion, searching for <b>any</b> of <code>criterion</code>
80 * @param criterion The keywords, or phrase, to search for
81 */
82 public SearchCriterion(String criterion) {
83 this(criterion, false);
84 }
85
86 /**
87 * Make a new SearchCriterion
88 * @param criterion The keywords, or phrase, to search for
89 * @param findAllWords Whether to search for all of these words (<code>true</code>), or any of them (<code>false</code>)
90 */
91 public SearchCriterion(String criterion, boolean findAllWords) {
92 this(criterion, findAllWords, "");
93 }
94
95
96 /**
97 * Make a new SearchCriterion
98 *
99 * @param criterion The keywords, or phrase, to search for
100 * @param field Restrict the criteria to the specified <code>Field</code>.
101 */
102 public SearchCriterion(String criterion, String field) {
103 this(criterion, false, field);
104 }
105
106
107 /**
108 * Make a new SearchCriterion
109 * @param criterion The keywords, or phrase, to search for
110 * @param findAllWords Whether to search for all of these words (<code>true</code>), or any of them (<code>false</code>)
111 * @param field Restrict these criterion to the specified <code>Field</code>
112 */
113 public SearchCriterion(String criterion, boolean findAllWords, String field) {
114 setCriterion(criterion);
115 setFindAllWords(findAllWords);
116 setField(field);
117 }
118
119 /**
120 * Make a new SearchCriterion
121 * @param criterion The keywords, or phrase, to search for
122 * @param findAllWords Whether to search for all of these words (<code>true</code>), or any of them (<code>false</code>)
123 * @param field Restrict these criterion to the specified <code>Field</code>
124 * @param weight The weight
125 */
126 public SearchCriterion(String criterion, boolean findAllWords, String field, int weight) {
127 this(criterion, findAllWords, field);
128 setWeight(weight);
129 }
130
131 /** Get the keywords, or phrase, to search for */
132 public String getCriterion() { return this.criterion; }
133 /** Set the keywords, or phrase, to search for. Also cleans the given criterion of disallowed characters - see <code>cleanCriterion</code> */
134 public void setCriterion(String criterion) {
135 this.criterion = cleanCriterion(criterion);
136 }
137
138 /** Get Whether to search for all of these words (true), or any of them (false) */
139 public boolean getFindAllWords() { return this.findAllWords; }
140 /** Set Whether to search for all of these words (true), or any of them (false) */
141 public void setFindAllWords(boolean findAllWords) { this.findAllWords = findAllWords; }
142
143 /** Get the Field to limit this criterion to (optional) */
144 public String getField() { return this.field; }
145 /** Set the Field to limit this criterion to (optional) */
146 public void setField(String field) { this.field = field; }
147
148 /** Get the Weight, default to DEFAULT_WEIGHT */
149 public int getWeight() { return this.weight; }
150 /** Set the Weight, default to DEFAULT_WEIGHT */
151 public void setWeight(int weight) { this.weight = weight; }
152
153 /**
154 * Returns a Lucene query string for searching for the given criteria
155 */
156 public String getQuery() {
157
158 // Ticket #4119 : if the criterion is null (eg "??" was typed), return an empty string
159 String crit = getCriterion();
160 //RuntimeParameters.logDebug(this, "Crit: "+crit+" .");
161 if ((crit == null) || (crit.equals("")))
162 return "";
163
164 // Whether we're inside a set of quotes
165 boolean inPhrase = false;
166
167 // Break down into tokens
168 StringBuffer query = new StringBuffer(40);
169
170 StringTokenizer st = new StringTokenizer(crit);
171 String token, newPart;
172 while (st.hasMoreTokens()) {
173 token = st.nextToken();
174
175 // Check if we're in a phrase
176 if (token.startsWith("\"")) {
177 inPhrase = true;
178
179 // Sort out field first
180 if (!field.equals("")) {
181 token = field + ":" + token;
182 }
183
184 if (findAllWords) {
185 token = "+" + token;
186 }
187 }
188
189
190 // Ticket 4098: transform "farmer's" into "farmer"
191 int apos = token.indexOf('\'');
192 if (apos > 0)
193 token = token.substring(0, apos - 1);
194
195 newPart = token;
196 if (!inPhrase) {
197
198 // Sort out field first
199 if (!field.equals("")) {
200 newPart = field + ":" + newPart;
201 }
202
203 // Now sort out findAllWords
204 if (findAllWords) {
205 newPart = "+" + newPart;
206 }
207
208 // Add "*" for partial words
209 newPart = newPart + "*";
210
211 // Add weight if necessary
212 if (getWeight() != DEFAULT_WEIGHT) {
213 newPart = newPart + "^" + getWeight();
214 }
215
216 // Add final space
217 newPart = newPart + " ";
218
219 } else {
220 // We want to treat the quoted phrase as one large token
221 query.append(" ");
222 }
223
224 if (token.endsWith("\"")) {
225 inPhrase = false;
226 newPart = newPart.substring(0, newPart.length() - 1) + "*\"";
227
228 // Add weight if necessary
229 if (getWeight() != DEFAULT_WEIGHT) {
230 newPart = newPart + "^" + getWeight();
231 }
232
233 // Add final space
234 newPart = newPart + " ";
235 }
236
237 // RuntimeParameters.logDebug(this, "New token : " + newPart);
238 query.append(newPart);
239 }
240
241 return query.toString().trim();
242 }
243
244 // Returns a String array of all the characters that will be replaced with a space
245 /*
246 protected static char[] getBadSpaceCharacters() {
247 return new char[]{ stuff }
248 }
249 */
250
251
252
253 /**
254 * Replaces +, -, :, ( and ) with a space in the given String,
255 * using Oro regular expressions
256 * @param criterion the String to remove characters from
257 * @return the String, with these characters replaced with a space
258 */
259 public static String cleanCriterion(String criterion) {
260
261 // Remove all naughty characters (+, - etc.) from the criteria
262 String munged = criterion.toLowerCase();
263
264 Perl5Compiler compiler = new Perl5Compiler();
265 Perl5Matcher matcher = new Perl5Matcher();
266 StringSubstitution space = new StringSubstitution(" ");
267
268 try {
269 for (int i = 0; i < SPECIAL_CHARACTERS.length; i++) {
270 String pt = "\\" + SPECIAL_CHARACTERS[i];
271 Pattern pattern = compiler.compile(pt);
272
273 // munged = Util.substitute(matcher, pattern, space, munged,
274 // Util.SUBSTITUTE_ALL);
275
276 // Instead of replacing character with a space we escape it.
277 StringSubstitution replacement = new StringSubstitution(pt);
278 munged = Util.substitute(matcher, pattern, replacement, munged,
279 Util.SUBSTITUTE_ALL);
280 }
281
282 criterion = munged;
283 }
284 catch (MalformedPatternException e) {
285 // This is screwed... we didn't expect THAT!
286 RuntimeParameters.logDebug("SearchCriterion", "COULDN'T clean... bad " +
287 "regular expression!" + e);
288 }
289
290 // Ticket #4119 : trimming
291 criterion = criterion.trim();
292
293 // If there is an open quote, close it so lucene's parser doesn't barf
294 if (numberOfDoubleQuotes(criterion) % 2 == 1) {
295 criterion = criterion + "\"";
296 }
297
298 return criterion;
299
300 }
301
302 private static int numberOfDoubleQuotes (String string) {
303 int count = 0;
304 int pos = string.indexOf("\"", 0);
305
306 while (pos != -1) {
307 count++;
308 pos = string.indexOf("\"", ++pos);
309 }
310
311 return count;
312 }
313
314 public static void main(String[] args) {
315 RuntimeParameters.setLog4jLogging(false);
316 RuntimeParameters.setLogLevel(RuntimeParameters.LOG_LEVEL_DEBUG);
317
318 SearchCriterion sc = new SearchCriterion("\"some phrase\"", true);
319 RuntimeParameters.log(SearchCriterion.class.getName(), "Query : " + sc.getQuery());
320 }
321
322 }