Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/RuntimeCollective/search/bean/SearchCriterion.java


1   /* $Header: /home/CVS/rjp/src/com/RuntimeCollective/search/bean/SearchCriterion.java,v 1.16 2003/09/30 15:12:57 joe Exp $
2    * $Revision: 1.16 $
3    * $Date: 2003/09/30 15:12:57 $
4    *
5    * ====================================================================
6    *
7    * Josephine : http://www.runtime-collective.com/josephine/index.html
8    *
9    * Copyright (C) 2003 Runtime Collective
10   * 
11   * This product includes software developed by the
12   * Apache Software Foundation (http://www.apache.org/).
13   *
14   * This library is free software; you can redistribute it and/or
15   * modify it under the terms of the GNU Lesser General Public
16   * License as published by the Free Software Foundation; either
17   * version 2.1 of the License, or (at your option) any later version.
18   *
19   * This library is distributed in the hope that it will be useful,
20   * but WITHOUT ANY WARRANTY; without even the implied warranty of
21   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   * Lesser General Public License for more details.
23   *
24   * You should have received a copy of the GNU Lesser General Public
25   * License along with this library; if not, write to the Free Software
26   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27   *
28   */
29  
30  package com.RuntimeCollective.search.bean;
31  
32  import java.util.StringTokenizer;
33  
34  import com.RuntimeCollective.webapps.RuntimeParameters;
35  
36  import org.apache.oro.text.regex.Util;
37  import org.apache.oro.text.regex.Perl5Matcher;
38  import org.apache.oro.text.regex.Perl5Compiler;
39  import org.apache.oro.text.regex.Perl5Pattern;
40  import org.apache.oro.text.regex.Pattern;
41  import org.apache.oro.text.regex.StringSubstitution;
42  import org.apache.oro.text.regex.MalformedPatternException;
43  
44  import org.apache.log4j.*;
45  import org.apache.log4j.xml.*;
46  import org.apache.log4j.net.*;
47  
48  /**
49   * One criterion to search by.
50   * Construct a new <code>Criterion<code>, giving the string to search for, e.g. "fish rubbish goat".  Enclose sentences in quotes.
51   * You add criteria to a <code>Search</code>, before calling Search's <code>getResults()</code> method.
52   *
53   * @author Joe Holmberg
54   * @version $Id: SearchCriterion.java,v 1.16 2003/09/30 15:12:57 joe Exp $
55   */
56  public class SearchCriterion {
57  
58      /**
59       * These are the current reserved characters used by Lucene. Any search term
60       * containing these will cause a problem. Therefore every character listed
61       * below gets escaped ('\' is placed infront of it). It is important that
62       * '\' comes at the beginning of the array otherwise the '\' used to escaped
63       * the other characters will themselves get escaped, which is not the desired
64       * behaviour.
65       */
66      public static final char[] SPECIAL_CHARACTERS =
67          new char[]{'\\', '+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']',
68                     '^', '~', '*', '?', ':'};
69      // FR: '"' was in the special characters, that messed up phrases, so removed
70      
71      protected String criterion;
72      protected boolean findAllWords;
73      protected String field;
74      protected int weight = DEFAULT_WEIGHT;
75  
76      protected static final int DEFAULT_WEIGHT = 1;
77  
78    /**
79     * Make a new SearchCriterion, searching for <b>any</b> of <code>criterion</code>
80     * @param criterion The keywords, or phrase, to search for
81     */
82    public SearchCriterion(String criterion) {
83      this(criterion, false);
84    }
85  
86    /**
87     * Make a new SearchCriterion
88     * @param criterion The keywords, or phrase, to search for
89     * @param findAllWords Whether to search for all of these words (<code>true</code>), or any of them (<code>false</code>)
90     */
91    public SearchCriterion(String criterion, boolean findAllWords) {
92      this(criterion, findAllWords, "");
93    }
94  
95  
96    /**
97     * Make a new SearchCriterion
98     *
99     * @param criterion The keywords, or phrase, to search for
100    * @param field Restrict the criteria to the specified <code>Field</code>.
101    */
102   public SearchCriterion(String criterion, String field) {
103     this(criterion, false, field);
104   }
105 
106 
107   /**
108    * Make a new SearchCriterion
109    * @param criterion The keywords, or phrase, to search for
110    * @param findAllWords Whether to search for all of these words (<code>true</code>), or any of them (<code>false</code>)
111    * @param field Restrict these criterion to the specified <code>Field</code>
112    */
113   public SearchCriterion(String criterion, boolean findAllWords, String field) {
114     setCriterion(criterion);
115     setFindAllWords(findAllWords);
116     setField(field);
117   }
118 
119   /**
120    * Make a new SearchCriterion
121    * @param criterion The keywords, or phrase, to search for
122    * @param findAllWords Whether to search for all of these words (<code>true</code>), or any of them (<code>false</code>)
123    * @param field Restrict these criterion to the specified <code>Field</code>
124    * @param weight The weight
125    */
126   public SearchCriterion(String criterion, boolean findAllWords, String field, int weight) {
127       this(criterion, findAllWords, field);
128       setWeight(weight);
129   }
130 
131   /** Get the keywords, or phrase, to search for */
132   public String getCriterion() { return this.criterion; }
133   /** Set the keywords, or phrase, to search for.  Also cleans the given criterion of disallowed characters - see <code>cleanCriterion</code> */
134   public void setCriterion(String criterion) {
135     this.criterion = cleanCriterion(criterion);
136   }
137 
138   /** Get  Whether to search for all of these words (true), or any of them (false) */
139   public boolean getFindAllWords() { return this.findAllWords; }
140   /** Set  Whether to search for all of these words (true), or any of them (false) */
141   public void setFindAllWords(boolean findAllWords) { this.findAllWords = findAllWords; }
142 
143   /** Get the Field to limit this criterion to (optional) */
144   public String getField() { return this.field; }
145   /** Set the Field to limit this criterion to (optional) */
146   public void setField(String field) { this.field = field; }
147 
148   /** Get the Weight, default to DEFAULT_WEIGHT */
149   public int getWeight() { return this.weight; }
150   /** Set the Weight, default to DEFAULT_WEIGHT */
151   public void setWeight(int weight) { this.weight = weight; }
152 
153   /**
154    * Returns a Lucene query string for searching for the given criteria
155    */
156   public String getQuery() {
157 
158     // Ticket #4119 : if the criterion is null (eg "??" was typed), return an empty string
159     String crit = getCriterion();
160     //RuntimeParameters.logDebug(this, "Crit: "+crit+" .");
161     if ((crit == null) || (crit.equals("")))
162       return "";
163 
164     // Whether we're inside a set of quotes
165     boolean inPhrase = false;
166 
167     // Break down into tokens
168     StringBuffer query = new StringBuffer(40);
169 
170     StringTokenizer st = new StringTokenizer(crit);
171     String token, newPart;
172     while (st.hasMoreTokens()) {
173       token = st.nextToken();
174 
175       // Check if we're in a phrase
176       if (token.startsWith("\"")) {
177         inPhrase = true;
178 
179         // Sort out field first
180         if (!field.equals("")) {
181           token = field + ":" + token;
182         }
183 
184         if (findAllWords) {
185           token = "+" + token;
186         }
187       }
188 
189 
190       // Ticket 4098: transform "farmer's" into "farmer"
191       int apos = token.indexOf('\'');
192       if (apos > 0)
193         token = token.substring(0, apos - 1);
194 
195       newPart = token;
196       if (!inPhrase) {
197 
198         // Sort out field first
199         if (!field.equals("")) {
200           newPart = field + ":" + newPart;
201         }
202 
203         // Now sort out findAllWords
204         if (findAllWords) {
205           newPart = "+" + newPart;
206         }
207 
208         // Add "*" for partial words
209         newPart = newPart + "*";
210 
211         // Add weight if necessary
212         if (getWeight() != DEFAULT_WEIGHT) {
213             newPart = newPart + "^" + getWeight();
214         }
215 
216         // Add final space
217         newPart = newPart + " ";
218 
219       } else {
220         // We want to treat the quoted phrase as one large token
221         query.append(" ");
222       }
223 
224       if (token.endsWith("\"")) {
225         inPhrase = false;
226         newPart = newPart.substring(0, newPart.length() - 1) + "*\"";
227 
228         // Add weight if necessary
229         if (getWeight() != DEFAULT_WEIGHT) {
230             newPart = newPart + "^" + getWeight();
231         }
232 
233         // Add final space
234         newPart = newPart + " ";
235       }
236 
237       // RuntimeParameters.logDebug(this, "New token : " + newPart);
238       query.append(newPart);
239     }
240 
241     return query.toString().trim();
242   }
243 
244   // Returns a String array of all the characters that will be replaced with a space
245   /*
246     protected static char[] getBadSpaceCharacters() {
247     return new char[]{ stuff }
248     }
249   */
250 
251 
252 
253   /**
254    * Replaces +, -, :, ( and ) with a space in the given String,
255    * using Oro regular expressions
256    * @param criterion the String to remove characters from
257    * @return the String, with these characters replaced with a space
258    */
259   public static String cleanCriterion(String criterion)  {
260 
261       // Remove all naughty characters (+, - etc.) from the criteria
262       String munged = criterion.toLowerCase();
263 
264       Perl5Compiler compiler = new Perl5Compiler();
265       Perl5Matcher matcher = new Perl5Matcher();
266       StringSubstitution space = new StringSubstitution(" ");
267       
268       try {
269           for (int i = 0; i < SPECIAL_CHARACTERS.length; i++) {
270               String pt = "\\" + SPECIAL_CHARACTERS[i];
271               Pattern pattern = compiler.compile(pt);
272               
273               //   munged = Util.substitute(matcher, pattern, space, munged,
274               //  Util.SUBSTITUTE_ALL);
275               
276               // Instead of replacing character with a space we escape it.
277               StringSubstitution replacement = new StringSubstitution(pt);
278               munged = Util.substitute(matcher, pattern, replacement, munged,
279                                        Util.SUBSTITUTE_ALL);
280           }
281           
282           criterion = munged;
283       }
284       catch (MalformedPatternException e) {
285       // This is screwed... we didn't expect THAT!
286       RuntimeParameters.logDebug("SearchCriterion", "COULDN'T clean... bad " +
287                                  "regular expression!" + e);
288     }
289     
290     // Ticket #4119 : trimming
291     criterion = criterion.trim();
292     
293     // If there is an open quote, close it so lucene's parser doesn't barf
294     if (numberOfDoubleQuotes(criterion) % 2 == 1) {
295       criterion = criterion + "\"";
296     }
297     
298     return criterion;
299     
300   }
301   
302   private static int numberOfDoubleQuotes (String string) {
303     int count = 0;
304     int pos = string.indexOf("\"", 0);
305     
306     while (pos != -1) {
307       count++;
308       pos = string.indexOf("\"", ++pos);
309     }
310     
311     return count;
312   }
313 
314   public static void main(String[] args) { 
315     RuntimeParameters.setLog4jLogging(false);
316     RuntimeParameters.setLogLevel(RuntimeParameters.LOG_LEVEL_DEBUG);
317 
318     SearchCriterion sc = new SearchCriterion("\"some phrase\"", true);
319     RuntimeParameters.log(SearchCriterion.class.getName(), "Query : " + sc.getQuery());
320   }
321 
322 }