Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/eireneh/bible/book/BookUtil.java


1   
2   package com.eireneh.bible.book;
3   
4   import java.io.*;
5   import java.util.*;
6   import java.net.*;
7   
8   import org.w3c.dom.*;
9   
10  import com.eireneh.bible.passage.*;
11  import com.eireneh.bible.util.*;
12  import com.eireneh.util.StringUtil;
13  
14  /**
15  * The BookUtil class provides utility functions for the various Books.
16  * 
17  * <table border='1' cellPadding='3' cellSpacing='0' width="100%">
18  * <tr><td bgColor='white'class='TableRowColor'><font size='-7'>
19  * Distribution Licence:<br />
20  * Project B is free software; you can redistribute it
21  * and/or modify it under the terms of the GNU General Public License,
22  * version 2 as published by the Free Software Foundation.<br />
23  * This program is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26  * General Public License for more details.<br />
27  * The License is available on the internet
28  * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, by writing to
29  * <i>Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
30  * MA 02111-1307, USA</i>, Or locally at the Licence link below.<br />
31  * The copyright to this program is held by its authors.
32  * </font></td></tr></table>
33  * @see <a href='http://www.eireneh.com/servlets/Web'>Project B Home</a>
34  * @see docs.Licence
35  * @author Joe Walker
36  * @version D2.I0.T0
37  */
38  public class BookUtil
39  {
40      /**
41      * Ensure we can not be instansiated
42      */
43      private BookUtil()
44      {
45      }
46  
47      /**
48      * A basic version of getPassageTally(String[]) simply calls
49      * getPassage(String) in a loop for each word, adding the Verses
50      * to an PassageTally that is returned
51      * @param version The version to search using
52      * @param tally The PassageTally to update
53      * @param words The words to search for
54      * @return The tallied Passage
55      */
56      public static void updatePassageTally(Bible version, PassageTally tally, String[] words) throws BookException
57      {
58          for (int i=0; i<words.length; i++)
59          {
60              tally.addAll(version.findPassage(words[i]));
61          }
62      }
63  
64      /**
65      * This is similar to updatePassageTally() however if a verse matches
66      * many words it still only adds on for that verse in the given tally
67      * @param version The version to search using
68      * @param tally The PassageTally to update
69      * @param words The words to search for
70      * @return The tallied Passage
71      */
72      public static void updatePassageTallyFlat(Bible version, PassageTally tally, String[] words) throws BookException
73      {
74          PassageTally temp = new PassageTally();
75  
76          for (int i=0; i<words.length; i++)
77          {
78              temp.addAll(version.findPassage(words[i]));
79          }
80  
81          temp.flatten();
82          tally.addAll(temp);
83      }
84  
85      /**
86      * A basic version of getPassage(String[]) simply calls
87      * getPassage(String) in a loop for each word, adding the Verses
88      * to an Passage that is returned
89      * @param version The version to search using
90      * @param words The words to search for
91      * @return The Passage
92      */
93      public static Passage getPassage(Bible version, String[] words) throws BookException
94      {
95          Passage ref = PassageFactory.createPassage();
96  
97          for (int i=0; i<words.length; i++)
98          {
99              ref.addAll(version.findPassage(words[i]));
100         }
101 
102         return ref;
103     }
104 
105     /**
106     * This is a helper method to detect an attribute of para=true in the
107     * first ref node in the document. Now this code looks very badly
108     * written - it is very deeply nested. 8 levels of indentation is
109     * enough to give any code analysis tool a fit. However I think it is
110     * such simple code, and the alternative does not actually make it any
111     * simpler?...
112     * @param doc The document to search
113     * @return True if this ref contains a new paragraph
114     */
115     public static boolean isNewPara(BibleEle doc)
116     {
117         // This should be the <bible> node
118         Node bible = doc.getDocument().getDocumentElement();
119 
120         // Loop through the <bible> node
121         NodeList bible_list = bible.getChildNodes();
122         for (int i=0; i<bible_list.getLength(); i++)
123         {
124             // If we have found a <section> node
125             Node section = bible_list.item(i);
126             if (section.getNodeName().equals("section"))
127             {
128                 // Loop through the <section> node
129                 NodeList section_list = section.getChildNodes();
130                 for (int j=0; j<section_list.getLength(); j++)
131                 {
132                     // If we have a <ref> node
133                     Node ref = section_list.item(j);
134                     if (ref.getNodeName().equals("ref"))
135                     {
136                         // Loop through the <ref> attributes
137                         NamedNodeMap attr = ref.getAttributes();
138                         if (attr != null)
139                         {
140                             for (int k=0; k<attr.getLength(); k++)
141                             {
142                                 // If we have a para attribute
143                                 Node para = attr.item(k);
144                                 if (para.getNodeName().equals("para"))
145                                 {
146                                     if (para.getNodeValue().equals("true"))
147                                     {
148                                         return true;
149                                     }
150                                     else
151                                     {
152                                         return false;
153                                     }
154                                 }
155                             }
156                         }
157                     }
158                 }
159             }
160         }
161 
162         return false;
163     }
164 
165     /**
166     * Take a string and tokenize it using " " and "--" as delimiters
167     * into an Array of Strings. There is a question mark over what to do
168     * with initial spaces. This algorithm disgards them, I'm not sure if
169     * this is the right thing to do.
170     * @param command The string to parse.
171     * @return The string array
172     */
173     public static String[] tokenize(String sentance)
174     {
175         Vector tokens = new Vector();
176 
177         int pos = 0;
178         String temp;
179         boolean alive = true;
180 
181         while (alive)
182         {
183             // Find the next space and double dash
184             int next_space = sentance.indexOf(" ", pos);
185             int next_ddash = sentance.indexOf("--", pos);
186 
187             // If there is a space just after the ddash then ignore the ddash
188             if (next_space == next_ddash + 2)
189             {
190                 next_ddash = -1;
191             }
192 
193             // If there is a ddash just after the space then ignore the space
194             if (next_ddash == next_space + 1)
195             {
196                 next_space = -1;
197             }
198 
199             // if there are no more tokens then just add in what we've got.
200             if (next_space == -1 && next_ddash == -1)
201             {
202                 temp = sentance.substring(pos);
203                 alive = false;
204             }
205             // Space is next if it is not -1 and it is less than ddash
206             else if ((next_space != -1 && next_space < next_ddash) ||
207                     (next_ddash == -1))
208             {
209                 // The next separator is a space
210                 temp = sentance.substring(pos, next_space) + " ";
211                 pos = next_space + 1;
212             }
213             else
214             {
215                 // The next separator is a ddash
216                 temp = sentance.substring(pos, next_ddash) + "--";
217                 pos = next_ddash + 2;
218             }
219 
220             if (temp != null && !temp.trim().equals(""))
221                 tokens.addElement(temp);
222         }
223 
224         // Create a String[]
225         String[] retcode = new String[tokens.size()];
226         int i = 0;
227         for (Enumeration en = tokens.elements(); en.hasMoreElements();)
228         {
229             retcode[i++] = (String) en.nextElement();
230         }
231 
232         return retcode;
233     }
234 
235     /**
236     * From a sentance get a list of words (in original order) without
237     * any punctuation, and all in lower case.
238     */
239     public static String[] getWords(String sentance)
240     {
241         String words[] = tokenize(sentance);
242         String[] retcode = new String[words.length];
243 
244         // Remove the punctuation from the ends of the words.
245         for (int i=0; i<words.length; i++)
246         {
247             retcode[i] = stripPunctuationWord(words[i]).toLowerCase();
248         }
249 
250         return retcode;
251     }
252 
253     /**
254     * From a sentance get a list of words (in original order) without
255     * any punctuation, and all in lower case.
256     */
257     public static String[] stripPunctuation(String[] words)
258     {
259         String[] retcode = new String[words.length];
260 
261         // Remove the punctuation from the ends of the words.
262         for (int i=0; i<words.length; i++)
263         {
264             retcode[i] = stripPunctuationWord(words[i]);
265         }
266 
267         return retcode;
268     }
269 
270     /**
271     * Remove the punctuation from the ends of the word
272     */
273     protected static String stripPunctuationWord(String word)
274     {
275         int first = firstLetter(word);
276         int last = lastLetter(word)+1;
277 
278         if (first > last) return word;
279 
280         return word.substring(first, last);
281     }
282 
283     /**
284     * From a sentance get a list of words (in original order) without
285     * any punctuation, and all in lower case.
286     */
287     public static String[] stripWords(String[] words)
288     {
289         if (words.length == 0)
290             return new String[0];
291 
292         String[] retcode = new String[words.length+1];
293 
294         // The first bit of punctuation is what comes in front of the first word
295         int first = firstLetter(words[0]);
296         if (first == 0) retcode[0] = "";
297         else            retcode[0] = words[0].substring(0, first);
298 
299         // The rest of the words
300         for (int i=1; i<words.length; i++)
301         {
302             retcode[i] = stripWords(words[i-1], words[i]);
303         }
304 
305         // The last bit of punctuation is what comes at the end of the last word
306         int last = lastLetter(words[words.length-1]);
307         if (last == words[words.length-1].length())
308             retcode[words.length] = "";
309         else
310             retcode[words.length] = words[words.length-1].substring(last+1);
311 
312         return retcode;
313     }
314 
315     /**
316     * Remove the punctuation from the ends of the word. The special
317     * case is that if the first word ends "--" and the last word has
318     * no punctuation at the beginning, then the answer is "--" and not
319     * "-- ". We miss out the space because "--" is a special separator.
320     * @param first The word to grab the punctuation from the end of
321     * @param last The word to grab the punctuation from the start of
322     * @return The end of the first, a space, and the end of the first
323     */
324     protected static String stripWords(String first, String last)
325     {
326         String init1 = first.substring(lastLetter(first)+1);
327         String init2 = last.substring(0, firstLetter(last));
328 
329         return init1 + init2;
330     }
331     
332     /**
333     * From a sentance get a list of words (in original order) without
334     * any punctuation, and all in lower case.
335     */
336     public static int[] getCases(String[] words)
337     {
338         int[] retcode = new int[words.length];
339 
340         // Remove the punctuation from the ends of the words.
341         for (int i=0; i<words.length; i++)
342         {
343             retcode[i] = PassageUtil.getCase(words[i]);
344         }
345 
346         return retcode;
347     }
348 
349     /**
350     * Where is the first letter in this word
351     * @param word The word to search for letters
352     * @return The offset of the first letter
353     */
354     protected static final int firstLetter(String word)
355     {
356         int first;
357 
358         for (first=0; first<word.length(); first++)
359         {
360             char c = word.charAt(first);
361             if (Character.isLetterOrDigit(c))
362                 break;
363         }
364 
365         return first;
366     }
367     
368     /**
369     * Where is the last letter in this word
370     * @param word The word to search for letters
371     * @return The offset of the last letter
372     */
373     protected static final int lastLetter(String word)
374     {
375         int last;
376 
377         for (last=word.length()-1; last>=0; last--)
378         {
379             char c = word.charAt(last);
380             if (Character.isLetterOrDigit(c))
381                 break;
382         }
383 
384         return last;
385     }
386 }