Source code: com/eireneh/bible/book/BookUtil.java
1
2 package com.eireneh.bible.book;
3
4 import java.io.*;
5 import java.util.*;
6 import java.net.*;
7
8 import org.w3c.dom.*;
9
10 import com.eireneh.bible.passage.*;
11 import com.eireneh.bible.util.*;
12 import com.eireneh.util.StringUtil;
13
14 /**
15 * The BookUtil class provides utility functions for the various Books.
16 *
17 * <table border='1' cellPadding='3' cellSpacing='0' width="100%">
18 * <tr><td bgColor='white'class='TableRowColor'><font size='-7'>
19 * Distribution Licence:<br />
20 * Project B is free software; you can redistribute it
21 * and/or modify it under the terms of the GNU General Public License,
22 * version 2 as published by the Free Software Foundation.<br />
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * General Public License for more details.<br />
27 * The License is available on the internet
28 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, by writing to
29 * <i>Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
30 * MA 02111-1307, USA</i>, Or locally at the Licence link below.<br />
31 * The copyright to this program is held by its authors.
32 * </font></td></tr></table>
33 * @see <a href='http://www.eireneh.com/servlets/Web'>Project B Home</a>
34 * @see docs.Licence
35 * @author Joe Walker
36 * @version D2.I0.T0
37 */
38 public class BookUtil
39 {
/**
 * Private constructor: every method of this class is static, so there
 * is no reason for it to be instantiated.
 */
private BookUtil()
{
    // Intentionally empty.
}
46
47 /**
48 * A basic version of getPassageTally(String[]) simply calls
49 * getPassage(String) in a loop for each word, adding the Verses
50 * to an PassageTally that is returned
51 * @param version The version to search using
52 * @param tally The PassageTally to update
53 * @param words The words to search for
54 * @return The tallied Passage
55 */
56 public static void updatePassageTally(Bible version, PassageTally tally, String[] words) throws BookException
57 {
58 for (int i=0; i<words.length; i++)
59 {
60 tally.addAll(version.findPassage(words[i]));
61 }
62 }
63
64 /**
65 * This is similar to updatePassageTally() however if a verse matches
66 * many words it still only adds on for that verse in the given tally
67 * @param version The version to search using
68 * @param tally The PassageTally to update
69 * @param words The words to search for
70 * @return The tallied Passage
71 */
72 public static void updatePassageTallyFlat(Bible version, PassageTally tally, String[] words) throws BookException
73 {
74 PassageTally temp = new PassageTally();
75
76 for (int i=0; i<words.length; i++)
77 {
78 temp.addAll(version.findPassage(words[i]));
79 }
80
81 temp.flatten();
82 tally.addAll(temp);
83 }
84
85 /**
86 * A basic version of getPassage(String[]) simply calls
87 * getPassage(String) in a loop for each word, adding the Verses
88 * to an Passage that is returned
89 * @param version The version to search using
90 * @param words The words to search for
91 * @return The Passage
92 */
93 public static Passage getPassage(Bible version, String[] words) throws BookException
94 {
95 Passage ref = PassageFactory.createPassage();
96
97 for (int i=0; i<words.length; i++)
98 {
99 ref.addAll(version.findPassage(words[i]));
100 }
101
102 return ref;
103 }
104
105 /**
106 * This is a helper method to detect an attribute of para=true in the
107 * first ref node in the document. Now this code looks very badly
108 * written - it is very deeply nested. 8 levels of indentation is
109 * enough to give any code analysis tool a fit. However I think it is
110 * such simple code, and the alternative does not actually make it any
111 * simpler?...
112 * @param doc The document to search
113 * @return True if this ref contains a new paragraph
114 */
115 public static boolean isNewPara(BibleEle doc)
116 {
117 // This should be the <bible> node
118 Node bible = doc.getDocument().getDocumentElement();
119
120 // Loop through the <bible> node
121 NodeList bible_list = bible.getChildNodes();
122 for (int i=0; i<bible_list.getLength(); i++)
123 {
124 // If we have found a <section> node
125 Node section = bible_list.item(i);
126 if (section.getNodeName().equals("section"))
127 {
128 // Loop through the <section> node
129 NodeList section_list = section.getChildNodes();
130 for (int j=0; j<section_list.getLength(); j++)
131 {
132 // If we have a <ref> node
133 Node ref = section_list.item(j);
134 if (ref.getNodeName().equals("ref"))
135 {
136 // Loop through the <ref> attributes
137 NamedNodeMap attr = ref.getAttributes();
138 if (attr != null)
139 {
140 for (int k=0; k<attr.getLength(); k++)
141 {
142 // If we have a para attribute
143 Node para = attr.item(k);
144 if (para.getNodeName().equals("para"))
145 {
146 if (para.getNodeValue().equals("true"))
147 {
148 return true;
149 }
150 else
151 {
152 return false;
153 }
154 }
155 }
156 }
157 }
158 }
159 }
160 }
161
162 return false;
163 }
164
165 /**
166 * Take a string and tokenize it using " " and "--" as delimiters
167 * into an Array of Strings. There is a question mark over what to do
168 * with initial spaces. This algorithm disgards them, I'm not sure if
169 * this is the right thing to do.
170 * @param command The string to parse.
171 * @return The string array
172 */
173 public static String[] tokenize(String sentance)
174 {
175 Vector tokens = new Vector();
176
177 int pos = 0;
178 String temp;
179 boolean alive = true;
180
181 while (alive)
182 {
183 // Find the next space and double dash
184 int next_space = sentance.indexOf(" ", pos);
185 int next_ddash = sentance.indexOf("--", pos);
186
187 // If there is a space just after the ddash then ignore the ddash
188 if (next_space == next_ddash + 2)
189 {
190 next_ddash = -1;
191 }
192
193 // If there is a ddash just after the space then ignore the space
194 if (next_ddash == next_space + 1)
195 {
196 next_space = -1;
197 }
198
199 // if there are no more tokens then just add in what we've got.
200 if (next_space == -1 && next_ddash == -1)
201 {
202 temp = sentance.substring(pos);
203 alive = false;
204 }
205 // Space is next if it is not -1 and it is less than ddash
206 else if ((next_space != -1 && next_space < next_ddash) ||
207 (next_ddash == -1))
208 {
209 // The next separator is a space
210 temp = sentance.substring(pos, next_space) + " ";
211 pos = next_space + 1;
212 }
213 else
214 {
215 // The next separator is a ddash
216 temp = sentance.substring(pos, next_ddash) + "--";
217 pos = next_ddash + 2;
218 }
219
220 if (temp != null && !temp.trim().equals(""))
221 tokens.addElement(temp);
222 }
223
224 // Create a String[]
225 String[] retcode = new String[tokens.size()];
226 int i = 0;
227 for (Enumeration en = tokens.elements(); en.hasMoreElements();)
228 {
229 retcode[i++] = (String) en.nextElement();
230 }
231
232 return retcode;
233 }
234
235 /**
236 * From a sentance get a list of words (in original order) without
237 * any punctuation, and all in lower case.
238 */
239 public static String[] getWords(String sentance)
240 {
241 String words[] = tokenize(sentance);
242 String[] retcode = new String[words.length];
243
244 // Remove the punctuation from the ends of the words.
245 for (int i=0; i<words.length; i++)
246 {
247 retcode[i] = stripPunctuationWord(words[i]).toLowerCase();
248 }
249
250 return retcode;
251 }
252
253 /**
254 * From a sentance get a list of words (in original order) without
255 * any punctuation, and all in lower case.
256 */
257 public static String[] stripPunctuation(String[] words)
258 {
259 String[] retcode = new String[words.length];
260
261 // Remove the punctuation from the ends of the words.
262 for (int i=0; i<words.length; i++)
263 {
264 retcode[i] = stripPunctuationWord(words[i]);
265 }
266
267 return retcode;
268 }
269
270 /**
271 * Remove the punctuation from the ends of the word
272 */
273 protected static String stripPunctuationWord(String word)
274 {
275 int first = firstLetter(word);
276 int last = lastLetter(word)+1;
277
278 if (first > last) return word;
279
280 return word.substring(first, last);
281 }
282
283 /**
284 * From a sentance get a list of words (in original order) without
285 * any punctuation, and all in lower case.
286 */
287 public static String[] stripWords(String[] words)
288 {
289 if (words.length == 0)
290 return new String[0];
291
292 String[] retcode = new String[words.length+1];
293
294 // The first bit of punctuation is what comes in front of the first word
295 int first = firstLetter(words[0]);
296 if (first == 0) retcode[0] = "";
297 else retcode[0] = words[0].substring(0, first);
298
299 // The rest of the words
300 for (int i=1; i<words.length; i++)
301 {
302 retcode[i] = stripWords(words[i-1], words[i]);
303 }
304
305 // The last bit of punctuation is what comes at the end of the last word
306 int last = lastLetter(words[words.length-1]);
307 if (last == words[words.length-1].length())
308 retcode[words.length] = "";
309 else
310 retcode[words.length] = words[words.length-1].substring(last+1);
311
312 return retcode;
313 }
314
315 /**
316 * Remove the punctuation from the ends of the word. The special
317 * case is that if the first word ends "--" and the last word has
318 * no punctuation at the beginning, then the answer is "--" and not
319 * "-- ". We miss out the space because "--" is a special separator.
320 * @param first The word to grab the punctuation from the end of
321 * @param last The word to grab the punctuation from the start of
322 * @return The end of the first, a space, and the end of the first
323 */
324 protected static String stripWords(String first, String last)
325 {
326 String init1 = first.substring(lastLetter(first)+1);
327 String init2 = last.substring(0, firstLetter(last));
328
329 return init1 + init2;
330 }
331
332 /**
333 * From a sentance get a list of words (in original order) without
334 * any punctuation, and all in lower case.
335 */
336 public static int[] getCases(String[] words)
337 {
338 int[] retcode = new int[words.length];
339
340 // Remove the punctuation from the ends of the words.
341 for (int i=0; i<words.length; i++)
342 {
343 retcode[i] = PassageUtil.getCase(words[i]);
344 }
345
346 return retcode;
347 }
348
349 /**
350 * Where is the first letter in this word
351 * @param word The word to search for letters
352 * @return The offset of the first letter
353 */
354 protected static final int firstLetter(String word)
355 {
356 int first;
357
358 for (first=0; first<word.length(); first++)
359 {
360 char c = word.charAt(first);
361 if (Character.isLetterOrDigit(c))
362 break;
363 }
364
365 return first;
366 }
367
368 /**
369 * Where is the last letter in this word
370 * @param word The word to search for letters
371 * @return The offset of the last letter
372 */
373 protected static final int lastLetter(String word)
374 {
375 int last;
376
377 for (last=word.length()-1; last>=0; last--)
378 {
379 char c = word.charAt(last);
380 if (Character.isLetterOrDigit(c))
381 break;
382 }
383
384 return last;
385 }
386 }