Source code: org/apache/xerces/readers/EBCDICRecognizer.java
1 /*
2 * The Apache Software License, Version 1.1
3 *
4 *
5 * Copyright (c) 1999 The Apache Software Foundation. All rights
6 * reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Apache Software Foundation (http://www.apache.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Xerces" and "Apache Software Foundation" must
28 * not be used to endorse or promote products derived from this
29 * software without prior written permission. For written
30 * permission, please contact apache@apache.org.
31 *
32 * 5. Products derived from this software may not be called "Apache",
33 * nor may "Apache" appear in their name, without prior written
34 * permission of the Apache Software Foundation.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This software consists of voluntary contributions made by many
51 * individuals on behalf of the Apache Software Foundation and was
52 * originally based on software copyright (c) 1999, International
53 * Business Machines, Inc., http://www.apache.org. For more
54 * information on the Apache Software Foundation, please see
55 * <http://www.apache.org/>.
56 */
57
58 package org.apache.xerces.readers;
59
60 import org.apache.xerces.framework.XMLErrorReporter;
61 import org.apache.xerces.utils.ChunkyByteArray;
62 import org.apache.xerces.utils.StringPool;
63
64 import java.io.InputStreamReader;
65 import java.io.IOException;
66 import java.io.UnsupportedEncodingException;
67
68 /**
69 *
70 * @version
71 */
72 final class EBCDICRecognizer extends XMLDeclRecognizer {
73 //
74 //
75 //
76 public XMLEntityHandler.EntityReader recognize(XMLEntityReaderFactory readerFactory,
77 XMLEntityHandler entityHandler,
78 XMLErrorReporter errorReporter,
79 boolean sendCharDataAsCharArray,
80 StringPool stringPool,
81 ChunkyByteArray data,
82 boolean xmlDecl,
83 boolean allowJavaEncodingName) throws Exception
84 {
85 XMLEntityHandler.EntityReader reader = null;
86 byte b0 = data.byteAt(0);
87 byte b1 = data.byteAt(1);
88 byte b2 = data.byteAt(2);
89 byte b3 = data.byteAt(3);
90 boolean debug = false;
91
92 if (b0 != 0x4c || b1 != 0x6f || b2 != (byte)0xa7 || b3 != (byte)0x94)
93 return reader;
94 XMLEntityHandler.EntityReader declReader = readerFactory.createCharReader(entityHandler, errorReporter, sendCharDataAsCharArray, new InputStreamReader(data, "CP037"), stringPool);
95 int encoding = prescanXMLDeclOrTextDecl(declReader, xmlDecl);
96 if (encoding == -1) {
97 data.rewind();
98 // REVISIT - The document is not well-formed. There is no encoding, yet the file is
99 // clearly not UTF8.
100 throw new UnsupportedEncodingException(null);
101 }
102 String enc = stringPool.orphanString(encoding).toUpperCase();
103 if ("ISO-10646-UCS-2".equals(enc)) throw new UnsupportedEncodingException(enc);
104 if ("ISO-10646-UCS-4".equals(enc)) throw new UnsupportedEncodingException(enc);
105 if ("UTF-16".equals(enc)) throw new UnsupportedEncodingException(enc);
106 String javaencname = MIME2Java.convert(enc);
107 if (null == javaencname) {
108 if (allowJavaEncodingName) {
109 javaencname = enc;
110 } else {
111 throw new UnsupportedEncodingException(enc);
112 }
113 }
114 try {
115 data.rewind();
116 reader = readerFactory.createCharReader(entityHandler, errorReporter, sendCharDataAsCharArray, new InputStreamReader(data, javaencname), stringPool);
117 } catch (UnsupportedEncodingException e) {
118 throw e;
119 } catch (Exception e) {
120 if( debug == true )
121 e.printStackTrace(); // Internal Error
122 }
123 return reader;
124 }
125 }