/*
 * $HeadURL: https://svn.apache.org/repos/asf/jakarta/httpcomponents/httpcore/tags/4.0-alpha2/src/java/org/apache/http/io/ChunkedInputStream.java $
 * $Revision: 411090 $
 * $Date: 2006-06-02 10:39:44 +0200 (Fri, 02 Jun 2006) $
 *
 * ====================================================================
 *
 *  Copyright 2002-2004 The Apache Software Foundation
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.io;

import java.io.IOException;
import java.io.InputStream;

import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.ExceptionUtils;
import org.apache.http.util.HeaderUtils;

/**
 * <p>This class implements chunked transfer coding as described in the
 * <a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6">Section 3.6.1</a>
 * of <a href="http://www.w3.org/Protocols/rfc2616/rfc2616.txt">RFC 2616</a>.
 * It transparently coalesces chunks of a HTTP stream that uses chunked transfer coding.</p>
 *
 * <h3>3.6.1 Chunked Transfer Coding</h3>
 * <p>
 * The chunked encoding modifies the body of a message in order to transfer it as a series
 * of chunks, each with its own size indicator, followed by an OPTIONAL trailer containing
 * entity-header fields. This allows dynamically produced content to be transferred along
 * with the information necessary for the recipient to verify that it has received the full
 * message.
 * </p>
 * <pre>
 *     Chunked-Body   = *chunk
 *                      last-chunk
 *                      trailer
 *                      CRLF
 *
 *     chunk          = chunk-size [ chunk-extension ] CRLF
 *                      chunk-data CRLF
 *     chunk-size     = 1*HEX
 *     last-chunk     = 1*("0") [ chunk-extension ] CRLF
 *
 *     chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
 *     chunk-ext-name = token
 *     chunk-ext-val  = token | quoted-string
 *     chunk-data     = chunk-size(OCTET)
 *     trailer        = *(entity-header CRLF)
 * </pre>
 * <p>
 * The chunk-size field is a string of hex digits indicating the size of the chunk. The
 * chunked encoding is ended by any chunk whose size is zero, followed by the trailer,
 * which is terminated by an empty line.
 * </p>
 * <p>
 * The trailer allows the sender to include additional HTTP header fields at the end
 * of the message. The Trailer header field can be used to indicate which header fields
 * are included in a trailer (see section 14.40).
 * </p>
 * <p>
 * A server using chunked transfer-coding in a response MUST NOT use the trailer for any
 * header fields unless at least one of the following is true:
 * </p>
 * <p>
 * a) the request included a TE header field that indicates "trailers" is acceptable in
 * the transfer-coding of the response, as described in section 14.39; or,
 * </p>
 * <p>
 * b) the server is the origin server for the response, the trailer fields consist entirely
 * of optional metadata, and the recipient could use the message (in a manner acceptable
 * to the origin server) without receiving this metadata. In other words, the origin server
 * is willing to accept the possibility that the trailer fields might be silently discarded
 * along the path to the client.
 * </p>
 * <p>
 * This requirement prevents an interoperability failure when the message is being received
 * by an HTTP/1.1 (or later) proxy and forwarded to an HTTP/1.0 recipient. It avoids a
 * situation where compliance with the protocol would have necessitated a possibly infinite
 * buffer on the proxy.
 * </p>
 * <p>
 * Note that this class NEVER closes the underlying data receiver, even when
 * {@link #close()} gets called. Instead, it will read until the "end" of its
 * chunking on close, which allows for the seamless invocation of subsequent
 * HTTP 1.1 calls, while not requiring the client to remember to read the
 * entire contents of the response.
 * </p>
 *
 * @author Ortwin Glueck
 * @author Sean C. Sullivan
 * @author Martin Elwin
 * @author Eric Johnson
 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
 * @author Michael Becke
 * @author <a href="mailto:oleg at ural.ru">Oleg Kalnichevski</a>
 *
 * @since 2.0
 *
 */
public class ChunkedInputStream extends InputStream {

    /** The data receiver that we're wrapping */
    private final HttpDataReceiver in;

    /** Scratch buffer used to read chunk-size lines */
    private final CharArrayBuffer buffer;

    /** The chunk size */
    private int chunkSize;

    /** The current position within the current chunk */
    private int pos;

    /** True if we are at the beginning of stream */
    private boolean bof = true;

    /** True if we've reached the end of stream */
    private boolean eof = false;

    /** True if this stream is closed */
    private boolean closed = false;

    /** Trailer headers read after the last chunk; empty until then */
    private Header[] footers = new Header[] {};

    /**
     * Creates a chunk-decoding stream on top of the given data receiver.
     *
     * @param in the data receiver to read the chunk-encoded content from
     * @throws IllegalArgumentException if <code>in</code> is <code>null</code>
     */
    public ChunkedInputStream(final HttpDataReceiver in) {
        super();
        if (in == null) {
            throw new IllegalArgumentException("InputStream parameter may not be null");
        }
        this.in = in;
        this.pos = 0;
        this.buffer = new CharArrayBuffer(16);
    }

    /**
     * <p> Returns all the data in a chunked stream in coalesced form. A chunk
     * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
     * is detected.</p>
     *
     * <p> Trailer headers are read automatically at the end of the stream and
     * can be obtained with the {@link #getFooters()} method.</p>
     *
     * @return -1 if the end of the stream has been reached, or the next data
     * byte
     * @throws IOException If an IO problem occurs
     */
    public int read() throws IOException {
        if (this.closed) {
            throw new IOException("Attempted read from closed stream.");
        }
        if (this.eof) {
            return -1;
        }
        if (this.pos >= this.chunkSize) {
            nextChunk();
            if (this.eof) {
                return -1;
            }
        }
        int b = this.in.read();
        // Only advance within the chunk when a byte was actually delivered;
        // a -1 from the receiver must not consume chunk space.
        if (b != -1) {
            this.pos++;
        }
        return b;
    }

    /**
     * Read some bytes from the stream. At most the remainder of the current
     * chunk is requested from the underlying receiver in a single call.
     *
     * @param b The byte array that will hold the contents from the stream.
     * @param off The offset into the byte array at which bytes will start to be
     * placed.
     * @param len the maximum number of bytes that can be returned.
     * @return The number of bytes returned or -1 if the end of stream has been
     * reached.
     * @see java.io.InputStream#read(byte[], int, int)
     * @throws IOException if an IO problem occurs.
     */
    public int read (byte[] b, int off, int len) throws IOException {

        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }

        if (eof) {
            return -1;
        }
        if (pos >= chunkSize) {
            nextChunk();
            if (eof) {
                return -1;
            }
        }
        // Never read across the chunk boundary.
        len = Math.min(len, chunkSize - pos);
        int count = in.read(b, off, len);
        // A -1 (receiver exhausted) must not be added to the position:
        // doing so would move pos backwards and let a later call read
        // past the end of the current chunk.
        if (count > 0) {
            pos += count;
        }
        return count;
    }

    /**
     * Read some bytes from the stream.
     * @param b The byte array that will hold the contents from the stream.
     * @return The number of bytes returned or -1 if the end of stream has been
     * reached.
     * @see java.io.InputStream#read(byte[])
     * @throws IOException if an IO problem occurs.
     */
    public int read (byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read the next chunk header and reset the in-chunk position. A chunk
     * size of zero marks the end of the stream and triggers parsing of the
     * trailer headers.
     *
     * @throws IOException If an IO error occurs, or if the chunk size is
     * negative.
     */
    private void nextChunk() throws IOException {
        chunkSize = getChunkSize();
        if (chunkSize < 0) {
            throw new MalformedChunkCodingException("Negative chunk size");
        }
        bof = false;
        pos = 0;
        if (chunkSize == 0) {
            eof = true;
            parseTrailerHeaders();
        }
    }

    /**
     * Expects the stream to start with a chunksize in hex with optional
     * comments after a semicolon. The line must end with a CRLF: "a3; some
     * comment\r\n". Unless this is the first chunk of the stream, the CRLF
     * terminating the previous chunk's data is consumed first.
     *
     * @return the chunk size as integer
     *
     * @throws IOException when the chunk size could not be parsed, the
     * previous chunk is not terminated by CRLF, or the stream ends
     * unexpectedly
     */
    private int getChunkSize() throws IOException {
        // skip the CRLF that terminates the data of the previous chunk
        if (!bof) {
            int cr = in.read();
            int lf = in.read();
            if ((cr != HTTP.CR) || (lf != HTTP.LF)) {
                throw new MalformedChunkCodingException(
                    "CRLF expected at end of chunk");
            }
        }
        // read the chunk-size line
        this.buffer.clear();
        int i = this.in.readLine(this.buffer);
        if (i == -1) {
            throw new MalformedChunkCodingException(
                    "Chunked stream ended unexpectedly");
        }
        // anything after the first ';' is a chunk extension and is ignored
        int separator = this.buffer.indexOf(';');
        if (separator < 0) {
            separator = this.buffer.length();
        }
        try {
            return Integer.parseInt(this.buffer.substringTrimmed(0, separator), 16);
        } catch (NumberFormatException e) {
            throw new MalformedChunkCodingException("Bad chunk header");
        }
    }

    /**
     * Reads and stores the Trailer headers.
     * @throws IOException If an IO problem occurs, or if the trailer headers
     * cannot be parsed
     */
    private void parseTrailerHeaders() throws IOException {
        try {
            this.footers = HeaderUtils.parseHeaders(in);
        } catch (HttpException e) {
            IOException ioe = new MalformedChunkCodingException("Invalid footer: "
                + e.getMessage());
            ExceptionUtils.initCause(ioe, e);
            throw ioe;
        }
    }

    /**
     * Upon close, this reads the remainder of the chunked message,
     * leaving the underlying socket at a position to start reading the
     * next response without scanning. The stream is marked closed even if
     * draining the remaining content fails.
     * @throws IOException If an IO problem occurs.
     */
    public void close() throws IOException {
        if (!closed) {
            try {
                if (!eof) {
                    exhaustInputStream(this);
                }
            } finally {
                eof = true;
                closed = true;
            }
        }
    }

    /**
     * Returns the trailer headers read at the end of the chunked stream.
     *
     * @return a copy of the trailer headers; an empty array if none have
     * been read
     */
    public Header[] getFooters() {
        return (Header[])this.footers.clone();
    }

    /**
     * Exhaust an input stream, reading until EOF has been encountered.
     *
     * <p>Note that this function is intended as a non-public utility.
     * This is a little weird, but it seemed silly to make a utility
     * class for this one function, so instead it is just static and
     * shared that way.</p>
     *
     * @param inStream The {@link InputStream} to exhaust.
     * @throws IOException If an IO problem occurs
     */
    static void exhaustInputStream(final InputStream inStream) throws IOException {
        // read and discard the remainder of the message
        byte[] discard = new byte[1024];
        while (inStream.read(discard) >= 0) {
            // keep draining
        }
    }

}