Source code: org/mortbay/util/LineInput.java
1 // ===========================================================================
2 // Copyright (c) 1996 Mort Bay Consulting Pty. Ltd. All rights reserved.
3 // $Id: LineInput.java,v 1.11 2003/09/18 13:29:27 gregwilkins Exp $
4 // ---------------------------------------------------------------------------
5
6 package org.mortbay.util;
7
8 import java.io.ByteArrayInputStream;
9 import java.io.FilterInputStream;
10 import java.io.IOException;
11 import java.io.InputStream;
12 import java.io.InputStreamReader;
13 import java.io.UnsupportedEncodingException;
14
15 import org.apache.commons.logging.Log;
16 import org.apache.commons.logging.LogFactory;
17
18
19 /* ------------------------------------------------------------ */
20 /** Fast LineInput InputStream.
21 * This buffered InputStream provides methods for reading lines
22 * of bytes. The lines can be converted to String or character
23 * arrays either using the default encoding or a user supplied
24 * encoding.
25 *
26 * Buffering and data copying are highly optimized, making this
27 * an ideal class for protocols that mix character encoding lines
28 * with arbitrary byte data (eg HTTP).
29 *
30 * The buffer size is also the maximum line length in bytes and/or
31 * characters. If the byte length of a line is less than the max,
32 * but the character length is greater, then the trailing characters
33 * are lost.
34 *
35 * Line termination is forgiving and accepts CR, LF, CRLF or EOF.
36 * Line input uses the mark/reset mechanism, so any marks set
37 * prior to a readLine call are lost.
38 *
39 * @version $Id: LineInput.java,v 1.11 2003/09/18 13:29:27 gregwilkins Exp $
40 * @author Greg Wilkins (gregw)
41 */
42 public class LineInput extends FilterInputStream
43 {
// Logger for this class; readLine() uses it to report an encoding failure.
private static Log log = LogFactory.getLog(LineInput.class);

/* ------------------------------------------------------------ */
// Byte buffer holding data read from the underlying stream. It is obtained
// from ByteArrayPool in the constructors and may be replaced by mark().
private byte _buf[];
// ByteArrayInputStream view over _buf that feeds _reader.
private ByteBuffer _byteBuffer;
// Converts the bytes exposed by _byteBuffer into characters.
private InputStreamReader _reader;
private int _mark=-1;      // Reset marker (index into _buf), or -1 when no mark is set
private int _pos;          // Start marker: index of the next unread byte in _buf
private int _avail;        // Available back marker, may be byte limited (end of readable data)
private int _contents;     // Absolute back marker of buffer (end of all data held in _buf)
// Bytes still allowed to be read from the underlying stream; negative means no limit.
private int _byteLimit=-1;
// Set by setByteLimit() when a limit is imposed; cleared at the end of fill().
private boolean _newByteLimit;
// Shared instance returned by the readLineBuffer methods.
private LineBuffer _lineBuffer;
// Encoding passed to the three argument constructor, or null for the default.
private String _encoding;
// True once fill() has seen end of input (or an exhausted byte limit).
private boolean _eof=false;
// True when the previous line ended with a CR whose LF may still be pending.
private boolean _lastCr=false;
// True once a CR LF pair has been seen on this stream.
private boolean _seenCrLf=false;

private final static int LF=10;   // Line feed byte
private final static int CR=13;   // Carriage return byte
64
65
66 /* ------------------------------------------------------------ */
/** Constructor.
 * Delegates to {@link #LineInput(InputStream,int)} with a buffer size
 * of 0, which that constructor replaces with its default of 8192.
 * @param in The underlying input stream.
 */
public LineInput(InputStream in)
{
    this(in,0);
}
75
76 /* ------------------------------------------------------------ */
77 /** Constructor.
78 * @param in The underlying input stream.
79 * @param bufferSize The buffer size and maximum line length.
80 */
81 public LineInput(InputStream in, int bufferSize)
82 {
83 super(in);
84 _mark=-1;
85 if (bufferSize==0)
86 bufferSize=8192;
87 _buf=ByteArrayPool.getByteArray(bufferSize);
88 _byteBuffer=new ByteBuffer(_buf);
89 _lineBuffer=new LineBuffer(bufferSize);
90 _reader=new InputStreamReader(_byteBuffer);
91 }
92
93 /* ------------------------------------------------------------ */
94 /** Constructor.
95 * @param in The underlying input stream.
96 * @param bufferSize The buffer size and maximum line length.
97 * @param encoding the character encoding to use for readLine methods.
98 * @exception UnsupportedEncodingException
99 */
100 public LineInput(InputStream in, int bufferSize, String encoding)
101 throws UnsupportedEncodingException
102 {
103 super(in);
104 _mark=-1;
105 if (bufferSize==0)
106 bufferSize=2048;
107 _buf=ByteArrayPool.getByteArray(bufferSize);
108 _byteBuffer=new ByteBuffer(_buf);
109 _lineBuffer=new LineBuffer(bufferSize);
110 _reader=new InputStreamReader(_byteBuffer,encoding);
111 _encoding=encoding;
112 }
113
114 /* ------------------------------------------------------------ */
115 public InputStream getInputStream()
116 {
117 return in;
118 }
119
120 /* ------------------------------------------------------------ */
121 /** Set the byte limit.
122 * If set, only this number of bytes are read before EOF.
123 * @param bytes Limit number of bytes, or -1 for no limit.
124 */
125 public void setByteLimit(int bytes)
126 {
127 _byteLimit=bytes;
128
129 if (bytes>=0)
130 {
131 _newByteLimit=true;
132 _byteLimit-=_contents-_pos;
133 if (_byteLimit<0)
134 {
135 _avail+=_byteLimit;
136 _byteLimit=0;
137 }
138 }
139 else
140 {
141 _newByteLimit=false;
142 _avail=_contents;
143 _eof=false;
144 }
145 }
146
147
148 /* ------------------------------------------------------------ */
149 /** Get the byte limit.
150 * @return Number of bytes until EOF is returned or -1 for no limit.
151 */
152 public int getByteLimit()
153 {
154 if (_byteLimit<0)
155 return _byteLimit;
156
157 return _byteLimit+_avail-_pos;
158 }
159
160 /* ------------------------------------------------------------ */
161 /** Read a line ended by CR, LF or CRLF.
162 * The default or supplied encoding is used to convert bytes to
163 * characters.
164 * @return The line as a String or null for EOF.
165 * @exception IOException
166 */
167 public synchronized String readLine()
168 throws IOException
169 {
170 int len=fillLine(_buf.length);
171
172 if (len<0)
173 return null;
174
175 String s=null;
176 if (_encoding==null)
177 s=new String(_buf,_mark,len);
178 else
179 {
180 try
181 {
182 s=new String(_buf,_mark,len,_encoding);
183 }
184 catch(UnsupportedEncodingException e)
185 {
186 log.warn(LogSupport.EXCEPTION,e);
187 }
188 }
189 _mark=-1;
190
191 return s;
192 }
193
194 /* ------------------------------------------------------------ */
195 /** Read a line ended by CR, LF or CRLF.
196 * The default or supplied encoding is used to convert bytes to
197 * characters.
198 * @param c Character buffer to place the line into.
199 * @param off Offset into the buffer.
200 * @param len Maximum length of line.
201 * @return The length of the line or -1 for EOF.
202 * @exception IOException
203 */
204 public int readLine(char[] c,int off,int len)
205 throws IOException
206 {
207 int blen=fillLine(len);
208
209 if (blen<0)
210 return -1;
211 if (blen==0)
212 return 0;
213
214 _byteBuffer.setStream(_mark,blen);
215 len=_reader.read(c,off,len);
216 _mark=-1;
217
218 return len;
219 }
220
221 /* ------------------------------------------------------------ */
222 /** Read a line ended by CR, LF or CRLF.
223 * @param b Byte array to place the line into.
224 * @param off Offset into the buffer.
225 * @param len Maximum length of line.
226 * @return The length of the line or -1 for EOF.
227 * @exception IOException
228 */
229 public int readLine(byte[] b,int off,int len)
230 throws IOException
231 {
232 len=fillLine(len);
233
234 if (len<0)
235 return -1;
236 if (len==0)
237 return 0;
238
239 System.arraycopy(_buf,_mark, b, off, len);
240 _mark=-1;
241
242 return len;
243 }
244
245
246 /* ------------------------------------------------------------ */
/** Read a Line ended by CR, LF or CRLF.
 * Read a line into a shared LineBuffer instance. The LineBuffer is
 * reused between calls and should not be held by the caller.
 * The default or supplied encoding is used to convert bytes to
 * characters. The maximum line length is the current length of the
 * internal byte buffer.
 * @return LineBuffer instance or null for EOF.
 * @exception IOException
 */
public LineBuffer readLineBuffer()
    throws IOException
{
    return readLineBuffer(_buf.length);
}
260
261 /* ------------------------------------------------------------ */
262 /** Read a Line ended by CR, LF or CRLF.
263 * Read a line into a shared LineBuffer instance. The LineBuffer is
264 * resused between calls and should not be held by the caller.
265 * The default or supplied encoding is used to convert bytes to
266 * characters.
267 * @param len Maximum length of a line, or 0 for default
268 * @return LineBuffer instance or null for EOF.
269 * @exception IOException
270 */
271 public LineBuffer readLineBuffer(int len)
272 throws IOException
273 {
274 len=fillLine(len>0?len:_buf.length);
275
276 if (len<0)
277 return null;
278
279 if (len==0)
280 {
281 _lineBuffer.size=0;
282 return _lineBuffer;
283 }
284
285 _byteBuffer.setStream(_mark,len);
286 _lineBuffer.size=
287 _reader.read(_lineBuffer.buffer,0,_lineBuffer.buffer.length);
288 _mark=-1;
289
290 return _lineBuffer;
291 }
292
293 /* ------------------------------------------------------------ */
294 public synchronized int read() throws IOException
295 {
296 int b;
297 if (_pos >=_avail)
298 fill();
299 if (_pos >=_avail)
300 b=-1;
301 else
302 b=_buf[_pos++]&255;
303
304 return b;
305 }
306
307
308 /* ------------------------------------------------------------ */
309 public synchronized int read(byte b[], int off, int len) throws IOException
310 {
311 int avail=_avail-_pos;
312 if (avail <=0)
313 {
314 fill();
315 avail=_avail-_pos;
316 }
317
318 if (avail <=0)
319 len=-1;
320 else
321 {
322 len=(avail < len) ? avail : len;
323 System.arraycopy(_buf,_pos,b,off,len);
324 _pos +=len;
325 }
326
327 return len;
328 }
329
330 /* ------------------------------------------------------------ */
331 public long skip(long n) throws IOException
332 {
333 int avail=_avail-_pos;
334 if (avail <=0)
335 {
336 fill();
337 avail=_avail-_pos;
338 }
339
340 if (avail <=0)
341 n=0;
342 else
343 {
344 n=(avail < n) ? avail : n;
345 _pos +=n;
346 }
347
348 return n;
349 }
350
351
352 /* ------------------------------------------------------------ */
353 public synchronized int available()
354 throws IOException
355 {
356 int in_stream=in.available();
357 if (_byteLimit>=0 && in_stream>_byteLimit)
358 in_stream=_byteLimit;
359
360 return _avail - _pos + in_stream;
361 }
362
363 /* ------------------------------------------------------------ */
364 public synchronized void mark(int limit)
365 throws IllegalArgumentException
366 {
367 if (limit>_buf.length)
368 {
369 byte[] new_buf=new byte[limit];
370 System.arraycopy(_buf,_pos,new_buf,_pos,_avail-_pos);
371 _buf=new_buf;
372 if (_byteBuffer!=null)
373 _byteBuffer.setBuffer(_buf);
374 }
375 _mark=_pos;
376 }
377
378 /* ------------------------------------------------------------ */
379 public synchronized void reset()
380 throws IOException
381 {
382 if (_mark < 0)
383 throw new IOException("Resetting to invalid mark");
384 if (_byteLimit>=0)
385 _byteLimit+=_pos-_mark;
386 _pos=_mark;
387 _mark=-1;
388 }
389
390 /* ------------------------------------------------------------ */
/** Report mark support.
 * @return Always true; mark and reset are implemented on the internal
 * buffer.
 */
public boolean markSupported()
{
    return true;
}
395
396 /* ------------------------------------------------------------ */
/** Top up the buffer from the underlying stream.
 * First makes room by moving the retained bytes to the start of the
 * buffer: everything from the mark if a mark is set, otherwise
 * everything from the read position. If the mark is at index 0, the
 * buffer is full and bytes have been consumed, the mark is discarded
 * and the fill restarted.
 * Then reads from the underlying stream until at least one byte has
 * arrived, EOF is seen or the buffer is full, maintaining the byte
 * limit and the available marker as it goes. With a byte limit of 0
 * nothing is read and EOF is flagged.
 * Finally, if the previous line ended with a CR and the next byte is
 * a LF, that LF is skipped.
 * @exception IOException if the underlying read fails, or with the
 * message "Premature EOF" if the stream ends while a byte limit is
 * still outstanding.
 */
private void fill()
    throws IOException
{
    // if the mark is in the middle of the buffer
    if (_mark > 0)
    {
        // move saved bytes to start of buffer and rebase all markers
        int saved=_contents - _mark;
        System.arraycopy(_buf, _mark, _buf, 0, saved);
        _pos-=_mark;
        _avail-=_mark;
        _contents=saved;
        _mark=0;
    }
    else if (_mark<0 && _pos>0)
    {
        // no mark: move remaining unread bytes to start of buffer
        int saved=_contents-_pos;
        System.arraycopy(_buf,_pos, _buf, 0, saved);
        _avail-=_pos;
        _contents=saved;
        _pos=0;
    }
    else if (_mark==0 && _pos>0 && _contents==_buf.length)
    {
        // Discard the mark as we need the space.
        _mark=-1;
        fill();
        return;
    }

    // Get ready to top up the buffer
    int n=0;
    _eof=false;

    // Handle byte limited EOF
    if (_byteLimit==0)
        _eof=true;
    // else loop until something is read.
    else while (!_eof && n==0 && _buf.length>_contents)
    {
        // try to read as much as will fit.
        int space=_buf.length-_contents;

        n=in.read(_buf,_contents,space);

        if (n<=0)
        {
            // If no bytes - we could be NBIO, so we want to avoid
            // a busy loop.
            if (n==0)
            {
                // Yield to give a chance for some bytes to turn up
                Thread.yield();

                // Do a byte read as that is blocking
                int b = in.read();
                if (b>=0)
                {
                    n=1;
                    _buf[_contents++]=(byte)b;
                }
                else
                    _eof=true;
            }
            else
                _eof=true;
        }
        else
            _contents+=n;
        _avail=_contents;

        // If we have a byte limit
        if (_byteLimit>0)
        {
            // adjust the bytes available: do not expose more than the
            // limit allows beyond the read position
            if (_contents-_pos >=_byteLimit)
                _avail=_byteLimit+_pos;

            // charge the bytes just read against the limit
            if (n>_byteLimit)
                _byteLimit=0;
            else if (n>=0)
                _byteLimit-=n;
            else if (n==-1)
                throw new IOException("Premature EOF");
        }
    }

    // If we have some characters and the last read was a CR and
    // the first char is a LF, skip it
    if (_avail-_pos>0 && _lastCr && _buf[_pos]==LF)
    {
        _seenCrLf=true;
        _pos++;
        // keep a set mark in step with the read position
        if (_mark>=0)
            _mark++;
        _lastCr=false;

        // If the byte limit has just been imposed, don't count
        // LF as content.
        if(_byteLimit>=0 && _newByteLimit)
        {
            if (_avail<_contents)
                _avail++;
            else
                _byteLimit++;
        }
        // If we ate all that was filled, fill some more
        if (_pos==_avail)
            fill();
    }
    _newByteLimit=false;
}
510
511
512 /* ------------------------------------------------------------ */
/** Locate the next line in the buffer.
 * Sets the mark to the current read position, then advances the read
 * position over the line and its terminator (CR, LF or CRLF), filling
 * the buffer as needed. A line is also ended by EOF, by a full buffer
 * or by reaching maxLen content bytes.
 * On return the callers in this class read the line as the returned
 * number of bytes starting at the mark.
 * @param maxLen Maximum number of content bytes in the line.
 * @return The number of content bytes in the line (CR and LF are not
 * counted), or -1 if no data is available.
 * @exception IOException
 */
private int fillLine(int maxLen)
    throws IOException
{
    // The line starts here; fill() preserves data from the mark onwards.
    _mark=_pos;

    if (_pos>=_avail)
        fill();
    if (_pos>=_avail)
        return -1;

    byte b;
    // Carry over a CR that ended the previous line without its LF.
    boolean cr=_lastCr;
    boolean lf=false;
    _lastCr=false;
    int len=0;

    LineLoop:
    while (_pos<=_avail)
    {
        // if we have gone past the end of the buffer
        while (_pos==_avail)
        {
            // If EOF or no more space in the buffer,
            // return a line.
            if (_eof || (_mark==0 && _contents==_buf.length))
            {
                // Remember a trailing CR so a following LF can be skipped.
                _lastCr=!_eof && _buf[_avail-1]==CR;

                cr=true;
                lf=true;
                break LineLoop;
            }

            // If we have a CR and no more characters are available
            if (cr && in.available()==0 && !_seenCrLf)
            {
                _lastCr=true;
                cr=true;
                lf=true;
                break LineLoop;
            }
            else
            {
                // Else just wait for more...
                // NOTE(review): _pos=len appears to assume fill() has left
                // the mark at index 0, so that len is the offset of the next
                // content byte - confirm.
                _pos=_mark;
                fill();
                _pos=len;
                cr=false;
            }
        }

        // Get the byte
        b=_buf[_pos++];

        switch(b)
        {
          case LF:
              if (cr) _seenCrLf=true;
              lf=true;
              break LineLoop;

          case CR:
              if (cr)
              {
                  // Double CR: end the line here and leave the second
                  // CR unread
                  if (_pos>1)
                  {
                      _pos--;
                      break LineLoop;
                  }
              }
              cr=true;
              break;

          default:
              if(cr)
              {
                  // A content byte after a CR: the CR alone ended the
                  // line, unless this is the first byte of the buffer.
                  if (_pos==1)
                      cr=false;
                  else
                  {
                      _pos--;
                      break LineLoop;
                  }
              }

              len++;
              if (len==maxLen)
              {
                  // look for EOL immediately after the last allowed byte
                  if (_mark!=0 && _pos+2>=_avail && _avail<_buf.length)
                      fill();

                  if (_pos<_avail && _buf[_pos]==CR)
                  {
                      cr=true;
                      _pos++;
                  }
                  if (_pos<_avail && _buf[_pos]==LF)
                  {
                      lf=true;
                      _pos++;
                  }

                  if (!cr && !lf)
                  {
                      // fake EOL
                      lf=true;
                      cr=true;
                  }
                  break LineLoop;
              }

              break;
        }
    }

    // No terminator and no content: report that nothing was read.
    if (!cr && !lf && len==0)
        len=-1;

    return len;
}
635
636 /* ------------------------------------------------------------ */
637 private static class ByteBuffer extends ByteArrayInputStream
638 {
639 ByteBuffer(byte[] buffer)
640 {
641 super(buffer);
642 }
643
644 void setBuffer(byte[] buffer)
645 {
646 buf=buffer;
647 }
648
649 void setStream(int offset,int length)
650 {
651 pos=offset;
652 count=offset+length;
653 mark=-1;
654 }
655 }
656
657 /* ------------------------------------------------------------ */
658 /** Reusable LineBuffer.
659 * Externalized LineBuffer for fast line parsing.
660 */
661 public static class LineBuffer
662 {
663 public char[] buffer;
664 public int size;
665 public LineBuffer(int maxLineLength)
666 {buffer=new char[maxLineLength];}
667
668 public String toString(){return new String(buffer,0,size);}
669 }
670
671 /* ------------------------------------------------------------ */
672 public void destroy()
673 {
674 ByteArrayPool.returnByteArray(_buf);
675 _byteBuffer=null;
676 _reader=null;
677 _lineBuffer=null;
678 _encoding=null;
679 }
680 }
681