1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.tomcat.util.buf;
19
20 import java.io.IOException;
21 import java.io.Serializable;
22
23 /*
24 * In a server it is very important to be able to operate on
25 * the original byte[] without converting everything to chars.
26 * Some protocols are ASCII only, and some allow different
27 * non-UNICODE encodings. The encoding is not known beforehand,
28 * and can even change during the execution of the protocol.
29 * ( for example a multipart message may have parts with different
30 * encoding )
31 *
32 * For HTTP it is not very clear how the encoding of RequestURI
33 * and mime values can be determined, but it is a great advantage
34 * to be able to parse the request without converting to string.
35 */
36
// TODO: This class could either extend ByteBuffer or, better, wrap a ByteBuffer
// internally; that way it could provide the search/etc. operations on ByteBuffer, as a helper.
39
/**
 * This class is used to represent a chunk of bytes, and
 * utilities to manipulate byte[].
 *
 * The buffer can be modified and used for both input and output.
 *
 * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel or ByteOutputChannel,
 * which will be used when the buffer is empty ( on input ) or filled ( on output ).
 * For output, it can also grow. This operating mode is selected by calling setLimit() or
 * allocate(initial, limit) with limit != -1.
 *
 * Various search and append methods are defined - similar to those of String and StringBuffer, but
 * operating on bytes.
 *
 * This is important because it allows processing the HTTP headers directly on the received bytes,
 * without converting to chars and Strings until the strings are needed. In addition, the charset
 * is determined later, from headers or user code.
 *
 *
 * @author dac@sun.com
 * @author James Todd [gonzo@sun.com]
 * @author Costin Manolache
 * @author Remy Maucherat
 */
64 public final class ByteChunk implements Cloneable, Serializable {
65
/**
 * Input interface, used when the buffer is empty.
 *
 * Plays the same role as java.nio.channels.ReadableByteChannel.
 */
public interface ByteInputChannel {
    /**
     * Read new bytes ( usually the internal conversion buffer ).
     * The implementation is allowed to ignore the parameters,
     * and mutate the chunk if it wishes to implement its own buffering.
     *
     * @param cbuf the array offered as the destination
     * @param off  the offset offered within cbuf
     * @param len  the number of bytes offered
     * @return the number of bytes read; the readers in this class treat a
     *         negative value as "no more input"
     * @throws IOException if the underlying source fails
     */
    int realReadBytes(byte[] cbuf, int off, int len)
        throws IOException;
}
79
/**
 * Output interface, used when the buffer is full.
 *
 * Plays the same role as java.nio.channels.WritableByteChannel.
 */
public interface ByteOutputChannel {
    /**
     * Send the bytes ( usually the internal conversion buffer ).
     * Expect 8k output if the buffer is full.
     *
     * @param cbuf the array holding the bytes to send
     * @param off  the offset of the first byte to send
     * @param len  the number of bytes to send
     * @throws IOException if the underlying sink fails
     */
    void realWriteBytes(byte[] cbuf, int off, int len)
        throws IOException;
}
90
91 // --------------------
92
93 /** Default encoding used to convert to strings. It should be UTF8,
94 as most standards seem to converge, but the servlet API requires
95 8859_1, and this object is used mostly for servlets.
96 */
97 public static final String DEFAULT_CHARACTER_ENCODING="ISO-8859-1";
98
99 // byte[]
100 private byte[] buff;
101
102 private int start=0;
103 private int end;
104
105 private String enc;
106
107 private boolean isSet=false; // XXX
108
109 // How much can it grow, when data is added
110 private int limit=-1;
111
112 private ByteInputChannel in = null;
113 private ByteOutputChannel out = null;
114
115 private boolean isOutput=false;
116 private boolean optimizedWrite=true;
117
/**
 * Creates a new, uninitialized ByteChunk object. No backing array is
 * allocated, and isNull() reports true until data is supplied.
 */
public ByteChunk() {
    super();
}
123
124 public ByteChunk( int initial ) {
125 allocate( initial, -1 );
126 }
127
128 //--------------------
129 public ByteChunk getClone() {
130 try {
131 return (ByteChunk)this.clone();
132 } catch( Exception ex) {
133 return null;
134 }
135 }
136
137 public boolean isNull() {
138 return ! isSet; // buff==null;
139 }
140
141 /**
142 * Resets the message buff to an uninitialized state.
143 */
144 public void recycle() {
145 // buff = null;
146 enc=null;
147 start=0;
148 end=0;
149 isSet=false;
150 }
151
152 public void reset() {
153 buff=null;
154 }
155
156 // -------------------- Setup --------------------
157
158 public void allocate( int initial, int limit ) {
159 isOutput=true;
160 if( buff==null || buff.length < initial ) {
161 buff=new byte[initial];
162 }
163 this.limit=limit;
164 start=0;
165 end=0;
166 isSet=true;
167 }
168
169 /**
170 * Sets the message bytes to the specified subarray of bytes.
171 *
172 * @param b the ascii bytes
173 * @param off the start offset of the bytes
174 * @param len the length of the bytes
175 */
176 public void setBytes(byte[] b, int off, int len) {
177 buff = b;
178 start = off;
179 end = start+ len;
180 isSet=true;
181 }
182
183 public void setOptimizedWrite(boolean optimizedWrite) {
184 this.optimizedWrite = optimizedWrite;
185 }
186
187 public void setEncoding( String enc ) {
188 this.enc=enc;
189 }
190 public String getEncoding() {
191 if (enc == null)
192 enc=DEFAULT_CHARACTER_ENCODING;
193 return enc;
194 }
195
196 /**
197 * Returns the message bytes.
198 */
199 public byte[] getBytes() {
200 return getBuffer();
201 }
202
203 /**
204 * Returns the message bytes.
205 */
206 public byte[] getBuffer() {
207 return buff;
208 }
209
210 /**
211 * Returns the start offset of the bytes.
212 * For output this is the end of the buffer.
213 */
214 public int getStart() {
215 return start;
216 }
217
218 public int getOffset() {
219 return start;
220 }
221
222 public void setOffset(int off) {
223 if (end < off ) end=off;
224 start=off;
225 }
226
227 /**
228 * Returns the length of the bytes.
229 * XXX need to clean this up
230 */
231 public int getLength() {
232 return end-start;
233 }
234
235 /** Maximum amount of data in this buffer.
236 *
237 * If -1 or not set, the buffer will grow undefinitely.
238 * Can be smaller than the current buffer size ( which will not shrink ).
239 * When the limit is reached, the buffer will be flushed ( if out is set )
240 * or throw exception.
241 */
242 public void setLimit(int limit) {
243 this.limit=limit;
244 }
245
246 public int getLimit() {
247 return limit;
248 }
249
250 /**
251 * When the buffer is empty, read the data from the input channel.
252 */
253 public void setByteInputChannel(ByteInputChannel in) {
254 this.in = in;
255 }
256
257 /** When the buffer is full, write the data to the output channel.
258 * Also used when large amount of data is appended.
259 *
260 * If not set, the buffer will grow to the limit.
261 */
262 public void setByteOutputChannel(ByteOutputChannel out) {
263 this.out=out;
264 }
265
266 public int getEnd() {
267 return end;
268 }
269
270 public void setEnd( int i ) {
271 end=i;
272 }
273
274 // -------------------- Adding data to the buffer --------------------
275 /** Append a char, by casting it to byte. This IS NOT intended for unicode.
276 *
277 * @param c
278 * @throws IOException
279 */
280 public void append( char c )
281 throws IOException
282 {
283 append( (byte)c);
284 }
285
286 public void append( byte b )
287 throws IOException
288 {
289 makeSpace( 1 );
290
291 // couldn't make space
292 if( limit >0 && end >= limit ) {
293 flushBuffer();
294 }
295 buff[end++]=b;
296 }
297
298 public void append( ByteChunk src )
299 throws IOException
300 {
301 append( src.getBytes(), src.getStart(), src.getLength());
302 }
303
304 /** Add data to the buffer
305 */
306 public void append( byte src[], int off, int len )
307 throws IOException
308 {
309 // will grow, up to limit
310 makeSpace( len );
311
312 // if we don't have limit: makeSpace can grow as it wants
313 if( limit < 0 ) {
314 // assert: makeSpace made enough space
315 System.arraycopy( src, off, buff, end, len );
316 end+=len;
317 return;
318 }
319
320 // Optimize on a common case.
321 // If the buffer is empty and the source is going to fill up all the
322 // space in buffer, may as well write it directly to the output,
323 // and avoid an extra copy
324 if ( optimizedWrite && len == limit && end == start && out != null ) {
325 out.realWriteBytes( src, off, len );
326 return;
327 }
328 // if we have limit and we're below
329 if( len <= limit - end ) {
330 // makeSpace will grow the buffer to the limit,
331 // so we have space
332 System.arraycopy( src, off, buff, end, len );
333 end+=len;
334 return;
335 }
336
337 // need more space than we can afford, need to flush
338 // buffer
339
340 // the buffer is already at ( or bigger than ) limit
341
342 // We chunk the data into slices fitting in the buffer limit, although
343 // if the data is written directly if it doesn't fit
344
345 int avail=limit-end;
346 System.arraycopy(src, off, buff, end, avail);
347 end += avail;
348
349 flushBuffer();
350
351 int remain = len - avail;
352
353 while (remain > (limit - end)) {
354 out.realWriteBytes( src, (off + len) - remain, limit - end );
355 remain = remain - (limit - end);
356 }
357
358 System.arraycopy(src, (off + len) - remain, buff, end, remain);
359 end += remain;
360
361 }
362
363
364 // -------------------- Removing data from the buffer --------------------
365
366 public int substract()
367 throws IOException {
368
369 if ((end - start) == 0) {
370 if (in == null)
371 return -1;
372 int n = in.realReadBytes( buff, 0, buff.length );
373 if (n < 0)
374 return -1;
375 }
376
377 return (buff[start++] & 0xFF);
378
379 }
380
381 public int substract(ByteChunk src)
382 throws IOException {
383
384 if ((end - start) == 0) {
385 if (in == null)
386 return -1;
387 int n = in.realReadBytes( buff, 0, buff.length );
388 if (n < 0)
389 return -1;
390 }
391
392 int len = getLength();
393 src.append(buff, start, len);
394 start = end;
395 return len;
396
397 }
398
399 public int substract( byte src[], int off, int len )
400 throws IOException {
401
402 if ((end - start) == 0) {
403 if (in == null)
404 return -1;
405 int n = in.realReadBytes( buff, 0, buff.length );
406 if (n < 0)
407 return -1;
408 }
409
410 int n = len;
411 if (len > getLength()) {
412 n = getLength();
413 }
414 System.arraycopy(buff, start, src, off, n);
415 start += n;
416 return n;
417
418 }
419
420
421 /** Send the buffer to the sink. Called by append() when the limit is reached.
422 * You can also call it explicitely to force the data to be written.
423 *
424 * @throws IOException
425 */
426 public void flushBuffer()
427 throws IOException
428 {
429 //assert out!=null
430 if( out==null ) {
431 throw new IOException( "Buffer overflow, no sink " + limit + " " +
432 buff.length );
433 }
434 out.realWriteBytes( buff, start, end-start );
435 end=start;
436 }
437
438 /** Make space for len chars. If len is small, allocate
439 * a reserve space too. Never grow bigger than limit.
440 */
441 private void makeSpace(int count)
442 {
443 byte[] tmp = null;
444
445 int newSize;
446 int desiredSize=end + count;
447
448 // Can't grow above the limit
449 if( limit > 0 &&
450 desiredSize > limit) {
451 desiredSize=limit;
452 }
453
454 if( buff==null ) {
455 if( desiredSize < 256 ) desiredSize=256; // take a minimum
456 buff=new byte[desiredSize];
457 }
458
459 // limit < buf.length ( the buffer is already big )
460 // or we already have space XXX
461 if( desiredSize <= buff.length ) {
462 return;
463 }
464 // grow in larger chunks
465 if( desiredSize < 2 * buff.length ) {
466 newSize= buff.length * 2;
467 if( limit >0 &&
468 newSize > limit ) newSize=limit;
469 tmp=new byte[newSize];
470 } else {
471 newSize= buff.length * 2 + count ;
472 if( limit > 0 &&
473 newSize > limit ) newSize=limit;
474 tmp=new byte[newSize];
475 }
476
477 System.arraycopy(buff, start, tmp, 0, end-start);
478 buff = tmp;
479 tmp = null;
480 end=end-start;
481 start=0;
482 }
483
484 // -------------------- Conversion and getters --------------------
485
486 public String toString() {
487 if (null == buff) {
488 return null;
489 } else if (end-start == 0) {
490 return "";
491 }
492 return StringCache.toString(this);
493 }
494
495 public String toStringInternal() {
496 String strValue=null;
497 try {
498 if( enc==null ) enc=DEFAULT_CHARACTER_ENCODING;
499 strValue = new String( buff, start, end-start, enc );
500 /*
501 Does not improve the speed too much on most systems,
502 it's safer to use the "clasical" new String().
503
504 Most overhead is in creating char[] and copying,
505 the internal implementation of new String() is very close to
506 what we do. The decoder is nice for large buffers and if
507 we don't go to String ( so we can take advantage of reduced GC)
508
509 // Method is commented out, in:
510 return B2CConverter.decodeString( enc );
511 */
512 } catch (java.io.UnsupportedEncodingException e) {
513 // Use the platform encoding in that case; the usage of a bad
514 // encoding will have been logged elsewhere already
515 strValue = new String(buff, start, end-start);
516 }
517 return strValue;
518 }
519
520 public int getInt()
521 {
522 return Ascii.parseInt(buff, start,end-start);
523 }
524
525 public long getLong() {
526 return Ascii.parseLong(buff, start,end-start);
527 }
528
529
530 // -------------------- equals --------------------
531
532 /**
533 * Compares the message bytes to the specified String object.
534 * @param s the String to compare
535 * @return true if the comparison succeeded, false otherwise
536 */
537 public boolean equals(String s) {
538 // XXX ENCODING - this only works if encoding is UTF8-compat
539 // ( ok for tomcat, where we compare ascii - header names, etc )!!!
540
541 byte[] b = buff;
542 int blen = end-start;
543 if (b == null || blen != s.length()) {
544 return false;
545 }
546 int boff = start;
547 for (int i = 0; i < blen; i++) {
548 if (b[boff++] != s.charAt(i)) {
549 return false;
550 }
551 }
552 return true;
553 }
554
555 /**
556 * Compares the message bytes to the specified String object.
557 * @param s the String to compare
558 * @return true if the comparison succeeded, false otherwise
559 */
560 public boolean equalsIgnoreCase(String s) {
561 byte[] b = buff;
562 int blen = end-start;
563 if (b == null || blen != s.length()) {
564 return false;
565 }
566 int boff = start;
567 for (int i = 0; i < blen; i++) {
568 if (Ascii.toLower(b[boff++]) != Ascii.toLower(s.charAt(i))) {
569 return false;
570 }
571 }
572 return true;
573 }
574
575 public boolean equals( ByteChunk bb ) {
576 return equals( bb.getBytes(), bb.getStart(), bb.getLength());
577 }
578
579 public boolean equals( byte b2[], int off2, int len2) {
580 byte b1[]=buff;
581 if( b1==null && b2==null ) return true;
582
583 int len=end-start;
584 if ( len2 != len || b1==null || b2==null )
585 return false;
586
587 int off1 = start;
588
589 while ( len-- > 0) {
590 if (b1[off1++] != b2[off2++]) {
591 return false;
592 }
593 }
594 return true;
595 }
596
597 public boolean equals( CharChunk cc ) {
598 return equals( cc.getChars(), cc.getStart(), cc.getLength());
599 }
600
601 public boolean equals( char c2[], int off2, int len2) {
602 // XXX works only for enc compatible with ASCII/UTF !!!
603 byte b1[]=buff;
604 if( c2==null && b1==null ) return true;
605
606 if (b1== null || c2==null || end-start != len2 ) {
607 return false;
608 }
609 int off1 = start;
610 int len=end-start;
611
612 while ( len-- > 0) {
613 if ( (char)b1[off1++] != c2[off2++]) {
614 return false;
615 }
616 }
617 return true;
618 }
619
620 /**
621 * Returns true if the message bytes starts with the specified string.
622 * @param s the string
623 */
624 public boolean startsWith(String s) {
625 // Works only if enc==UTF
626 byte[] b = buff;
627 int blen = s.length();
628 if (b == null || blen > end-start) {
629 return false;
630 }
631 int boff = start;
632 for (int i = 0; i < blen; i++) {
633 if (b[boff++] != s.charAt(i)) {
634 return false;
635 }
636 }
637 return true;
638 }
639
640 /* Returns true if the message bytes start with the specified byte array */
641 public boolean startsWith(byte[] b2) {
642 byte[] b1 = buff;
643 if (b1 == null && b2 == null) {
644 return true;
645 }
646
647 int len = end - start;
648 if (b1 == null || b2 == null || b2.length > len) {
649 return false;
650 }
651 for (int i = start, j = 0; i < end && j < b2.length; ) {
652 if (b1[i++] != b2[j++])
653 return false;
654 }
655 return true;
656 }
657
658 /**
659 * Returns true if the message bytes starts with the specified string.
660 * @param s the string
661 * @param pos The position
662 */
663 public boolean startsWithIgnoreCase(String s, int pos) {
664 byte[] b = buff;
665 int len = s.length();
666 if (b == null || len+pos > end-start) {
667 return false;
668 }
669 int off = start+pos;
670 for (int i = 0; i < len; i++) {
671 if (Ascii.toLower( b[off++] ) != Ascii.toLower( s.charAt(i))) {
672 return false;
673 }
674 }
675 return true;
676 }
677
678 public int indexOf( String src, int srcOff, int srcLen, int myOff ) {
679 char first=src.charAt( srcOff );
680
681 // Look for first char
682 int srcEnd = srcOff + srcLen;
683
684 for( int i=myOff+start; i <= (end - srcLen); i++ ) {
685 if( buff[i] != first ) continue;
686 // found first char, now look for a match
687 int myPos=i+1;
688 for( int srcPos=srcOff + 1; srcPos< srcEnd; ) {
689 if( buff[myPos++] != src.charAt( srcPos++ ))
690 break;
691 if( srcPos==srcEnd ) return i-start; // found it
692 }
693 }
694 return -1;
695 }
696
697 // -------------------- Hash code --------------------
698
699 // normal hash.
700 public int hash() {
701 return hashBytes( buff, start, end-start);
702 }
703
704 // hash ignoring case
705 public int hashIgnoreCase() {
706 return hashBytesIC( buff, start, end-start );
707 }
708
709 private static int hashBytes( byte buff[], int start, int bytesLen ) {
710 int max=start+bytesLen;
711 byte bb[]=buff;
712 int code=0;
713 for (int i = start; i < max ; i++) {
714 code = code * 37 + bb[i];
715 }
716 return code;
717 }
718
719 private static int hashBytesIC( byte bytes[], int start,
720 int bytesLen )
721 {
722 int max=start+bytesLen;
723 byte bb[]=bytes;
724 int code=0;
725 for (int i = start; i < max ; i++) {
726 code = code * 37 + Ascii.toLower(bb[i]);
727 }
728 return code;
729 }
730
731 /**
732 * Returns true if the message bytes starts with the specified string.
733 * @param c the character
734 * @param starting The start position
735 */
736 public int indexOf(char c, int starting) {
737 int ret = indexOf( buff, start+starting, end, c);
738 return (ret >= start) ? ret - start : -1;
739 }
740
741 public static int indexOf( byte bytes[], int off, int end, char qq )
742 {
743 // Works only for UTF
744 while( off < end ) {
745 byte b=bytes[off];
746 if( b==qq )
747 return off;
748 off++;
749 }
750 return -1;
751 }
752
753 /** Find a character, no side effects.
754 * @return index of char if found, -1 if not
755 */
756 public static int findChar( byte buf[], int start, int end, char c ) {
757 byte b=(byte)c;
758 int offset = start;
759 while (offset < end) {
760 if (buf[offset] == b) {
761 return offset;
762 }
763 offset++;
764 }
765 return -1;
766 }
767
768 /** Find a character, no side effects.
769 * @return index of char if found, -1 if not
770 */
771 public static int findChars( byte buf[], int start, int end, byte c[] ) {
772 int clen=c.length;
773 int offset = start;
774 while (offset < end) {
775 for( int i=0; i<clen; i++ )
776 if (buf[offset] == c[i]) {
777 return offset;
778 }
779 offset++;
780 }
781 return -1;
782 }
783
784 /** Find the first character != c
785 * @return index of char if found, -1 if not
786 */
787 public static int findNotChars( byte buf[], int start, int end, byte c[] )
788 {
789 int clen=c.length;
790 int offset = start;
791 boolean found;
792
793 while (offset < end) {
794 found=true;
795 for( int i=0; i<clen; i++ ) {
796 if (buf[offset] == c[i]) {
797 found=false;
798 break;
799 }
800 }
801 if( found ) { // buf[offset] != c[0..len]
802 return offset;
803 }
804 offset++;
805 }
806 return -1;
807 }
808
809
810 /**
811 * Convert specified String to a byte array. This ONLY WORKS for ascii, UTF chars will be truncated.
812 *
813 * @param value to convert to byte array
814 * @return the byte array value
815 */
816 public static final byte[] convertToBytes(String value) {
817 byte[] result = new byte[value.length()];
818 for (int i = 0; i < value.length(); i++) {
819 result[i] = (byte) value.charAt(i);
820 }
821 return result;
822 }
823
824
825 }