Save This Page
Home » iText-src-2.1.3 » com.lowagie » text » pdf » [javadoc | source]
    1   /*
    2    * $Id: PRTokeniser.java 3117 2008-01-31 05:53:22Z xlv $
    3    *
    4    * Copyright 2001, 2002 by Paulo Soares.
    5    *
    6    * The contents of this file are subject to the Mozilla Public License Version 1.1
    7    * (the "License"); you may not use this file except in compliance with the License.
    8    * You may obtain a copy of the License at http://www.mozilla.org/MPL/
    9    *
   10    * Software distributed under the License is distributed on an "AS IS" basis,
   11    * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
   12    * for the specific language governing rights and limitations under the License.
   13    *
   14    * The Original Code is 'iText, a free JAVA-PDF library'.
   15    *
   16    * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
   17    * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
   18    * All Rights Reserved.
   19    * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
   20    * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
   21    *
   22    * Contributor(s): all the names of the contributors are added in the source code
   23    * where applicable.
   24    *
   25    * Alternatively, the contents of this file may be used under the terms of the
   26    * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
   27    * provisions of LGPL are applicable instead of those above.  If you wish to
   28    * allow use of your version of this file only under the terms of the LGPL
   29    * License and not to allow others to use your version of this file under
   30    * the MPL, indicate your decision by deleting the provisions above and
   31    * replace them with the notice and other provisions required by the LGPL.
   32    * If you do not delete the provisions above, a recipient may use your version
   33    * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
   34    *
   35    * This library is free software; you can redistribute it and/or modify it
   36    * under the terms of the MPL as stated above or under the terms of the GNU
   37    * Library General Public License as published by the Free Software Foundation;
   38    * either version 2 of the License, or any later version.
   39    *
   40    * This library is distributed in the hope that it will be useful, but WITHOUT
   41    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   42    * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
   43    * details.
   44    *
   45    * If you didn't download this code from the following link, you should check if
   46    * you aren't using an obsolete version:
   47    * http://www.lowagie.com/iText/
   48    */
   49   
   50   package com.lowagie.text.pdf;
   51   
   52   import java.io.IOException;
   53   /**
   54    *
   55    * @author  Paulo Soares (psoares@consiste.pt)
   56    */
   57   public class PRTokeniser {
   58       
   59       public static final int TK_NUMBER = 1;
   60       public static final int TK_STRING = 2;
   61       public static final int TK_NAME = 3;
   62       public static final int TK_COMMENT = 4;
   63       public static final int TK_START_ARRAY = 5;
   64       public static final int TK_END_ARRAY = 6;
   65       public static final int TK_START_DIC = 7;
   66       public static final int TK_END_DIC = 8;
   67       public static final int TK_REF = 9;
   68       public static final int TK_OTHER = 10;
   69       public static final boolean delims[] = {
   70           true,  true,  false, false, false, false, false, false, false, false,
   71           true,  true,  false, true,  true,  false, false, false, false, false,
   72           false, false, false, false, false, false, false, false, false, false,
   73           false, false, false, true,  false, false, false, false, true,  false,
   74           false, true,  true,  false, false, false, false, false, true,  false,
   75           false, false, false, false, false, false, false, false, false, false,
   76           false, true,  false, true,  false, false, false, false, false, false,
   77           false, false, false, false, false, false, false, false, false, false,
   78           false, false, false, false, false, false, false, false, false, false,
   79           false, false, true,  false, true,  false, false, false, false, false,
   80           false, false, false, false, false, false, false, false, false, false,
   81           false, false, false, false, false, false, false, false, false, false,
   82           false, false, false, false, false, false, false, false, false, false,
   83           false, false, false, false, false, false, false, false, false, false,
   84           false, false, false, false, false, false, false, false, false, false,
   85           false, false, false, false, false, false, false, false, false, false,
   86           false, false, false, false, false, false, false, false, false, false,
   87           false, false, false, false, false, false, false, false, false, false,
   88           false, false, false, false, false, false, false, false, false, false,
   89           false, false, false, false, false, false, false, false, false, false,
   90           false, false, false, false, false, false, false, false, false, false,
   91           false, false, false, false, false, false, false, false, false, false,
   92           false, false, false, false, false, false, false, false, false, false,
   93           false, false, false, false, false, false, false, false, false, false,
   94           false, false, false, false, false, false, false, false, false, false,
   95           false, false, false, false, false, false, false};
   96       
   97       static final String EMPTY = "";
   98   
   99       
  100       protected RandomAccessFileOrArray file;
  101       protected int type;
  102       protected String stringValue;
  103       protected int reference;
  104       protected int generation;
  105       protected boolean hexString;
  106          
  107       public PRTokeniser(String filename) throws IOException {
  108           file = new RandomAccessFileOrArray(filename);
  109       }
  110   
  111       public PRTokeniser(byte pdfIn[]) {
  112           file = new RandomAccessFileOrArray(pdfIn);
  113       }
  114       
  115       public PRTokeniser(RandomAccessFileOrArray file) {
  116           this.file = file;
  117       }
  118       
  119       public void seek(int pos) throws IOException {
  120           file.seek(pos);
  121       }
  122       
  123       public int getFilePointer() throws IOException {
  124           return file.getFilePointer();
  125       }
  126   
  127       public void close() throws IOException {
  128           file.close();
  129       }
  130       
  131       public int length() throws IOException {
  132           return file.length();
  133       }
  134   
  135       public int read() throws IOException {
  136           return file.read();
  137       }
  138       
  139       public RandomAccessFileOrArray getSafeFile() {
  140           return new RandomAccessFileOrArray(file);
  141       }
  142       
  143       public RandomAccessFileOrArray getFile() {
  144           return file;
  145       }
  146       
  147       public String readString(int size) throws IOException {
  148           StringBuffer buf = new StringBuffer();
  149           int ch;
  150           while ((size--) > 0) {
  151               ch = file.read();
  152               if (ch == -1)
  153                   break;
  154               buf.append((char)ch);
  155           }
  156           return buf.toString();
  157       }
  158   
  159       public static final boolean isWhitespace(int ch) {
  160           return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32);
  161       }
  162       
  163       public static final boolean isDelimiter(int ch) {
  164           return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%');
  165       }
  166   
  167       public static final boolean isDelimiterWhitespace(int ch) {
  168           return delims[ch + 1];
  169       }
  170   
  171       public int getTokenType() {
  172           return type;
  173       }
  174       
  175       public String getStringValue() {
  176           return stringValue;
  177       }
  178       
  179       public int getReference() {
  180           return reference;
  181       }
  182       
  183       public int getGeneration() {
  184           return generation;
  185       }
  186       
  187       public void backOnePosition(int ch) {
  188           if (ch != -1)
  189               file.pushBack((byte)ch);
  190       }
  191       
  192       public void throwError(String error) throws IOException {
  193           throw new IOException(error + " at file pointer " + file.getFilePointer());
  194       }
  195       
  196       public char checkPdfHeader() throws IOException {
  197           file.setStartOffset(0);
  198           String str = readString(1024);
  199           int idx = str.indexOf("%PDF-");
  200           if (idx < 0)
  201               throw new IOException("PDF header signature not found.");
  202           file.setStartOffset(idx);
  203           return str.charAt(idx + 7);
  204       }
  205       
  206       public void checkFdfHeader() throws IOException {
  207           file.setStartOffset(0);
  208           String str = readString(1024);
  209           int idx = str.indexOf("%FDF-1.2");
  210           if (idx < 0)
  211               throw new IOException("FDF header signature not found.");
  212           file.setStartOffset(idx);
  213       }
  214   
  215       public int getStartxref() throws IOException {
  216           int size = Math.min(1024, file.length());
  217           int pos = file.length() - size;
  218           file.seek(pos);
  219           String str = readString(1024);
  220           int idx = str.lastIndexOf("startxref");
  221           if (idx < 0)
  222               throw new IOException("PDF startxref not found.");
  223           return pos + idx;
  224       }
  225   
  226       public static int getHex(int v) {
  227           if (v >= '0' && v <= '9')
  228               return v - '0';
  229           if (v >= 'A' && v <= 'F')
  230               return v - 'A' + 10;
  231           if (v >= 'a' && v <= 'f')
  232               return v - 'a' + 10;
  233           return -1;
  234       }
  235       
  236       public void nextValidToken() throws IOException {
  237           int level = 0;
  238           String n1 = null;
  239           String n2 = null;
  240           int ptr = 0;
  241           while (nextToken()) {
  242               if (type == TK_COMMENT)
  243                   continue;
  244               switch (level) {
  245                   case 0:
  246                   {
  247                       if (type != TK_NUMBER)
  248                           return;
  249                       ptr = file.getFilePointer();
  250                       n1 = stringValue;
  251                       ++level;
  252                       break;
  253                   }
  254                   case 1:
  255                   {
  256                       if (type != TK_NUMBER) {
  257                           file.seek(ptr);
  258                           type = TK_NUMBER;
  259                           stringValue = n1;
  260                           return;
  261                       }
  262                       n2 = stringValue;
  263                       ++level;
  264                       break;
  265                   }
  266                   default:
  267                   {
  268                       if (type != TK_OTHER || !stringValue.equals("R")) {
  269                           file.seek(ptr);
  270                           type = TK_NUMBER;
  271                           stringValue = n1;
  272                           return;
  273                       }
  274                       type = TK_REF;
  275                       reference = Integer.parseInt(n1);
  276                       generation = Integer.parseInt(n2);
  277                       return;
  278                   }
  279               }
  280           }
  281           throwError("Unexpected end of file");
  282       }
  283       
  284       public boolean nextToken() throws IOException {
  285           StringBuffer outBuf = null;
  286           stringValue = EMPTY;
  287           int ch = 0;
  288           do {
  289               ch = file.read();
  290           } while (ch != -1 && isWhitespace(ch));
  291           if (ch == -1)
  292               return false;
  293           switch (ch) {
  294               case '[':
  295                   type = TK_START_ARRAY;
  296                   break;
  297               case ']':
  298                   type = TK_END_ARRAY;
  299                   break;
  300               case '/':
  301               {
  302                   outBuf = new StringBuffer();
  303                   type = TK_NAME;
  304                   while (true) {
  305                       ch = file.read();
  306                       if (delims[ch + 1])
  307                           break;
  308                       if (ch == '#') {
  309                           ch = (getHex(file.read()) << 4) + getHex(file.read());
  310                       }
  311                       outBuf.append((char)ch);
  312                   }
  313                   backOnePosition(ch);
  314                   break;
  315               }
  316               case '>':
  317                   ch = file.read();
  318                   if (ch != '>')
  319                       throwError("'>' not expected");
  320                   type = TK_END_DIC;
  321                   break;
  322               case '<':
  323               {
  324                   int v1 = file.read();
  325                   if (v1 == '<') {
  326                       type = TK_START_DIC;
  327                       break;
  328                   }
  329                   outBuf = new StringBuffer();
  330                   type = TK_STRING;
  331                   hexString = true;
  332                   int v2 = 0;
  333                   while (true) {
  334                       while (isWhitespace(v1))
  335                           v1 = file.read();
  336                       if (v1 == '>')
  337                           break;
  338                       v1 = getHex(v1);
  339                       if (v1 < 0)
  340                           break;
  341                       v2 = file.read();
  342                       while (isWhitespace(v2))
  343                           v2 = file.read();
  344                       if (v2 == '>') {
  345                           ch = v1 << 4;
  346                           outBuf.append((char)ch);
  347                           break;
  348                       }
  349                       v2 = getHex(v2);
  350                       if (v2 < 0)
  351                           break;
  352                       ch = (v1 << 4) + v2;
  353                       outBuf.append((char)ch);
  354                       v1 = file.read();
  355                   }
  356                   if (v1 < 0 || v2 < 0)
  357                       throwError("Error reading string");
  358                   break;
  359               }
  360               case '%':
  361                   type = TK_COMMENT;
  362                   do {
  363                       ch = file.read();
  364                   } while (ch != -1 && ch != '\r' && ch != '\n');
  365                   break;
  366               case '(':
  367               {
  368                   outBuf = new StringBuffer();
  369                   type = TK_STRING;
  370                   hexString = false;
  371                   int nesting = 0;
  372                   while (true) {
  373                       ch = file.read();
  374                       if (ch == -1)
  375                           break;
  376                       if (ch == '(') {
  377                           ++nesting;
  378                       }
  379                       else if (ch == ')') {
  380                           --nesting;
  381                       }
  382                       else if (ch == '\\') {
  383                           boolean lineBreak = false;
  384                           ch = file.read();
  385                           switch (ch) {
  386                               case 'n':
  387                                   ch = '\n';
  388                                   break;
  389                               case 'r':
  390                                   ch = '\r';
  391                                   break;
  392                               case 't':
  393                                   ch = '\t';
  394                                   break;
  395                               case 'b':
  396                                   ch = '\b';
  397                                   break;
  398                               case 'f':
  399                                   ch = '\f';
  400                                   break;
  401                               case '(':
  402                               case ')':
  403                               case '\\':
  404                                   break;
  405                               case '\r':
  406                                   lineBreak = true;
  407                                   ch = file.read();
  408                                   if (ch != '\n')
  409                                       backOnePosition(ch);
  410                                   break;
  411                               case '\n':
  412                                   lineBreak = true;
  413                                   break;
  414                               default:
  415                               {
  416                                   if (ch < '0' || ch > '7') {
  417                                       break;
  418                                   }
  419                                   int octal = ch - '0';
  420                                   ch = file.read();
  421                                   if (ch < '0' || ch > '7') {
  422                                       backOnePosition(ch);
  423                                       ch = octal;
  424                                       break;
  425                                   }
  426                                   octal = (octal << 3) + ch - '0';
  427                                   ch = file.read();
  428                                   if (ch < '0' || ch > '7') {
  429                                       backOnePosition(ch);
  430                                       ch = octal;
  431                                       break;
  432                                   }
  433                                   octal = (octal << 3) + ch - '0';
  434                                   ch = octal & 0xff;
  435                                   break;
  436                               }
  437                           }
  438                           if (lineBreak)
  439                               continue;
  440                           if (ch < 0)
  441                               break;
  442                       }
  443                       else if (ch == '\r') {
  444                           ch = file.read();
  445                           if (ch < 0)
  446                               break;
  447                           if (ch != '\n') {
  448                               backOnePosition(ch);
  449                               ch = '\n';
  450                           }
  451                       }
  452                       if (nesting == -1)
  453                           break;
  454                       outBuf.append((char)ch);
  455                   }
  456                   if (ch == -1)
  457                       throwError("Error reading string");
  458                   break;
  459               }
  460               default:
  461               {
  462                   outBuf = new StringBuffer();
  463                   if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) {
  464                       type = TK_NUMBER;
  465                       do {
  466                           outBuf.append((char)ch);
  467                           ch = file.read();
  468                       } while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'));
  469                   }
  470                   else {
  471                       type = TK_OTHER;
  472                       do {
  473                           outBuf.append((char)ch);
  474                           ch = file.read();
  475                       } while (!delims[ch + 1]);
  476                   }
  477                   backOnePosition(ch);
  478                   break;
  479               }
  480           }
  481           if (outBuf != null)
  482               stringValue = outBuf.toString();
  483           return true;
  484       }
  485       
  486       public int intValue() {
  487           return Integer.parseInt(stringValue);
  488       }
  489       
  490       public boolean readLineSegment(byte input[]) throws IOException {
  491           int c = -1;
  492           boolean eol = false;
  493           int ptr = 0;
  494           int len = input.length;
  495   	// ssteward, pdftk-1.10, 040922: 
  496   	// skip initial whitespace; added this because PdfReader.rebuildXref()
  497   	// assumes that line provided by readLineSegment does not have init. whitespace;
  498   	if ( ptr < len ) {
  499   	    while ( isWhitespace( (c = read()) ) );
  500   	}
  501   	while ( !eol && ptr < len ) {
  502   	    switch (c) {
  503                   case -1:
  504                   case '\n':
  505                       eol = true;
  506                       break;
  507                   case '\r':
  508                       eol = true;
  509                       int cur = getFilePointer();
  510                       if ((read()) != '\n') {
  511                           seek(cur);
  512                       }
  513                       break;
  514                   default:
  515                       input[ptr++] = (byte)c;
  516                       break;
  517               }
  518   
  519   	    // break loop? do it before we read() again
  520   	    if( eol || len <= ptr ) {
  521   		break;
  522   	    }
  523   	    else {
  524   		c = read();
  525   	    }
  526           }
  527           if (ptr >= len) {
  528               eol = false;
  529               while (!eol) {
  530                   switch (c = read()) {
  531                       case -1:
  532                       case '\n':
  533                           eol = true;
  534                           break;
  535                       case '\r':
  536                           eol = true;
  537                           int cur = getFilePointer();
  538                           if ((read()) != '\n') {
  539                               seek(cur);
  540                           }
  541                           break;
  542                   }
  543               }
  544           }
  545           
  546           if ((c == -1) && (ptr == 0)) {
  547               return false;
  548           }
  549           if (ptr + 2 <= len) {
  550               input[ptr++] = (byte)' ';
  551               input[ptr] = (byte)'X';
  552           }
  553           return true;
  554       }
  555       
  556       public static int[] checkObjectStart(byte line[]) {
  557           try {
  558               PRTokeniser tk = new PRTokeniser(line);
  559               int num = 0;
  560               int gen = 0;
  561               if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
  562                   return null;
  563               num = tk.intValue();
  564               if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
  565                   return null;
  566               gen = tk.intValue();
  567               if (!tk.nextToken())
  568                   return null;
  569               if (!tk.getStringValue().equals("obj"))
  570                   return null;
  571               return new int[]{num, gen};
  572           }
  573           catch (Exception ioe) {
  574               // empty on purpose
  575           }
  576           return null;
  577       }
  578       
  579       public boolean isHexString() {
  580           return this.hexString;
  581       }
  582       
  583   }

Save This Page
Home » iText-src-2.1.3 » com.lowagie » text » pdf » [javadoc | source]