Save This Page
Home » POI-3.5 » org.apache » poi » hssf » eventusermodel » examples » [javadoc | source]
    1   /* ====================================================================
    2      Licensed to the Apache Software Foundation (ASF) under one or more
    3      contributor license agreements.  See the NOTICE file distributed with
    4      this work for additional information regarding copyright ownership.
    5      The ASF licenses this file to You under the Apache License, Version 2.0
    6      (the "License"); you may not use this file except in compliance with
    7      the License.  You may obtain a copy of the License at
    8   
    9          http://www.apache.org/licenses/LICENSE-2.0
   10   
   11      Unless required by applicable law or agreed to in writing, software
   12      distributed under the License is distributed on an "AS IS" BASIS,
   13      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14      See the License for the specific language governing permissions and
   15      limitations under the License.
   16   ==================================================================== */
   17   
   18   package org.apache.poi.hssf.eventusermodel.examples;
   19   
   20   import java.io.FileInputStream;
   21   import java.io.FileNotFoundException;
   22   import java.io.IOException;
   23   import java.io.PrintStream;
   24   import java.util.ArrayList;
   25   
   26   import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
   27   import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
   28   import org.apache.poi.hssf.eventusermodel.HSSFListener;
   29   import org.apache.poi.hssf.eventusermodel.HSSFRequest;
   30   import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
   31   import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
   32   import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
   33   import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
   34   import org.apache.poi.hssf.model.HSSFFormulaParser;
   35   import org.apache.poi.hssf.record.BOFRecord;
   36   import org.apache.poi.hssf.record.BlankRecord;
   37   import org.apache.poi.hssf.record.BoolErrRecord;
   38   import org.apache.poi.hssf.record.BoundSheetRecord;
   39   import org.apache.poi.hssf.record.FormulaRecord;
   40   import org.apache.poi.hssf.record.LabelRecord;
   41   import org.apache.poi.hssf.record.LabelSSTRecord;
   42   import org.apache.poi.hssf.record.NoteRecord;
   43   import org.apache.poi.hssf.record.NumberRecord;
   44   import org.apache.poi.hssf.record.RKRecord;
   45   import org.apache.poi.hssf.record.Record;
   46   import org.apache.poi.hssf.record.SSTRecord;
   47   import org.apache.poi.hssf.record.StringRecord;
   48   import org.apache.poi.hssf.usermodel.HSSFWorkbook;
   49   import org.apache.poi.poifs.filesystem.POIFSFileSystem;
   50   
   51   /**
   52    * A XLS -> CSV processor, that uses the MissingRecordAware
   53    *  EventModel code to ensure it outputs all columns and rows.
   54    * @author Nick Burch
   55    */
   56   public class XLS2CSVmra implements HSSFListener {
   57   	private int minColumns;
   58   	private POIFSFileSystem fs;
   59   	private PrintStream output;
   60   
   61   	private int lastRowNumber;
   62   	private int lastColumnNumber;
   63   
   64   	/** Should we output the formula, or the value it has? */
   65   	private boolean outputFormulaValues = true;
   66   
   67   	/** For parsing Formulas */
   68   	private SheetRecordCollectingListener workbookBuildingListener;
   69   	private HSSFWorkbook stubWorkbook;
   70   
   71   	// Records we pick up as we process
   72   	private SSTRecord sstRecord;
   73   	private FormatTrackingHSSFListener formatListener;
   74   	
   75   	/** So we known which sheet we're on */
   76   	private int sheetIndex = -1;
   77   	private BoundSheetRecord[] orderedBSRs;
   78   	private ArrayList boundSheetRecords = new ArrayList();
   79   
   80   	// For handling formulas with string results
   81   	private int nextRow;
   82   	private int nextColumn;
   83   	private boolean outputNextStringRecord;
   84   
   85   	/**
   86   	 * Creates a new XLS -> CSV converter
   87   	 * @param fs The POIFSFileSystem to process
   88   	 * @param output The PrintStream to output the CSV to
   89   	 * @param minColumns The minimum number of columns to output, or -1 for no minimum
   90   	 */
   91   	public XLS2CSVmra(POIFSFileSystem fs, PrintStream output, int minColumns) {
   92   		this.fs = fs;
   93   		this.output = output;
   94   		this.minColumns = minColumns;
   95   	}
   96   
   97   	/**
   98   	 * Creates a new XLS -> CSV converter
   99   	 * @param filename The file to process
  100   	 * @param minColumns The minimum number of columns to output, or -1 for no minimum
  101   	 * @throws IOException
  102   	 * @throws FileNotFoundException
  103   	 */
  104   	public XLS2CSVmra(String filename, int minColumns) throws IOException, FileNotFoundException {
  105   		this(
  106   				new POIFSFileSystem(new FileInputStream(filename)),
  107   				System.out, minColumns
  108   		);
  109   	}
  110   
  111   	/**
  112   	 * Initiates the processing of the XLS file to CSV
  113   	 */
  114   	public void process() throws IOException {
  115   		MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
  116   		formatListener = new FormatTrackingHSSFListener(listener);
  117   
  118   		HSSFEventFactory factory = new HSSFEventFactory();
  119   		HSSFRequest request = new HSSFRequest();
  120   
  121   		if(outputFormulaValues) {
  122   			request.addListenerForAllRecords(formatListener);
  123   		} else {
  124   			workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
  125   			request.addListenerForAllRecords(workbookBuildingListener);
  126   		}
  127   
  128   		factory.processWorkbookEvents(request, fs);
  129   	}
  130   
  131   	/**
  132   	 * Main HSSFListener method, processes events, and outputs the
  133   	 *  CSV as the file is processed.
  134   	 */
  135   	public void processRecord(Record record) {
  136   		int thisRow = -1;
  137   		int thisColumn = -1;
  138   		String thisStr = null;
  139   
  140   		switch (record.getSid())
  141   		{
  142   		case BoundSheetRecord.sid:
  143   			boundSheetRecords.add(record);
  144   			break;
  145   		case BOFRecord.sid:
  146   			BOFRecord br = (BOFRecord)record;
  147   			if(br.getType() == BOFRecord.TYPE_WORKSHEET) {
  148   				// Create sub workbook if required
  149   				if(workbookBuildingListener != null && stubWorkbook == null) {
  150   					stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
  151   				}
  152   				
  153   				// Output the worksheet name
  154   				// Works by ordering the BSRs by the location of
  155   				//  their BOFRecords, and then knowing that we
  156   				//  process BOFRecords in byte offset order
  157   				sheetIndex++;
  158   				if(orderedBSRs == null) {
  159   					orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
  160   				}
  161   				output.println();
  162   				output.println( 
  163   						orderedBSRs[sheetIndex].getSheetname() +
  164   						" [" + (sheetIndex+1) + "]:"
  165   				);
  166   			}
  167   			break;
  168   
  169   		case SSTRecord.sid:
  170   			sstRecord = (SSTRecord) record;
  171   			break;
  172   
  173   		case BlankRecord.sid:
  174   			BlankRecord brec = (BlankRecord) record;
  175   
  176   			thisRow = brec.getRow();
  177   			thisColumn = brec.getColumn();
  178   			thisStr = "";
  179   			break;
  180   		case BoolErrRecord.sid:
  181   			BoolErrRecord berec = (BoolErrRecord) record;
  182   
  183   			thisRow = berec.getRow();
  184   			thisColumn = berec.getColumn();
  185   			thisStr = "";
  186   			break;
  187   
  188   		case FormulaRecord.sid:
  189   			FormulaRecord frec = (FormulaRecord) record;
  190   
  191   			thisRow = frec.getRow();
  192   			thisColumn = frec.getColumn();
  193   
  194   			if(outputFormulaValues) {
  195   				if(Double.isNaN( frec.getValue() )) {
  196   					// Formula result is a string
  197   					// This is stored in the next record
  198   					outputNextStringRecord = true;
  199   					nextRow = frec.getRow();
  200   					nextColumn = frec.getColumn();
  201   				} else {
  202   					thisStr = formatListener.formatNumberDateCell(frec);
  203   				}
  204   			} else {
  205   				thisStr = '"' +
  206   					HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
  207   			}
  208   			break;
  209   		case StringRecord.sid:
  210   			if(outputNextStringRecord) {
  211   				// String for formula
  212   				StringRecord srec = (StringRecord)record;
  213   				thisStr = srec.getString();
  214   				thisRow = nextRow;
  215   				thisColumn = nextColumn;
  216   				outputNextStringRecord = false;
  217   			}
  218   			break;
  219   
  220   		case LabelRecord.sid:
  221   			LabelRecord lrec = (LabelRecord) record;
  222   
  223   			thisRow = lrec.getRow();
  224   			thisColumn = lrec.getColumn();
  225   			thisStr = '"' + lrec.getValue() + '"';
  226   			break;
  227   		case LabelSSTRecord.sid:
  228   			LabelSSTRecord lsrec = (LabelSSTRecord) record;
  229   
  230   			thisRow = lsrec.getRow();
  231   			thisColumn = lsrec.getColumn();
  232   			if(sstRecord == null) {
  233   				thisStr = '"' + "(No SST Record, can't identify string)" + '"';
  234   			} else {
  235   				thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
  236   			}
  237   			break;
  238   		case NoteRecord.sid:
  239   			NoteRecord nrec = (NoteRecord) record;
  240   
  241   			thisRow = nrec.getRow();
  242   			thisColumn = nrec.getColumn();
  243   			// TODO: Find object to match nrec.getShapeId()
  244   			thisStr = '"' + "(TODO)" + '"';
  245   			break;
  246   		case NumberRecord.sid:
  247   			NumberRecord numrec = (NumberRecord) record;
  248   
  249   			thisRow = numrec.getRow();
  250   			thisColumn = numrec.getColumn();
  251   
  252   			// Format
  253   			thisStr = formatListener.formatNumberDateCell(numrec);
  254   			break;
  255   		case RKRecord.sid:
  256   			RKRecord rkrec = (RKRecord) record;
  257   
  258   			thisRow = rkrec.getRow();
  259   			thisColumn = rkrec.getColumn();
  260   			thisStr = '"' + "(TODO)" + '"';
  261   			break;
  262   		default:
  263   			break;
  264   		}
  265   
  266   		// Handle new row
  267   		if(thisRow != -1 && thisRow != lastRowNumber) {
  268   			lastColumnNumber = -1;
  269   		}
  270   
  271   		// Handle missing column
  272   		if(record instanceof MissingCellDummyRecord) {
  273   			MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
  274   			thisRow = mc.getRow();
  275   			thisColumn = mc.getColumn();
  276   			thisStr = "";
  277   		}
  278   
  279   		// If we got something to print out, do so
  280   		if(thisStr != null) {
  281   			if(thisColumn > 0) {
  282   				output.print(',');
  283   			}
  284   			output.print(thisStr);
  285   		}
  286   
  287   		// Update column and row count
  288   		if(thisRow > -1)
  289   			lastRowNumber = thisRow;
  290   		if(thisColumn > -1)
  291   			lastColumnNumber = thisColumn;
  292   
  293   		// Handle end of row
  294   		if(record instanceof LastCellOfRowDummyRecord) {
  295   			// Print out any missing commas if needed
  296   			if(minColumns > 0) {
  297   				// Columns are 0 based
  298   				if(lastColumnNumber == -1) { lastColumnNumber = 0; }
  299   				for(int i=lastColumnNumber; i<(minColumns); i++) {
  300   					output.print(',');
  301   				}
  302   			}
  303   
  304   			// We're onto a new row
  305   			lastColumnNumber = -1;
  306   
  307   			// End the row
  308   			output.println();
  309   		}
  310   	}
  311   
  312   	public static void main(String[] args) throws Exception {
  313   		if(args.length < 1) {
  314   			System.err.println("Use:");
  315   			System.err.println("  XLS2CSVmra <xls file> [min columns]");
  316   			System.exit(1);
  317   		}
  318   
  319   		int minColumns = -1;
  320   		if(args.length >= 2) {
  321   			minColumns = Integer.parseInt(args[1]);
  322   		}
  323   
  324   		XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);
  325   		xls2csv.process();
  326   	}
  327   }

Save This Page
Home » POI-3.5 » org.apache » poi » hssf » eventusermodel » examples » [javadoc | source]