1
2 /* ====================================================================
3 Licensed to the Apache Software Foundation (ASF) under one or more
4 contributor license agreements. See the NOTICE file distributed with
5 this work for additional information regarding copyright ownership.
6 The ASF licenses this file to You under the Apache License, Version 2.0
7 (the "License"); you may not use this file except in compliance with
8 the License. You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 ==================================================================== */
18
19
20
21 package org.apache.poi.hslf.model;
22
23 import java.util.LinkedList;
24 import java.util.Vector;
25
26 import org.apache.poi.hslf.model.textproperties.TextPropCollection;
27 import org.apache.poi.hslf.record;
28 import org.apache.poi.hslf.usermodel.RichTextRun;
29 import org.apache.poi.hslf.usermodel.SlideShow;
30 import org.apache.poi.util.StringUtil;
31
32 /**
33 * This class represents a run of text in a powerpoint document. That
34 * run could be text on a sheet, or text in a note.
35 * It is only a very basic class for now
36 *
37 * @author Nick Burch
38 */
39
40 public class TextRun
41 {
42 // Note: These fields are protected to help with unit testing
43 // Other classes shouldn't really go playing with them!
44 protected TextHeaderAtom _headerAtom;
45 protected TextBytesAtom _byteAtom;
46 protected TextCharsAtom _charAtom;
47 protected StyleTextPropAtom _styleAtom;
48 protected boolean _isUnicode;
49 protected RichTextRun[] _rtRuns;
50 private SlideShow slideShow;
51 private Sheet sheet;
52 private int shapeId;
53 private int slwtIndex; //position in the owning SlideListWithText
54 /**
55 * all text run records that follow TextHeaderAtom.
56 * (there can be misc InteractiveInfo, TxInteractiveInfo and other records)
57 */
58 protected Record[] _records;
59
60 /**
61 * Constructs a Text Run from a Unicode text block
62 *
63 * @param tha the TextHeaderAtom that defines what's what
64 * @param tca the TextCharsAtom containing the text
65 * @param sta the StyleTextPropAtom which defines the character stylings
66 */
67 public TextRun(TextHeaderAtom tha, TextCharsAtom tca, StyleTextPropAtom sta) {
68 this(tha,null,tca,sta);
69 }
70
71 /**
72 * Constructs a Text Run from a Ascii text block
73 *
74 * @param tha the TextHeaderAtom that defines what's what
75 * @param tba the TextBytesAtom containing the text
76 * @param sta the StyleTextPropAtom which defines the character stylings
77 */
78 public TextRun(TextHeaderAtom tha, TextBytesAtom tba, StyleTextPropAtom sta) {
79 this(tha,tba,null,sta);
80 }
81
82 /**
83 * Internal constructor and initializer
84 */
85 private TextRun(TextHeaderAtom tha, TextBytesAtom tba, TextCharsAtom tca, StyleTextPropAtom sta) {
86 _headerAtom = tha;
87 _styleAtom = sta;
88 if(tba != null) {
89 _byteAtom = tba;
90 _isUnicode = false;
91 } else {
92 _charAtom = tca;
93 _isUnicode = true;
94 }
95 String runRawText = getText();
96
97 // Figure out the rich text runs
98 LinkedList pStyles = new LinkedList();
99 LinkedList cStyles = new LinkedList();
100 if(_styleAtom != null) {
101 // Get the style atom to grok itself
102 _styleAtom.setParentTextSize(runRawText.length());
103 pStyles = _styleAtom.getParagraphStyles();
104 cStyles = _styleAtom.getCharacterStyles();
105 }
106
107 // Handle case of no current style, with a default
108 if(pStyles.size() == 0 || cStyles.size() == 0) {
109 _rtRuns = new RichTextRun[1];
110 _rtRuns[0] = new RichTextRun(this, 0, runRawText.length());
111 } else {
112 // Build up Rich Text Runs, one for each
113 // character/paragraph style pair
114 Vector rtrs = new Vector();
115
116 int pos = 0;
117
118 int curP = 0;
119 int curC = 0;
120 int pLenRemain = -1;
121 int cLenRemain = -1;
122
123 // Build one for each run with the same style
124 while(pos <= runRawText.length() && curP < pStyles.size() && curC < cStyles.size()) {
125 // Get the Props to use
126 TextPropCollection pProps = (TextPropCollection)pStyles.get(curP);
127 TextPropCollection cProps = (TextPropCollection)cStyles.get(curC);
128
129 int pLen = pProps.getCharactersCovered();
130 int cLen = cProps.getCharactersCovered();
131
132 // Handle new pass
133 boolean freshSet = false;
134 if(pLenRemain == -1 && cLenRemain == -1) { freshSet = true; }
135 if(pLenRemain == -1) { pLenRemain = pLen; }
136 if(cLenRemain == -1) { cLenRemain = cLen; }
137
138 // So we know how to build the eventual run
139 int runLen = -1;
140 boolean pShared = false;
141 boolean cShared = false;
142
143 // Same size, new styles - neither shared
144 if(pLen == cLen && freshSet) {
145 runLen = cLen;
146 pShared = false;
147 cShared = false;
148 curP++;
149 curC++;
150 pLenRemain = -1;
151 cLenRemain = -1;
152 } else {
153 // Some sharing
154
155 // See if we are already in a shared block
156 if(pLenRemain < pLen) {
157 // Existing shared p block
158 pShared = true;
159
160 // Do we end with the c block, or either side of it?
161 if(pLenRemain == cLenRemain) {
162 // We end at the same time
163 cShared = false;
164 runLen = pLenRemain;
165 curP++;
166 curC++;
167 pLenRemain = -1;
168 cLenRemain = -1;
169 } else if(pLenRemain < cLenRemain) {
170 // We end before the c block
171 cShared = true;
172 runLen = pLenRemain;
173 curP++;
174 cLenRemain -= pLenRemain;
175 pLenRemain = -1;
176 } else {
177 // We end after the c block
178 cShared = false;
179 runLen = cLenRemain;
180 curC++;
181 pLenRemain -= cLenRemain;
182 cLenRemain = -1;
183 }
184 } else if(cLenRemain < cLen) {
185 // Existing shared c block
186 cShared = true;
187
188 // Do we end with the p block, or either side of it?
189 if(pLenRemain == cLenRemain) {
190 // We end at the same time
191 pShared = false;
192 runLen = cLenRemain;
193 curP++;
194 curC++;
195 pLenRemain = -1;
196 cLenRemain = -1;
197 } else if(cLenRemain < pLenRemain) {
198 // We end before the p block
199 pShared = true;
200 runLen = cLenRemain;
201 curC++;
202 pLenRemain -= cLenRemain;
203 cLenRemain = -1;
204 } else {
205 // We end after the p block
206 pShared = false;
207 runLen = pLenRemain;
208 curP++;
209 cLenRemain -= pLenRemain;
210 pLenRemain = -1;
211 }
212 } else {
213 // Start of a shared block
214 if(pLenRemain < cLenRemain) {
215 // Shared c block
216 pShared = false;
217 cShared = true;
218 runLen = pLenRemain;
219 curP++;
220 cLenRemain -= pLenRemain;
221 pLenRemain = -1;
222 } else {
223 // Shared p block
224 pShared = true;
225 cShared = false;
226 runLen = cLenRemain;
227 curC++;
228 pLenRemain -= cLenRemain;
229 cLenRemain = -1;
230 }
231 }
232 }
233
234 // Wind on
235 int prevPos = pos;
236 pos += runLen;
237 // Adjust for end-of-run extra 1 length
238 if(pos > runRawText.length()) {
239 runLen--;
240 }
241
242 // Save
243 RichTextRun rtr = new RichTextRun(this, prevPos, runLen, pProps, cProps, pShared, cShared);
244 rtrs.add(rtr);
245 }
246
247 // Build the array
248 _rtRuns = new RichTextRun[rtrs.size()];
249 rtrs.copyInto(_rtRuns);
250 }
251 }
252
253
254 // Update methods follow
255
256 /**
257 * Adds the supplied text onto the end of the TextRun,
258 * creating a new RichTextRun (returned) for it to
259 * sit in.
260 * In many cases, before calling this, you'll want to add
261 * a newline onto the end of your last RichTextRun
262 */
263 public RichTextRun appendText(String s) {
264 // We will need a StyleTextProp atom
265 ensureStyleAtomPresent();
266
267 // First up, append the text to the
268 // underlying text atom
269 int oldSize = getRawText().length();
270 storeText(
271 getRawText() + s
272 );
273
274 // If either of the previous styles overran
275 // the text by one, we need to shuffle that
276 // extra character onto the new ones
277 int pOverRun = _styleAtom.getParagraphTextLengthCovered() - oldSize;
278 int cOverRun = _styleAtom.getCharacterTextLengthCovered() - oldSize;
279 if(pOverRun > 0) {
280 TextPropCollection tpc = (TextPropCollection)
281 _styleAtom.getParagraphStyles().getLast();
282 tpc.updateTextSize(
283 tpc.getCharactersCovered() - pOverRun
284 );
285 }
286 if(cOverRun > 0) {
287 TextPropCollection tpc = (TextPropCollection)
288 _styleAtom.getCharacterStyles().getLast();
289 tpc.updateTextSize(
290 tpc.getCharactersCovered() - cOverRun
291 );
292 }
293
294 // Next, add the styles for its paragraph and characters
295 TextPropCollection newPTP =
296 _styleAtom.addParagraphTextPropCollection(s.length()+pOverRun);
297 TextPropCollection newCTP =
298 _styleAtom.addCharacterTextPropCollection(s.length()+cOverRun);
299
300 // Now, create the new RichTextRun
301 RichTextRun nr = new RichTextRun(
302 this, oldSize, s.length(),
303 newPTP, newCTP, false, false
304 );
305
306 // Add the new RichTextRun onto our list
307 RichTextRun[] newRuns = new RichTextRun[_rtRuns.length+1];
308 System.arraycopy(_rtRuns, 0, newRuns, 0, _rtRuns.length);
309 newRuns[newRuns.length-1] = nr;
310 _rtRuns = newRuns;
311
312 // And return the new run to the caller
313 return nr;
314 }
315
316 /**
317 * Saves the given string to the records. Doesn't
318 * touch the stylings.
319 */
320 private void storeText(String s) {
321 // Remove a single trailing \r, as there is an implicit one at the
322 // end of every record
323 if(s.endsWith("\r")) {
324 s = s.substring(0, s.length()-1);
325 }
326
327 // Store in the appropriate record
328 if(_isUnicode) {
329 // The atom can safely convert to unicode
330 _charAtom.setText(s);
331 } else {
332 // Will it fit in a 8 bit atom?
333 boolean hasMultibyte = StringUtil.hasMultibyte(s);
334 if(! hasMultibyte) {
335 // Fine to go into 8 bit atom
336 byte[] text = new byte[s.length()];
337 StringUtil.putCompressedUnicode(s,text,0);
338 _byteAtom.setText(text);
339 } else {
340 // Need to swap a TextBytesAtom for a TextCharsAtom
341
342 // Build the new TextCharsAtom
343 _charAtom = new TextCharsAtom();
344 _charAtom.setText(s);
345
346 // Use the TextHeaderAtom to do the swap on the parent
347 RecordContainer parent = _headerAtom.getParentRecord();
348 Record[] cr = parent.getChildRecords();
349 for(int i=0; i<cr.length; i++) {
350 // Look for TextBytesAtom
351 if(cr[i].equals(_byteAtom)) {
352 // Found it, so replace, then all done
353 cr[i] = _charAtom;
354 break;
355 }
356 }
357
358 // Flag the change
359 _byteAtom = null;
360 _isUnicode = true;
361 }
362 }
363 /**
364 * If TextSpecInfoAtom is present, we must update the text size in it,
365 * otherwise the ppt will be corrupted
366 */
367 if(_records != null) for (int i = 0; i < _records.length; i++) {
368 if(_records[i] instanceof TextSpecInfoAtom){
369 TextSpecInfoAtom specAtom = (TextSpecInfoAtom)_records[i];
370 if((s.length() + 1) != specAtom.getCharactersCovered()){
371 specAtom.reset(s.length() + 1);
372 }
373 }
374 }
375 }
376
377 /**
378 * Handles an update to the text stored in one of the Rich Text Runs
379 * @param run
380 * @param s
381 */
382 public synchronized void changeTextInRichTextRun(RichTextRun run, String s) {
383 // Figure out which run it is
384 int runID = -1;
385 for(int i=0; i<_rtRuns.length; i++) {
386 if(run.equals(_rtRuns[i])) {
387 runID = i;
388 }
389 }
390 if(runID == -1) {
391 throw new IllegalArgumentException("Supplied RichTextRun wasn't from this TextRun");
392 }
393
394 // Ensure a StyleTextPropAtom is present, adding if required
395 ensureStyleAtomPresent();
396
397 // Update the text length for its Paragraph and Character stylings
398 // If it's shared:
399 // * calculate the new length based on the run's old text
400 // * this should leave in any +1's for the end of block if needed
401 // If it isn't shared:
402 // * reset the length, to the new string's length
403 // * add on +1 if the last block
404 // The last run needs its stylings to be 1 longer than the raw
405 // text is. This is to define the stylings that any new text
406 // that is added will inherit
407 TextPropCollection pCol = run._getRawParagraphStyle();
408 TextPropCollection cCol = run._getRawCharacterStyle();
409 int newSize = s.length();
410 if(runID == _rtRuns.length-1) {
411 newSize++;
412 }
413
414 if(run._isParagraphStyleShared()) {
415 pCol.updateTextSize( pCol.getCharactersCovered() - run.getLength() + s.length() );
416 } else {
417 pCol.updateTextSize(newSize);
418 }
419 if(run._isCharacterStyleShared()) {
420 cCol.updateTextSize( cCol.getCharactersCovered() - run.getLength() + s.length() );
421 } else {
422 cCol.updateTextSize(newSize);
423 }
424
425 // Build up the new text
426 // As we go through, update the start position for all subsequent runs
427 // The building relies on the old text still being present
428 StringBuffer newText = new StringBuffer();
429 for(int i=0; i<_rtRuns.length; i++) {
430 int newStartPos = newText.length();
431
432 // Build up the new text
433 if(i != runID) {
434 // Not the affected run, so keep old text
435 newText.append(_rtRuns[i].getRawText());
436 } else {
437 // Affected run, so use new text
438 newText.append(s);
439 }
440
441 // Do we need to update the start position of this run?
442 // (Need to get the text before we update the start pos)
443 if(i <= runID) {
444 // Change is after this, so don't need to change start position
445 } else {
446 // Change has occured, so update start position
447 _rtRuns[i].updateStartPosition(newStartPos);
448 }
449 }
450
451 // Now we can save the new text
452 storeText(newText.toString());
453 }
454
455 /**
456 * Changes the text, and sets it all to have the same styling
457 * as the the first character has.
458 * If you care about styling, do setText on a RichTextRun instead
459 */
460 public synchronized void setRawText(String s) {
461 // Save the new text to the atoms
462 storeText(s);
463 RichTextRun fst = _rtRuns[0];
464
465 // Finally, zap and re-do the RichTextRuns
466 for(int i=0; i<_rtRuns.length; i++) { _rtRuns[i] = null; }
467 _rtRuns = new RichTextRun[1];
468 _rtRuns[0] = fst;
469
470 // Now handle record stylings:
471 // If there isn't styling
472 // no change, stays with no styling
473 // If there is styling:
474 // everthing gets the same style that the first block has
475 if(_styleAtom != null) {
476 LinkedList pStyles = _styleAtom.getParagraphStyles();
477 while(pStyles.size() > 1) { pStyles.removeLast(); }
478
479 LinkedList cStyles = _styleAtom.getCharacterStyles();
480 while(cStyles.size() > 1) { cStyles.removeLast(); }
481
482 _rtRuns[0].setText(s);
483 } else {
484 // Recreate rich text run with no styling
485 _rtRuns[0] = new RichTextRun(this,0,s.length());
486 }
487
488 }
489
490 /**
491 * Changes the text.
492 * Converts '\r' into '\n'
493 */
494 public synchronized void setText(String s) {
495 String text = normalize(s);
496 setRawText(text);
497 }
498
499 /**
500 * Ensure a StyleTextPropAtom is present for this run,
501 * by adding if required. Normally for internal TextRun use.
502 */
503 public synchronized void ensureStyleAtomPresent() {
504 if(_styleAtom != null) {
505 // All there
506 return;
507 }
508
509 // Create a new one at the right size
510 _styleAtom = new StyleTextPropAtom(getRawText().length() + 1);
511
512 // Use the TextHeader atom to get at the parent
513 RecordContainer runAtomsParent = _headerAtom.getParentRecord();
514
515 // Add the new StyleTextPropAtom after the TextCharsAtom / TextBytesAtom
516 Record addAfter = _byteAtom;
517 if(_byteAtom == null) { addAfter = _charAtom; }
518 runAtomsParent.addChildAfter(_styleAtom, addAfter);
519
520 // Feed this to our sole rich text run
521 if(_rtRuns.length != 1) {
522 throw new IllegalStateException("Needed to add StyleTextPropAtom when had many rich text runs");
523 }
524 // These are the only styles for now
525 _rtRuns[0].supplyTextProps(
526 (TextPropCollection)_styleAtom.getParagraphStyles().get(0),
527 (TextPropCollection)_styleAtom.getCharacterStyles().get(0),
528 false,
529 false
530 );
531 }
532
533 // Accesser methods follow
534
535 /**
536 * Returns the text content of the run, which has been made safe
537 * for printing and other use.
538 */
539 public String getText() {
540 String rawText = getRawText();
541
542 // PowerPoint seems to store files with \r as the line break
543 // The messes things up on everything but a Mac, so translate
544 // them to \n
545 String text = rawText.replace('\r','\n');
546
547 int type = _headerAtom == null ? 0 : _headerAtom.getTextType();
548 if(type == TextHeaderAtom.TITLE_TYPE || type == TextHeaderAtom.CENTER_TITLE_TYPE){
549 //0xB acts like cariage return in page titles and like blank in the others
550 text = text.replace((char) 0x0B, '\n');
551 } else {
552 text = text.replace((char) 0x0B, ' ');
553 }
554 return text;
555 }
556
557 /**
558 * Returns the raw text content of the run. This hasn't had any
559 * changes applied to it, and so is probably unlikely to print
560 * out nicely.
561 */
562 public String getRawText() {
563 if(_isUnicode) {
564 return _charAtom.getText();
565 } else {
566 return _byteAtom.getText();
567 }
568 }
569
570 /**
571 * Fetch the rich text runs (runs of text with the same styling) that
572 * are contained within this block of text
573 */
574 public RichTextRun[] getRichTextRuns() {
575 return _rtRuns;
576 }
577
578 /**
579 * Returns the type of the text, from the TextHeaderAtom.
580 * Possible values can be seen from TextHeaderAtom
581 * @see org.apache.poi.hslf.record.TextHeaderAtom
582 */
583 public int getRunType() {
584 return _headerAtom.getTextType();
585 }
586
587 /**
588 * Changes the type of the text. Values should be taken
589 * from TextHeaderAtom. No checking is done to ensure you
590 * set this to a valid value!
591 * @see org.apache.poi.hslf.record.TextHeaderAtom
592 */
593 public void setRunType(int type) {
594 _headerAtom.setTextType(type);
595 }
596
597 /**
598 * Supply the SlideShow we belong to.
599 * Also passes it on to our child RichTextRuns
600 */
601 public void supplySlideShow(SlideShow ss) {
602 slideShow = ss;
603 if(_rtRuns != null) {
604 for(int i=0; i<_rtRuns.length; i++) {
605 _rtRuns[i].supplySlideShow(slideShow);
606 }
607 }
608 }
609
610 public void setSheet(Sheet sheet){
611 this.sheet = sheet;
612 }
613
614 public Sheet getSheet(){
615 return this.sheet;
616 }
617
618 /**
619 * @return Shape ID
620 */
621 protected int getShapeId(){
622 return shapeId;
623 }
624
625 /**
626 * @param id Shape ID
627 */
628 protected void setShapeId(int id){
629 shapeId = id;
630 }
631
632 /**
633 * @return 0-based index of the text run in the SLWT container
634 */
635 protected int getIndex(){
636 return slwtIndex;
637 }
638
639 /**
640 * @param id 0-based index of the text run in the SLWT container
641 */
642 protected void setIndex(int id){
643 slwtIndex = id;
644 }
645
646 /**
647 * Returns the array of all hyperlinks in this text run
648 *
649 * @return the array of all hyperlinks in this text run
650 * or <code>null</code> if not found.
651 */
652 public Hyperlink[] getHyperlinks(){
653 return Hyperlink.find(this);
654 }
655
656 /**
657 * Fetch RichTextRun at a given position
658 *
659 * @param pos 0-based index in the text
660 * @return RichTextRun or null if not found
661 */
662 public RichTextRun getRichTextRunAt(int pos){
663 for (int i = 0; i < _rtRuns.length; i++) {
664 int start = _rtRuns[i].getStartIndex();
665 int end = _rtRuns[i].getEndIndex();
666 if(pos >= start && pos < end) return _rtRuns[i];
667 }
668 return null;
669 }
670
671 public TextRulerAtom getTextRuler(){
672 for (int i = 0; i < _records.length; i++) {
673 if(_records[i] instanceof TextRulerAtom) return (TextRulerAtom)_records[i];
674 }
675 return null;
676
677 }
678
679 /**
680 * Returns a new string with line breaks converted into internal ppt representation
681 */
682 public String normalize(String s){
683 String ns = s.replaceAll("\\r?\\n", "\r");
684 return ns;
685 }
686
687 /**
688 * Returns records that make up this text run
689 *
690 * @return text run records
691 */
692 public Record[] getRecords(){
693 return _records;
694 }
695 }