001/* 002 * $Id: JBIG2SegmentReader.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.pdf.codec; 045 046import java.io.ByteArrayOutputStream; 047import java.io.IOException; 048import java.util.Iterator; 049import java.util.SortedMap; 050import java.util.SortedSet; 051import java.util.TreeMap; 052import java.util.TreeSet; 053 054import com.itextpdf.text.error_messages.MessageLocalization; 055import com.itextpdf.text.pdf.RandomAccessFileOrArray; 056 057/** 058 * Class to read a JBIG2 file at a basic level: understand all the segments, 059 * understand what segments belong to which pages, how many pages there are, 060 * what the width and height of each page is, and global segments if there 061 * are any. Or: the minimum required to be able to take a normal sequential 062 * or random-access organized file, and be able to embed JBIG2 pages as images 063 * in a PDF. 064 * 065 * TODO: the indeterminate-segment-size value of dataLength, else? 066 * 067 * @since 2.1.5 068 */ 069 070public class JBIG2SegmentReader { 071 072 public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2. 073 074 public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3. 075 public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3. 076 public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3. 077 public static final int PATTERN_DICTIONARY = 16; //see 7.4.4. 078 public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5. 079 public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5. 080 public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5. 081 public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6. 082 public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6. 083 public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6. 084 public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7. 085 public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7. 086 public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7. 087 088 public static final int PAGE_INFORMATION = 48; //see 7.4.8. 089 public static final int END_OF_PAGE = 49; //see 7.4.9. 090 public static final int END_OF_STRIPE = 50; //see 7.4.10. 091 public static final int END_OF_FILE = 51; //see 7.4.11. 092 public static final int PROFILES = 52; //see 7.4.12. 093 public static final int TABLES = 53; //see 7.4.13. 094 public static final int EXTENSION = 62; //see 7.4.14. 095 096 private final SortedMap<Integer, JBIG2Segment> segments = new TreeMap<Integer, JBIG2Segment>(); 097 private final SortedMap<Integer, JBIG2Page> pages = new TreeMap<Integer, JBIG2Page>(); 098 private final SortedSet<JBIG2Segment> globals = new TreeSet<JBIG2Segment>(); 099 private RandomAccessFileOrArray ra; 100 private boolean sequential; 101 private boolean number_of_pages_known; 102 private int number_of_pages = -1; 103 private boolean read = false; 104 105 /** 106 * Inner class that holds information about a JBIG2 segment. 107 * @since 2.1.5 108 */ 109 public static class JBIG2Segment implements Comparable<JBIG2Segment> { 110 111 public final int segmentNumber; 112 public long dataLength = -1; 113 public int page = -1; 114 public int[] referredToSegmentNumbers = null; 115 public boolean[] segmentRetentionFlags = null; 116 public int type = -1; 117 public boolean deferredNonRetain = false; 118 public int countOfReferredToSegments = -1; 119 public byte[] data = null; 120 public byte[] headerData = null; 121 public boolean page_association_size = false; 122 public int page_association_offset = -1; 123 124 public JBIG2Segment(int segment_number) { 125 this.segmentNumber = segment_number; 126 } 127 128 public int compareTo(JBIG2Segment s) { 129 return this.segmentNumber - s.segmentNumber; 130 } 131 132 133 } 134 /** 135 * Inner class that holds information about a JBIG2 page. 136 * @since 2.1.5 137 */ 138 public static class JBIG2Page { 139 public final int page; 140 private final JBIG2SegmentReader sr; 141 private final SortedMap<Integer, JBIG2Segment> segs = new TreeMap<Integer, JBIG2Segment>(); 142 public int pageBitmapWidth = -1; 143 public int pageBitmapHeight = -1; 144 public JBIG2Page(int page, JBIG2SegmentReader sr) { 145 this.page = page; 146 this.sr = sr; 147 } 148 /** 149 * return as a single byte array the header-data for each segment in segment number 150 * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization. 151 * if for_embedding, skip the segment types that are known to be not for acrobat. 152 * @param for_embedding 153 * @return a byte array 154 * @throws IOException 155 */ 156 public byte[] getData(boolean for_embedding) throws IOException { 157 ByteArrayOutputStream os = new ByteArrayOutputStream(); 158 for (Integer sn : segs.keySet()) { 159 JBIG2Segment s = segs.get(sn); 160 161 // pdf reference 1.4, section 3.3.6 JBIG2Decode Filter 162 // D.3 Embedded organisation 163 if ( for_embedding && 164 ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) { 165 continue; 166 } 167 168 if ( for_embedding ) { 169 // change the page association to page 1 170 byte[] headerData_emb = copyByteArray(s.headerData); 171 if ( s.page_association_size ) { 172 headerData_emb[s.page_association_offset] = 0x0; 173 headerData_emb[s.page_association_offset+1] = 0x0; 174 headerData_emb[s.page_association_offset+2] = 0x0; 175 headerData_emb[s.page_association_offset+3] = 0x1; 176 } else { 177 headerData_emb[s.page_association_offset] = 0x1; 178 } 179 os.write(headerData_emb); 180 } else { 181 os.write(s.headerData); 182 } 183 os.write(s.data); 184 } 185 os.close(); 186 return os.toByteArray(); 187 } 188 public void addSegment(JBIG2Segment s) { 189 segs.put(Integer.valueOf(s.segmentNumber), s); 190 } 191 192 } 193 194 public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException { 195 this.ra = ra; 196 } 197 198 public static byte[] copyByteArray(byte[] b) { 199 byte[] bc = new byte[b.length]; 200 System.arraycopy(b, 0, bc, 0, b.length); 201 return bc; 202 } 203 204 public void read() throws IOException { 205 if ( this.read ) { 206 throw new IllegalStateException(MessageLocalization.getComposedMessage("already.attempted.a.read.on.this.jbig2.file")); 207 } 208 this.read = true; 209 210 readFileHeader(); 211 // Annex D 212 if ( this.sequential ) { 213 // D.1 214 do { 215 JBIG2Segment tmp = readHeader(); 216 readSegment(tmp); 217 segments.put(Integer.valueOf(tmp.segmentNumber), tmp); 218 } while ( this.ra.getFilePointer() < this.ra.length() ); 219 } else { 220 // D.2 221 JBIG2Segment tmp; 222 do { 223 tmp = readHeader(); 224 segments.put(Integer.valueOf(tmp.segmentNumber), tmp); 225 } while ( tmp.type != END_OF_FILE ); 226 Iterator<Integer> segs = segments.keySet().iterator(); 227 while ( segs.hasNext() ) { 228 readSegment(segments.get(segs.next())); 229 } 230 } 231 } 232 233 void readSegment(JBIG2Segment s) throws IOException { 234 int ptr = ra.getFilePointer(); 235 236 if ( s.dataLength == 0xffffffffl ) { 237 // TODO figure this bit out, 7.2.7 238 return; 239 } 240 241 byte[] data = new byte[(int)s.dataLength]; 242 ra.read(data); 243 s.data = data; 244 245 if ( s.type == PAGE_INFORMATION ) { 246 int last = ra.getFilePointer(); 247 ra.seek(ptr); 248 int page_bitmap_width = ra.readInt(); 249 int page_bitmap_height = ra.readInt(); 250 ra.seek(last); 251 JBIG2Page p = pages.get(Integer.valueOf(s.page)); 252 if ( p == null ) { 253 throw new IllegalStateException(MessageLocalization.getComposedMessage("referring.to.widht.height.of.page.we.havent.seen.yet.1", s.page)); 254 } 255 256 p.pageBitmapWidth = page_bitmap_width; 257 p.pageBitmapHeight = page_bitmap_height; 258 } 259 } 260 261 JBIG2Segment readHeader() throws IOException { 262 int ptr = ra.getFilePointer(); 263 // 7.2.1 264 int segment_number = ra.readInt(); 265 JBIG2Segment s = new JBIG2Segment(segment_number); 266 267 // 7.2.3 268 int segment_header_flags = ra.read(); 269 boolean deferred_non_retain = ( segment_header_flags & 0x80 ) == 0x80; 270 s.deferredNonRetain = deferred_non_retain; 271 boolean page_association_size = ( segment_header_flags & 0x40 ) == 0x40; 272 int segment_type = segment_header_flags & 0x3f; 273 s.type = segment_type; 274 275 //7.2.4 276 int referred_to_byte0 = ra.read(); 277 int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5; 278 int[] referred_to_segment_numbers = null; 279 boolean[] segment_retention_flags = null; 280 281 if ( count_of_referred_to_segments == 7 ) { 282 // at least five bytes 283 ra.seek(ra.getFilePointer() - 1); 284 count_of_referred_to_segments = ra.readInt() & 0x1fffffff; 285 segment_retention_flags = new boolean[count_of_referred_to_segments+1]; 286 int i = 0; 287 int referred_to_current_byte = 0; 288 do { 289 int j = i % 8; 290 if ( j == 0) { 291 referred_to_current_byte = ra.read(); 292 } 293 segment_retention_flags[i] = (0x1 << j & referred_to_current_byte) >> j == 0x1; 294 i++; 295 } while ( i <= count_of_referred_to_segments ); 296 297 } else if ( count_of_referred_to_segments <= 4 ) { 298 // only one byte 299 segment_retention_flags = new boolean[count_of_referred_to_segments+1]; 300 referred_to_byte0 &= 0x1f; 301 for ( int i = 0; i <= count_of_referred_to_segments; i++ ) { 302 segment_retention_flags[i] = (0x1 << i & referred_to_byte0) >> i == 0x1; 303 } 304 305 } else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) { 306 throw new IllegalStateException(MessageLocalization.getComposedMessage("count.of.referred.to.segments.had.bad.value.in.header.for.segment.1.starting.at.2", String.valueOf(segment_number), String.valueOf(ptr))); 307 } 308 s.segmentRetentionFlags = segment_retention_flags; 309 s.countOfReferredToSegments = count_of_referred_to_segments; 310 311 // 7.2.5 312 referred_to_segment_numbers = new int[count_of_referred_to_segments+1]; 313 for ( int i = 1; i <= count_of_referred_to_segments; i++ ) { 314 if ( segment_number <= 256 ) { 315 referred_to_segment_numbers[i] = ra.read(); 316 } else if ( segment_number <= 65536 ) { 317 referred_to_segment_numbers[i] = ra.readUnsignedShort(); 318 } else { 319 referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack 320 } 321 } 322 s.referredToSegmentNumbers = referred_to_segment_numbers; 323 324 // 7.2.6 325 int segment_page_association; 326 int page_association_offset = ra.getFilePointer() - ptr; 327 if ( page_association_size ) { 328 segment_page_association = ra.readInt(); 329 } else { 330 segment_page_association = ra.read(); 331 } 332 if ( segment_page_association < 0 ) { 333 throw new IllegalStateException(MessageLocalization.getComposedMessage("page.1.invalid.for.segment.2.starting.at.3", String.valueOf(segment_page_association), String.valueOf(segment_number), String.valueOf(ptr))); 334 } 335 s.page = segment_page_association; 336 // so we can change the page association at embedding time. 337 s.page_association_size = page_association_size; 338 s.page_association_offset = page_association_offset; 339 340 if ( segment_page_association > 0 && ! pages.containsKey(Integer.valueOf(segment_page_association)) ) { 341 pages.put(Integer.valueOf(segment_page_association), new JBIG2Page(segment_page_association, this)); 342 } 343 if ( segment_page_association > 0 ) { 344 pages.get(Integer.valueOf(segment_page_association)).addSegment(s); 345 } else { 346 globals.add(s); 347 } 348 349 // 7.2.7 350 long segment_data_length = ra.readUnsignedInt(); 351 // TODO the 0xffffffff value that might be here, and how to understand those afflicted segments 352 s.dataLength = segment_data_length; 353 354 int end_ptr = ra.getFilePointer(); 355 ra.seek(ptr); 356 byte[] header_data = new byte[end_ptr - ptr]; 357 ra.read(header_data); 358 s.headerData = header_data; 359 360 return s; 361 } 362 363 void readFileHeader() throws IOException { 364 ra.seek(0); 365 byte[] idstring = new byte[8]; 366 ra.read(idstring); 367 368 byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A}; 369 370 for ( int i = 0; i < idstring.length; i++ ) { 371 if ( idstring[i] != refidstring[i] ) { 372 throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.idstring.not.good.at.byte.1", i)); 373 } 374 } 375 376 int fileheaderflags = ra.read(); 377 378 this.sequential = ( fileheaderflags & 0x1 ) == 0x1; 379 this.number_of_pages_known = ( fileheaderflags & 0x2) == 0x0; 380 381 if ( (fileheaderflags & 0xfc) != 0x0 ) { 382 throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.flags.bits.2.7.not.0")); 383 } 384 385 if ( this.number_of_pages_known ) { 386 this.number_of_pages = ra.readInt(); 387 } 388 } 389 390 public int numberOfPages() { 391 return pages.size(); 392 } 393 394 public int getPageHeight(int i) { 395 return pages.get(Integer.valueOf(i)).pageBitmapHeight; 396 } 397 398 public int getPageWidth(int i) { 399 return pages.get(Integer.valueOf(i)).pageBitmapWidth; 400 } 401 402 public JBIG2Page getPage(int page) { 403 return pages.get(Integer.valueOf(page)); 404 } 405 406 public byte[] getGlobal(boolean for_embedding) { 407 ByteArrayOutputStream os = new ByteArrayOutputStream(); 408 try { 409 for (Object element : globals) { 410 JBIG2Segment s = (JBIG2Segment)element; 411 if ( for_embedding && 412 ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) { 413 continue; 414 } 415 os.write(s.headerData); 416 os.write(s.data); 417 } 418 os.close(); 419 } catch (IOException e) { 420 e.printStackTrace(); 421 } 422 if ( os.size() <= 0 ) { 423 return null; 424 } 425 return os.toByteArray(); 426 } 427 428 @Override 429 public String toString() { 430 if ( this.read ) { 431 return "Jbig2SegmentReader: number of pages: " + this.numberOfPages(); 432 } else { 433 return "Jbig2SegmentReader in indeterminate state."; 434 } 435 } 436}