001/* 002 * $Id: PdfEncodings.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044package com.itextpdf.text.pdf; 045import java.io.BufferedReader; 046import java.io.IOException; 047import java.io.InputStream; 048import java.io.InputStreamReader; 049import java.io.UnsupportedEncodingException; 050import java.util.ArrayList; 051import java.util.HashMap; 052import java.util.StringTokenizer; 053 054import com.itextpdf.text.ExceptionConverter; 055import com.itextpdf.text.error_messages.MessageLocalization; 056import java.nio.CharBuffer; 057import java.nio.charset.Charset; 058import java.nio.charset.CharsetEncoder; 059import java.nio.charset.CodingErrorAction; 060/** Supports fast encodings for winansi and PDFDocEncoding. 061 * Supports conversions from CJK encodings to CID. 062 * Supports custom encodings. 063 * @author Paulo Soares 064 */ 065public class PdfEncodings { 066 protected static final int CIDNONE = 0; 067 protected static final int CIDRANGE = 1; 068 protected static final int CIDCHAR = 2; 069 070 static final char winansiByteToChar[] = { 071 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 072 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 073 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 074 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 075 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 076 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 077 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 078 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 079 8364, 65533, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 65533, 381, 65533, 080 65533, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 65533, 382, 376, 081 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 082 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 083 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 084 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 085 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 086 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}; 087 088 static final char pdfEncodingByteToChar[] = { 089 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 090 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 091 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 092 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 093 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 094 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 095 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 096 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 097 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, 098 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 65533, 099 0x20ac, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 100 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 101 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 102 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 103 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 104 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}; 105 106 static final IntHashtable winansi = new IntHashtable(); 107 108 static final IntHashtable pdfEncoding = new IntHashtable(); 109 110 static HashMap<String, ExtraEncoding> extraEncodings = new HashMap<String, ExtraEncoding>(); 111 112 static { 113 for (int k = 128; k < 161; ++k) { 114 char c = winansiByteToChar[k]; 115 if (c != 65533) 116 winansi.put(c, k); 117 } 118 119 for (int k = 128; k < 161; ++k) { 120 char c = pdfEncodingByteToChar[k]; 121 if (c != 65533) 122 pdfEncoding.put(c, k); 123 } 124 125 addExtraEncoding("Wingdings", new WingdingsConversion()); 126 addExtraEncoding("Symbol", new SymbolConversion(true)); 127 addExtraEncoding("ZapfDingbats", new SymbolConversion(false)); 128 addExtraEncoding("SymbolTT", new SymbolTTConversion()); 129 addExtraEncoding("Cp437", new Cp437Conversion()); 130 } 131 132 /** Converts a <CODE>String</CODE> to a </CODE>byte</CODE> array according 133 * to the font's encoding. 134 * @return an array of <CODE>byte</CODE> representing the conversion according to the font's encoding 135 * @param encoding the encoding 136 * @param text the <CODE>String</CODE> to be converted 137 */ 138 public static final byte[] convertToBytes(String text, String encoding) { 139 if (text == null) 140 return new byte[0]; 141 if (encoding == null || encoding.length() == 0) { 142 int len = text.length(); 143 byte b[] = new byte[len]; 144 for (int k = 0; k < len; ++k) 145 b[k] = (byte)text.charAt(k); 146 return b; 147 } 148 ExtraEncoding extra = extraEncodings.get(encoding.toLowerCase()); 149 if (extra != null) { 150 byte b[] = extra.charToByte(text, encoding); 151 if (b != null) 152 return b; 153 } 154 IntHashtable hash = null; 155 if (encoding.equals(BaseFont.WINANSI)) 156 hash = winansi; 157 else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING)) 158 hash = pdfEncoding; 159 if (hash != null) { 160 char cc[] = text.toCharArray(); 161 int len = cc.length; 162 int ptr = 0; 163 byte b[] = new byte[len]; 164 int c = 0; 165 for (int k = 0; k < len; ++k) { 166 char char1 = cc[k]; 167 if (char1 < 128 || char1 > 160 && char1 <= 255) 168 c = char1; 169 else 170 c = hash.get(char1); 171 if (c != 0) 172 b[ptr++] = (byte)c; 173 } 174 if (ptr == len) 175 return b; 176 byte b2[] = new byte[ptr]; 177 System.arraycopy(b, 0, b2, 0, ptr); 178 return b2; 179 } 180 if (encoding.equals(PdfObject.TEXT_UNICODE)) { 181 // workaround for jdk 1.2.2 bug 182 char cc[] = text.toCharArray(); 183 int len = cc.length; 184 byte b[] = new byte[cc.length * 2 + 2]; 185 b[0] = -2; 186 b[1] = -1; 187 int bptr = 2; 188 for (int k = 0; k < len; ++k) { 189 char c = cc[k]; 190 b[bptr++] = (byte)(c >> 8); 191 b[bptr++] = (byte)(c & 0xff); 192 } 193 return b; 194 } 195 try { 196 Charset cc = Charset.forName(encoding); 197 CharsetEncoder ce = cc.newEncoder(); 198 ce.onUnmappableCharacter(CodingErrorAction.IGNORE); 199 CharBuffer cb = CharBuffer.wrap(text.toCharArray()); 200 java.nio.ByteBuffer bb = ce.encode(cb); 201 bb.rewind(); 202 int lim = bb.limit(); 203 byte[] br = new byte[lim]; 204 bb.get(br); 205 return br; 206 } 207 catch (IOException e) { 208 throw new ExceptionConverter(e); 209 } 210 } 211 212 /** Converts a <CODE>String</CODE> to a </CODE>byte</CODE> array according 213 * to the font's encoding. 214 * @return an array of <CODE>byte</CODE> representing the conversion according to the font's encoding 215 * @param encoding the encoding 216 * @param char1 the <CODE>char</CODE> to be converted 217 */ 218 public static final byte[] convertToBytes(char char1, String encoding) { 219 if (encoding == null || encoding.length() == 0) 220 return new byte[]{(byte)char1}; 221 ExtraEncoding extra = extraEncodings.get(encoding.toLowerCase()); 222 if (extra != null) { 223 byte b[] = extra.charToByte(char1, encoding); 224 if (b != null) 225 return b; 226 } 227 IntHashtable hash = null; 228 if (encoding.equals(BaseFont.WINANSI)) 229 hash = winansi; 230 else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING)) 231 hash = pdfEncoding; 232 if (hash != null) { 233 int c = 0; 234 if (char1 < 128 || char1 > 160 && char1 <= 255) 235 c = char1; 236 else 237 c = hash.get(char1); 238 if (c != 0) 239 return new byte[]{(byte)c}; 240 else 241 return new byte[0]; 242 } 243 if (encoding.equals(PdfObject.TEXT_UNICODE)) { 244 // workaround for jdk 1.2.2 bug 245 byte b[] = new byte[4]; 246 b[0] = -2; 247 b[1] = -1; 248 b[2] = (byte)(char1 >> 8); 249 b[3] = (byte)(char1 & 0xff); 250 return b; 251 } 252 try { 253 Charset cc = Charset.forName(encoding); 254 CharsetEncoder ce = cc.newEncoder(); 255 ce.onUnmappableCharacter(CodingErrorAction.IGNORE); 256 CharBuffer cb = CharBuffer.wrap(new char[]{char1}); 257 java.nio.ByteBuffer bb = ce.encode(cb); 258 bb.rewind(); 259 int lim = bb.limit(); 260 byte[] br = new byte[lim]; 261 bb.get(br); 262 return br; 263 } 264 catch (IOException e) { 265 throw new ExceptionConverter(e); 266 } 267 } 268 269 /** Converts a </CODE>byte</CODE> array to a <CODE>String</CODE> according 270 * to the some encoding. 271 * @param bytes the bytes to convert 272 * @param encoding the encoding 273 * @return the converted <CODE>String</CODE> 274 */ 275 public static final String convertToString(byte bytes[], String encoding) { 276 if (bytes == null) 277 return PdfObject.NOTHING; 278 if (encoding == null || encoding.length() == 0) { 279 char c[] = new char[bytes.length]; 280 for (int k = 0; k < bytes.length; ++k) 281 c[k] = (char)(bytes[k] & 0xff); 282 return new String(c); 283 } 284 ExtraEncoding extra = extraEncodings.get(encoding.toLowerCase()); 285 if (extra != null) { 286 String text = extra.byteToChar(bytes, encoding); 287 if (text != null) 288 return text; 289 } 290 char ch[] = null; 291 if (encoding.equals(BaseFont.WINANSI)) 292 ch = winansiByteToChar; 293 else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING)) 294 ch = pdfEncodingByteToChar; 295 if (ch != null) { 296 int len = bytes.length; 297 char c[] = new char[len]; 298 for (int k = 0; k < len; ++k) { 299 c[k] = ch[bytes[k] & 0xff]; 300 } 301 return new String(c); 302 } 303 try { 304 return new String(bytes, encoding); 305 } 306 catch (UnsupportedEncodingException e) { 307 throw new ExceptionConverter(e); 308 } 309 } 310 311 /** Checks is <CODE>text</CODE> only has PdfDocEncoding characters. 312 * @param text the <CODE>String</CODE> to test 313 * @return <CODE>true</CODE> if only PdfDocEncoding characters are present 314 */ 315 public static boolean isPdfDocEncoding(String text) { 316 if (text == null) 317 return true; 318 int len = text.length(); 319 for (int k = 0; k < len; ++k) { 320 char char1 = text.charAt(k); 321 if (char1 < 128 || char1 > 160 && char1 <= 255) 322 continue; 323 if (!pdfEncoding.containsKey(char1)) 324 return false; 325 } 326 return true; 327 } 328 329 static final HashMap<String, char[][]> cmaps = new HashMap<String, char[][]>(); 330 /** Assumes that '\\n' and '\\r\\n' are the newline sequences. It may not work for 331 * all CJK encodings. To be used with loadCmap(). 332 */ 333 public static final byte CRLF_CID_NEWLINE[][] = new byte[][]{{(byte)'\n'}, {(byte)'\r', (byte)'\n'}}; 334 335 /** Clears the CJK cmaps from the cache. If <CODE>name</CODE> is the 336 * empty string then all the cache is cleared. Calling this method 337 * has no consequences other than the need to reload the cmap 338 * if needed. 339 * @param name the name of the cmap to clear or all the cmaps if the empty string 340 */ 341 public static void clearCmap(String name) { 342 synchronized (cmaps) { 343 if (name.length() == 0) 344 cmaps.clear(); 345 else 346 cmaps.remove(name); 347 } 348 } 349 350 /** Loads a CJK cmap to the cache with the option of associating 351 * sequences to the newline. 352 * @param name the CJK cmap name 353 * @param newline the sequences to be replaced by a newline in the resulting CID. See <CODE>CRLF_CID_NEWLINE</CODE> 354 */ 355 public static void loadCmap(String name, byte newline[][]) { 356 try { 357 char planes[][] = null; 358 synchronized (cmaps) { 359 planes = cmaps.get(name); 360 } 361 if (planes == null) { 362 planes = readCmap(name, newline); 363 synchronized (cmaps) { 364 cmaps.put(name, planes); 365 } 366 } 367 } 368 catch (IOException e) { 369 throw new ExceptionConverter(e); 370 } 371 } 372 373 /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE> 374 * to a CID string. This is needed to reach some CJK characters 375 * that don't exist in 16 bit Unicode.</p> 376 * The font to use this result must use the encoding "Identity-H" 377 * or "Identity-V".</p> 378 * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/. 379 * @param name the CJK encoding name 380 * @param seq the <CODE>byte</CODE> array to be decoded 381 * @return the CID string 382 */ 383 public static String convertCmap(String name, byte seq[]) { 384 return convertCmap(name, seq, 0, seq.length); 385 } 386 387 /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE> 388 * to a CID string. This is needed to reach some CJK characters 389 * that don't exist in 16 bit Unicode.</p> 390 * The font to use this result must use the encoding "Identity-H" 391 * or "Identity-V".</p> 392 * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/. 393 * @param name the CJK encoding name 394 * @param start the start offset in the data 395 * @param length the number of bytes to convert 396 * @param seq the <CODE>byte</CODE> array to be decoded 397 * @return the CID string 398 */ 399 public static String convertCmap(String name, byte seq[], int start, int length) { 400 try { 401 char planes[][] = null; 402 synchronized (cmaps) { 403 planes = cmaps.get(name); 404 } 405 if (planes == null) { 406 planes = readCmap(name, (byte[][])null); 407 synchronized (cmaps) { 408 cmaps.put(name, planes); 409 } 410 } 411 return decodeSequence(seq, start, length, planes); 412 } 413 catch (IOException e) { 414 throw new ExceptionConverter(e); 415 } 416 } 417 418 static String decodeSequence(byte seq[], int start, int length, char planes[][]) { 419 StringBuffer buf = new StringBuffer(); 420 int end = start + length; 421 int currentPlane = 0; 422 for (int k = start; k < end; ++k) { 423 int one = seq[k] & 0xff; 424 char plane[] = planes[currentPlane]; 425 int cid = plane[one]; 426 if ((cid & 0x8000) == 0) { 427 buf.append((char)cid); 428 currentPlane = 0; 429 } 430 else 431 currentPlane = cid & 0x7fff; 432 } 433 return buf.toString(); 434 } 435 436 static char[][] readCmap(String name, byte newline[][]) throws IOException { 437 ArrayList<char[]> planes = new ArrayList<char[]>(); 438 planes.add(new char[256]); 439 readCmap(name, planes); 440 if (newline != null) { 441 for (int k = 0; k < newline.length; ++k) 442 encodeSequence(newline[k].length, newline[k], BaseFont.CID_NEWLINE, planes); 443 } 444 char ret[][] = new char[planes.size()][]; 445 return planes.toArray(ret); 446 } 447 448 static void readCmap(String name, ArrayList<char[]> planes) throws IOException { 449 String fullName = BaseFont.RESOURCE_PATH + "cmaps/" + name; 450 InputStream in = BaseFont.getResourceStream(fullName); 451 if (in == null) 452 throw new IOException(MessageLocalization.getComposedMessage("the.cmap.1.was.not.found", name)); 453 encodeStream(in, planes); 454 in.close(); 455 } 456 457 static void encodeStream(InputStream in, ArrayList<char[]> planes) throws IOException { 458 BufferedReader rd = new BufferedReader(new InputStreamReader(in, "iso-8859-1")); 459 String line = null; 460 int state = CIDNONE; 461 byte seqs[] = new byte[7]; 462 while ((line = rd.readLine()) != null) { 463 if (line.length() < 6) 464 continue; 465 switch (state) { 466 case CIDNONE: { 467 if (line.indexOf("begincidrange") >= 0) 468 state = CIDRANGE; 469 else if (line.indexOf("begincidchar") >= 0) 470 state = CIDCHAR; 471 else if (line.indexOf("usecmap") >= 0) { 472 StringTokenizer tk = new StringTokenizer(line); 473 String t = tk.nextToken(); 474 readCmap(t.substring(1), planes); 475 } 476 break; 477 } 478 case CIDRANGE: { 479 if (line.indexOf("endcidrange") >= 0) { 480 state = CIDNONE; 481 break; 482 } 483 StringTokenizer tk = new StringTokenizer(line); 484 String t = tk.nextToken(); 485 int size = t.length() / 2 - 1; 486 long start = Long.parseLong(t.substring(1, t.length() - 1), 16); 487 t = tk.nextToken(); 488 long end = Long.parseLong(t.substring(1, t.length() - 1), 16); 489 t = tk.nextToken(); 490 int cid = Integer.parseInt(t); 491 for (long k = start; k <= end; ++k) { 492 breakLong(k, size, seqs); 493 encodeSequence(size, seqs, (char)cid, planes); 494 ++cid; 495 } 496 break; 497 } 498 case CIDCHAR: { 499 if (line.indexOf("endcidchar") >= 0) { 500 state = CIDNONE; 501 break; 502 } 503 StringTokenizer tk = new StringTokenizer(line); 504 String t = tk.nextToken(); 505 int size = t.length() / 2 - 1; 506 long start = Long.parseLong(t.substring(1, t.length() - 1), 16); 507 t = tk.nextToken(); 508 int cid = Integer.parseInt(t); 509 breakLong(start, size, seqs); 510 encodeSequence(size, seqs, (char)cid, planes); 511 break; 512 } 513 } 514 } 515 } 516 517 static void breakLong(long n, int size, byte seqs[]) { 518 for (int k = 0; k < size; ++k) { 519 seqs[k] = (byte)(n >> (size - 1 - k) * 8); 520 } 521 } 522 523 static void encodeSequence(int size, byte seqs[], char cid, ArrayList<char[]> planes) { 524 --size; 525 int nextPlane = 0; 526 for (int idx = 0; idx < size; ++idx) { 527 char plane[] = planes.get(nextPlane); 528 int one = seqs[idx] & 0xff; 529 char c = plane[one]; 530 if (c != 0 && (c & 0x8000) == 0) 531 throw new RuntimeException(MessageLocalization.getComposedMessage("inconsistent.mapping")); 532 if (c == 0) { 533 planes.add(new char[256]); 534 c = (char)(planes.size() - 1 | 0x8000); 535 plane[one] = c; 536 } 537 nextPlane = c & 0x7fff; 538 } 539 char plane[] = planes.get(nextPlane); 540 int one = seqs[size] & 0xff; 541 char c = plane[one]; 542 if ((c & 0x8000) != 0) 543 throw new RuntimeException(MessageLocalization.getComposedMessage("inconsistent.mapping")); 544 plane[one] = cid; 545 } 546 547 /** Adds an extra encoding. 548 * @param name the name of the encoding. The encoding recognition is case insensitive 549 * @param enc the conversion class 550 */ 551 @SuppressWarnings("unchecked") 552 public static void addExtraEncoding(String name, ExtraEncoding enc) { 553 synchronized (extraEncodings) { // This serializes concurrent updates 554 HashMap<String, ExtraEncoding> newEncodings = (HashMap<String, ExtraEncoding>)extraEncodings.clone(); 555 newEncodings.put(name.toLowerCase(), enc); 556 extraEncodings = newEncodings; // This swap does not require synchronization with reader 557 } 558 } 559 560 private static class WingdingsConversion implements ExtraEncoding { 561 562 public byte[] charToByte(char char1, String encoding) { 563 if (char1 == ' ') 564 return new byte[]{(byte)char1}; 565 else if (char1 >= '\u2701' && char1 <= '\u27BE') { 566 byte v = table[char1 - 0x2700]; 567 if (v != 0) 568 return new byte[]{v}; 569 } 570 return new byte[0]; 571 } 572 573 public byte[] charToByte(String text, String encoding) { 574 char cc[] = text.toCharArray(); 575 byte b[] = new byte[cc.length]; 576 int ptr = 0; 577 int len = cc.length; 578 for (int k = 0; k < len; ++k) { 579 char c = cc[k]; 580 if (c == ' ') 581 b[ptr++] = (byte)c; 582 else if (c >= '\u2701' && c <= '\u27BE') { 583 byte v = table[c - 0x2700]; 584 if (v != 0) 585 b[ptr++] = v; 586 } 587 } 588 if (ptr == len) 589 return b; 590 byte b2[] = new byte[ptr]; 591 System.arraycopy(b, 0, b2, 0, ptr); 592 return b2; 593 } 594 595 public String byteToChar(byte[] b, String encoding) { 596 return null; 597 } 598 599 private final static byte table[] = { 600 0, 35, 34, 0, 0, 0, 41, 62, 81, 42, 601 0, 0, 65, 63, 0, 0, 0, 0, 0, -4, 602 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, 603 86, 0, 88, 89, 0, 0, 0, 0, 0, 0, 604 0, 0, -75, 0, 0, 0, 0, 0, -74, 0, 605 0, 0, -83, -81, -84, 0, 0, 0, 0, 0, 606 0, 0, 0, 124, 123, 0, 0, 0, 84, 0, 607 0, 0, 0, 0, 0, 0, 0, -90, 0, 0, 608 0, 113, 114, 0, 0, 0, 117, 0, 0, 0, 609 0, 0, 0, 125, 126, 0, 0, 0, 0, 0, 610 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 611 0, 0, 0, 0, 0, 0, 0, 0, -116, -115, 612 -114, -113, -112, -111, -110, -109, -108, -107, -127, -126, 613 -125, -124, -123, -122, -121, -120, -119, -118, -116, -115, 614 -114, -113, -112, -111, -110, -109, -108, -107, -24, 0, 615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 616 0, -24, -40, 0, 0, -60, -58, 0, 0, -16, 617 0, 0, 0, 0, 0, 0, 0, 0, 0, -36, 618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 619 0 620 }; 621 } 622 623 private static class Cp437Conversion implements ExtraEncoding { 624 private static IntHashtable c2b = new IntHashtable(); 625 626 public byte[] charToByte(String text, String encoding) { 627 char cc[] = text.toCharArray(); 628 byte b[] = new byte[cc.length]; 629 int ptr = 0; 630 int len = cc.length; 631 for (int k = 0; k < len; ++k) { 632 char c = cc[k]; 633 if (c < 128) 634 b[ptr++] = (byte)c; 635 else { 636 byte v = (byte)c2b.get(c); 637 if (v != 0) 638 b[ptr++] = v; 639 } 640 } 641 if (ptr == len) 642 return b; 643 byte b2[] = new byte[ptr]; 644 System.arraycopy(b, 0, b2, 0, ptr); 645 return b2; 646 } 647 648 public byte[] charToByte(char char1, String encoding) { 649 if (char1 < 128) 650 return new byte[]{(byte)char1}; 651 else { 652 byte v = (byte)c2b.get(char1); 653 if (v != 0) 654 return new byte[]{v}; 655 else 656 return new byte[0]; 657 } 658 } 659 660 public String byteToChar(byte[] b, String encoding) { 661 int len = b.length; 662 char cc[] = new char[len]; 663 int ptr = 0; 664 for (int k = 0; k < len; ++k) { 665 int c = b[k] & 0xff; 666 if (c < ' ') 667 continue; 668 if (c < 128) 669 cc[ptr++] = (char)c; 670 else { 671 char v = table[c - 128]; 672 cc[ptr++] = v; 673 } 674 } 675 return new String(cc, 0, ptr); 676 } 677 678 private final static char table[] = { 679 '\u00C7', '\u00FC', '\u00E9', '\u00E2', '\u00E4', '\u00E0', '\u00E5', '\u00E7', '\u00EA', '\u00EB', '\u00E8', '\u00EF', '\u00EE', '\u00EC', '\u00C4', '\u00C5', 680 '\u00C9', '\u00E6', '\u00C6', '\u00F4', '\u00F6', '\u00F2', '\u00FB', '\u00F9', '\u00FF', '\u00D6', '\u00DC', '\u00A2', '\u00A3', '\u00A5', '\u20A7', '\u0192', 681 '\u00E1', '\u00ED', '\u00F3', '\u00FA', '\u00F1', '\u00D1', '\u00AA', '\u00BA', '\u00BF', '\u2310', '\u00AC', '\u00BD', '\u00BC', '\u00A1', '\u00AB', '\u00BB', 682 '\u2591', '\u2592', '\u2593', '\u2502', '\u2524', '\u2561', '\u2562', '\u2556', '\u2555', '\u2563', '\u2551', '\u2557', '\u255D', '\u255C', '\u255B', '\u2510', 683 '\u2514', '\u2534', '\u252C', '\u251C', '\u2500', '\u253C', '\u255E', '\u255F', '\u255A', '\u2554', '\u2569', '\u2566', '\u2560', '\u2550', '\u256C', '\u2567', 684 '\u2568', '\u2564', '\u2565', '\u2559', '\u2558', '\u2552', '\u2553', '\u256B', '\u256A', '\u2518', '\u250C', '\u2588', '\u2584', '\u258C', '\u2590', '\u2580', 685 '\u03B1', '\u00DF', '\u0393', '\u03C0', '\u03A3', '\u03C3', '\u00B5', '\u03C4', '\u03A6', '\u0398', '\u03A9', '\u03B4', '\u221E', '\u03C6', '\u03B5', '\u2229', 686 '\u2261', '\u00B1', '\u2265', '\u2264', '\u2320', '\u2321', '\u00F7', '\u2248', '\u00B0', '\u2219', '\u00B7', '\u221A', '\u207F', '\u00B2', '\u25A0', '\u00A0' 687 }; 688 689 static { 690 for (int k = 0; k < table.length; ++k) 691 c2b.put(table[k], k + 128); 692 } 693 } 694 695 private static class SymbolConversion implements ExtraEncoding { 696 697 private static final IntHashtable t1 = new IntHashtable(); 698 private static final IntHashtable t2 = new IntHashtable(); 699 private IntHashtable translation; 700 701 SymbolConversion(boolean symbol) { 702 if (symbol) 703 translation = t1; 704 else 705 translation = t2; 706 } 707 708 public byte[] charToByte(String text, String encoding) { 709 char cc[] = text.toCharArray(); 710 byte b[] = new byte[cc.length]; 711 int ptr = 0; 712 int len = cc.length; 713 for (int k = 0; k < len; ++k) { 714 char c = cc[k]; 715 byte v = (byte)translation.get(c); 716 if (v != 0) 717 b[ptr++] = v; 718 } 719 if (ptr == len) 720 return b; 721 byte b2[] = new byte[ptr]; 722 System.arraycopy(b, 0, b2, 0, ptr); 723 return b2; 724 } 725 726 public byte[] charToByte(char char1, String encoding) { 727 byte v = (byte)translation.get(char1); 728 if (v != 0) 729 return new byte[]{v}; 730 else 731 return new byte[0]; 732 } 733 734 public String byteToChar(byte[] b, String encoding) { 735 return null; 736 } 737 738 private final static char table1[] = { 739 ' ','!','\u2200','#','\u2203','%','&','\u220b','(',')','*','+',',','-','.','/', 740 '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?', 741 '\u2245','\u0391','\u0392','\u03a7','\u0394','\u0395','\u03a6','\u0393','\u0397','\u0399','\u03d1','\u039a','\u039b','\u039c','\u039d','\u039f', 742 '\u03a0','\u0398','\u03a1','\u03a3','\u03a4','\u03a5','\u03c2','\u03a9','\u039e','\u03a8','\u0396','[','\u2234',']','\u22a5','_', 743 '\u0305','\u03b1','\u03b2','\u03c7','\u03b4','\u03b5','\u03d5','\u03b3','\u03b7','\u03b9','\u03c6','\u03ba','\u03bb','\u03bc','\u03bd','\u03bf', 744 '\u03c0','\u03b8','\u03c1','\u03c3','\u03c4','\u03c5','\u03d6','\u03c9','\u03be','\u03c8','\u03b6','{','|','}','~','\0', 745 '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0', 746 '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0', 747 '\u20ac','\u03d2','\u2032','\u2264','\u2044','\u221e','\u0192','\u2663','\u2666','\u2665','\u2660','\u2194','\u2190','\u2191','\u2192','\u2193', 748 '\u00b0','\u00b1','\u2033','\u2265','\u00d7','\u221d','\u2202','\u2022','\u00f7','\u2260','\u2261','\u2248','\u2026','\u2502','\u2500','\u21b5', 749 '\u2135','\u2111','\u211c','\u2118','\u2297','\u2295','\u2205','\u2229','\u222a','\u2283','\u2287','\u2284','\u2282','\u2286','\u2208','\u2209', 750 '\u2220','\u2207','\u00ae','\u00a9','\u2122','\u220f','\u221a','\u2022','\u00ac','\u2227','\u2228','\u21d4','\u21d0','\u21d1','\u21d2','\u21d3', 751 '\u25ca','\u2329','\0','\0','\0','\u2211','\u239b','\u239c','\u239d','\u23a1','\u23a2','\u23a3','\u23a7','\u23a8','\u23a9','\u23aa', 752 '\0','\u232a','\u222b','\u2320','\u23ae','\u2321','\u239e','\u239f','\u23a0','\u23a4','\u23a5','\u23a6','\u23ab','\u23ac','\u23ad','\0' 753 }; 754 755 private final static char table2[] = { 756 '\u0020','\u2701','\u2702','\u2703','\u2704','\u260e','\u2706','\u2707','\u2708','\u2709','\u261b','\u261e','\u270C','\u270D','\u270E','\u270F', 757 '\u2710','\u2711','\u2712','\u2713','\u2714','\u2715','\u2716','\u2717','\u2718','\u2719','\u271A','\u271B','\u271C','\u271D','\u271E','\u271F', 758 '\u2720','\u2721','\u2722','\u2723','\u2724','\u2725','\u2726','\u2727','\u2605','\u2729','\u272A','\u272B','\u272C','\u272D','\u272E','\u272F', 759 '\u2730','\u2731','\u2732','\u2733','\u2734','\u2735','\u2736','\u2737','\u2738','\u2739','\u273A','\u273B','\u273C','\u273D','\u273E','\u273F', 760 '\u2740','\u2741','\u2742','\u2743','\u2744','\u2745','\u2746','\u2747','\u2748','\u2749','\u274A','\u274B','\u25cf','\u274D','\u25a0','\u274F', 761 '\u2750','\u2751','\u2752','\u25b2','\u25bc','\u25c6','\u2756','\u25d7','\u2758','\u2759','\u275A','\u275B','\u275C','\u275D','\u275E','\u0000', 762 '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0', 763 '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0', 764 '\u0000','\u2761','\u2762','\u2763','\u2764','\u2765','\u2766','\u2767','\u2663','\u2666','\u2665','\u2660','\u2460','\u2461','\u2462','\u2463', 765 '\u2464','\u2465','\u2466','\u2467','\u2468','\u2469','\u2776','\u2777','\u2778','\u2779','\u277A','\u277B','\u277C','\u277D','\u277E','\u277F', 766 '\u2780','\u2781','\u2782','\u2783','\u2784','\u2785','\u2786','\u2787','\u2788','\u2789','\u278A','\u278B','\u278C','\u278D','\u278E','\u278F', 767 '\u2790','\u2791','\u2792','\u2793','\u2794','\u2192','\u2194','\u2195','\u2798','\u2799','\u279A','\u279B','\u279C','\u279D','\u279E','\u279F', 768 '\u27A0','\u27A1','\u27A2','\u27A3','\u27A4','\u27A5','\u27A6','\u27A7','\u27A8','\u27A9','\u27AA','\u27AB','\u27AC','\u27AD','\u27AE','\u27AF', 769 '\u0000','\u27B1','\u27B2','\u27B3','\u27B4','\u27B5','\u27B6','\u27B7','\u27B8','\u27B9','\u27BA','\u27BB','\u27BC','\u27BD','\u27BE','\u0000' 770 }; 771 772 static { 773 for (int k = 0; k < table1.length; ++k) { 774 int v = table1[k]; 775 if (v != 0) 776 t1.put(v, k + 32); 777 } 778 for (int k = 0; k < table2.length; ++k) { 779 int v = table2[k]; 780 if (v != 0) 781 t2.put(v, k + 32); 782 } 783 } 784 } 785 786 private static class SymbolTTConversion implements ExtraEncoding { 787 788 public byte[] charToByte(char char1, String encoding) { 789 if ((char1 & 0xff00) == 0 || (char1 & 0xff00) == 0xf000) 790 return new byte[]{(byte)char1}; 791 else 792 return new byte[0]; 793 } 794 795 public byte[] charToByte(String text, String encoding) { 796 char ch[] = text.toCharArray(); 797 byte b[] = new byte[ch.length]; 798 int ptr = 0; 799 int len = ch.length; 800 for (int k = 0; k < len; ++k) { 801 char c = ch[k]; 802 if ((c & 0xff00) == 0 || (c & 0xff00) == 0xf000) 803 b[ptr++] = (byte)c; 804 } 805 if (ptr == len) 806 return b; 807 byte b2[] = new byte[ptr]; 808 System.arraycopy(b, 0, b2, 0, ptr); 809 return b2; 810 } 811 812 public String byteToChar(byte[] b, String encoding) { 813 return null; 814 } 815 816 } 817}