Source code

001/*
002 * $Id: PdfEncodings.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.pdf;
045import java.io.BufferedReader;
046import java.io.IOException;
047import java.io.InputStream;
048import java.io.InputStreamReader;
049import java.io.UnsupportedEncodingException;
050import java.util.ArrayList;
051import java.util.HashMap;
052import java.util.StringTokenizer;
053
054import com.itextpdf.text.ExceptionConverter;
055import com.itextpdf.text.error_messages.MessageLocalization;
056import java.nio.CharBuffer;
057import java.nio.charset.Charset;
058import java.nio.charset.CharsetEncoder;
059import java.nio.charset.CodingErrorAction;
060/** Supports fast encodings for winansi and PDFDocEncoding.
061 * Supports conversions from CJK encodings to CID.
062 * Supports custom encodings.
063 * @author Paulo Soares
064 */
065public class PdfEncodings {
    /** Parser state used by <CODE>encodeStream</CODE>: not inside a mapping section. */
    protected static final int CIDNONE = 0;
    /** Parser state used by <CODE>encodeStream</CODE>: inside a section opened by a line containing "begincidrange". */
    protected static final int CIDRANGE = 1;
    /** Parser state used by <CODE>encodeStream</CODE>: inside a section opened by a line containing "begincidchar". */
    protected static final int CIDCHAR = 2;
069
    /** Byte-to-char table used by <CODE>convertToString</CODE> when the encoding
     * equals <CODE>BaseFont.WINANSI</CODE>: entry <CODE>n</CODE> is the char
     * produced for byte value <CODE>n</CODE>. Entries holding 65533 are skipped
     * by the static initializer when it builds the reverse (char-to-byte) table.
     */
    static final char winansiByteToChar[] = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
        8364, 65533, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 65533, 381, 65533,
        65533, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 65533, 382, 376,
        160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255};
087
    /** Byte-to-char table used by <CODE>convertToString</CODE> when the encoding
     * equals <CODE>PdfObject.TEXT_PDFDOCENCODING</CODE>: entry <CODE>n</CODE> is
     * the char produced for byte value <CODE>n</CODE>. Entries holding 65533 are
     * skipped by the static initializer when it builds the reverse
     * (char-to-byte) table.
     */
    static final char pdfEncodingByteToChar[] = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
        0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
        0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 65533,
        0x20ac, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
        240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255};
105
    /** Char-to-byte lookup for <CODE>BaseFont.WINANSI</CODE>, filled by the static
     * initializer from entries 128 to 160 of <CODE>winansiByteToChar</CODE>.
     */
    static final IntHashtable winansi = new IntHashtable();

    /** Char-to-byte lookup for <CODE>PdfObject.TEXT_PDFDOCENCODING</CODE>, filled by
     * the static initializer from entries 128 to 160 of <CODE>pdfEncodingByteToChar</CODE>.
     */
    static final IntHashtable pdfEncoding = new IntHashtable();

    /** Registered extra encodings, keyed by the lower-cased encoding name.
     * <CODE>addExtraEncoding</CODE> never mutates the current map; it assigns an
     * updated copy to this field.
     */
    static HashMap<String, ExtraEncoding> extraEncodings = new HashMap<String, ExtraEncoding>();
111
112    static {
113        for (int k = 128; k < 161; ++k) {
114            char c = winansiByteToChar[k];
115            if (c != 65533)
116                winansi.put(c, k);
117        }
118
119        for (int k = 128; k < 161; ++k) {
120            char c = pdfEncodingByteToChar[k];
121            if (c != 65533)
122                pdfEncoding.put(c, k);
123        }
124
125        addExtraEncoding("Wingdings", new WingdingsConversion());
126        addExtraEncoding("Symbol", new SymbolConversion(true));
127        addExtraEncoding("ZapfDingbats", new SymbolConversion(false));
128        addExtraEncoding("SymbolTT", new SymbolTTConversion());
129        addExtraEncoding("Cp437", new Cp437Conversion());
130    }
131
132    /** Converts a <CODE>String</CODE> to a </CODE>byte</CODE> array according
133     * to the font's encoding.
134     * @return an array of <CODE>byte</CODE> representing the conversion according to the font's encoding
135     * @param encoding the encoding
136     * @param text the <CODE>String</CODE> to be converted
137     */
138    public static final byte[] convertToBytes(String text, String encoding) {
139        if (text == null)
140            return new byte[0];
141        if (encoding == null || encoding.length() == 0) {
142            int len = text.length();
143            byte b[] = new byte[len];
144            for (int k = 0; k < len; ++k)
145                b[k] = (byte)text.charAt(k);
146            return b;
147        }
148        ExtraEncoding extra = extraEncodings.get(encoding.toLowerCase());
149        if (extra != null) {
150            byte b[] = extra.charToByte(text, encoding);
151            if (b != null)
152                return b;
153        }
154        IntHashtable hash = null;
155        if (encoding.equals(BaseFont.WINANSI))
156            hash = winansi;
157        else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING))
158            hash = pdfEncoding;
159        if (hash != null) {
160            char cc[] = text.toCharArray();
161            int len = cc.length;
162            int ptr = 0;
163            byte b[] = new byte[len];
164            int c = 0;
165            for (int k = 0; k < len; ++k) {
166                char char1 = cc[k];
167                if (char1 < 128 || char1 > 160 && char1 <= 255)
168                    c = char1;
169                else
170                    c = hash.get(char1);
171                if (c != 0)
172                    b[ptr++] = (byte)c;
173            }
174            if (ptr == len)
175                return b;
176            byte b2[] = new byte[ptr];
177            System.arraycopy(b, 0, b2, 0, ptr);
178            return b2;
179        }
180        if (encoding.equals(PdfObject.TEXT_UNICODE)) {
181            // workaround for jdk 1.2.2 bug
182            char cc[] = text.toCharArray();
183            int len = cc.length;
184            byte b[] = new byte[cc.length * 2 + 2];
185            b[0] = -2;
186            b[1] = -1;
187            int bptr = 2;
188            for (int k = 0; k < len; ++k) {
189                char c = cc[k];
190                b[bptr++] = (byte)(c >> 8);
191                b[bptr++] = (byte)(c & 0xff);
192            }
193            return b;
194        }
195        try {
196            Charset cc = Charset.forName(encoding);
197            CharsetEncoder ce = cc.newEncoder();
198            ce.onUnmappableCharacter(CodingErrorAction.IGNORE);
199            CharBuffer cb = CharBuffer.wrap(text.toCharArray());
200            java.nio.ByteBuffer bb = ce.encode(cb);
201            bb.rewind();
202            int lim = bb.limit();
203            byte[] br = new byte[lim];
204            bb.get(br);
205            return br;
206        }
207        catch (IOException e) {
208            throw new ExceptionConverter(e);
209        }
210    }
211
212    /** Converts a <CODE>String</CODE> to a </CODE>byte</CODE> array according
213     * to the font's encoding.
214     * @return an array of <CODE>byte</CODE> representing the conversion according to the font's encoding
215     * @param encoding the encoding
216     * @param char1 the <CODE>char</CODE> to be converted
217     */
218    public static final byte[] convertToBytes(char char1, String encoding) {
219        if (encoding == null || encoding.length() == 0)
220            return new byte[]{(byte)char1};
221        ExtraEncoding extra = extraEncodings.get(encoding.toLowerCase());
222        if (extra != null) {
223            byte b[] = extra.charToByte(char1, encoding);
224            if (b != null)
225                return b;
226        }
227        IntHashtable hash = null;
228        if (encoding.equals(BaseFont.WINANSI))
229            hash = winansi;
230        else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING))
231            hash = pdfEncoding;
232        if (hash != null) {
233            int c = 0;
234            if (char1 < 128 || char1 > 160 && char1 <= 255)
235                c = char1;
236            else
237                c = hash.get(char1);
238            if (c != 0)
239                return new byte[]{(byte)c};
240            else
241                return new byte[0];
242        }
243        if (encoding.equals(PdfObject.TEXT_UNICODE)) {
244            // workaround for jdk 1.2.2 bug
245            byte b[] = new byte[4];
246            b[0] = -2;
247            b[1] = -1;
248            b[2] = (byte)(char1 >> 8);
249            b[3] = (byte)(char1 & 0xff);
250            return b;
251        }
252        try {
253            Charset cc = Charset.forName(encoding);
254            CharsetEncoder ce = cc.newEncoder();
255            ce.onUnmappableCharacter(CodingErrorAction.IGNORE);
256            CharBuffer cb = CharBuffer.wrap(new char[]{char1});
257            java.nio.ByteBuffer bb = ce.encode(cb);
258            bb.rewind();
259            int lim = bb.limit();
260            byte[] br = new byte[lim];
261            bb.get(br);
262            return br;
263        }
264        catch (IOException e) {
265            throw new ExceptionConverter(e);
266        }
267    }
268
269    /** Converts a </CODE>byte</CODE> array to a <CODE>String</CODE> according
270     * to the some encoding.
271     * @param bytes the bytes to convert
272     * @param encoding the encoding
273     * @return the converted <CODE>String</CODE>
274     */
275    public static final String convertToString(byte bytes[], String encoding) {
276        if (bytes == null)
277            return PdfObject.NOTHING;
278        if (encoding == null || encoding.length() == 0) {
279            char c[] = new char[bytes.length];
280            for (int k = 0; k < bytes.length; ++k)
281                c[k] = (char)(bytes[k] & 0xff);
282            return new String(c);
283        }
284        ExtraEncoding extra = extraEncodings.get(encoding.toLowerCase());
285        if (extra != null) {
286            String text = extra.byteToChar(bytes, encoding);
287            if (text != null)
288                return text;
289        }
290        char ch[] = null;
291        if (encoding.equals(BaseFont.WINANSI))
292            ch = winansiByteToChar;
293        else if (encoding.equals(PdfObject.TEXT_PDFDOCENCODING))
294            ch = pdfEncodingByteToChar;
295        if (ch != null) {
296            int len = bytes.length;
297            char c[] = new char[len];
298            for (int k = 0; k < len; ++k) {
299                c[k] = ch[bytes[k] & 0xff];
300            }
301            return new String(c);
302        }
303        try {
304            return new String(bytes, encoding);
305        }
306        catch (UnsupportedEncodingException e) {
307            throw new ExceptionConverter(e);
308        }
309    }
310
311    /** Checks is <CODE>text</CODE> only has PdfDocEncoding characters.
312     * @param text the <CODE>String</CODE> to test
313     * @return <CODE>true</CODE> if only PdfDocEncoding characters are present
314     */
315    public static boolean isPdfDocEncoding(String text) {
316        if (text == null)
317            return true;
318        int len = text.length();
319        for (int k = 0; k < len; ++k) {
320            char char1 = text.charAt(k);
321            if (char1 < 128 || char1 > 160 && char1 <= 255)
322                continue;
323            if (!pdfEncoding.containsKey(char1))
324                return false;
325        }
326        return true;
327    }
328
    /** Cache of loaded CJK cmaps keyed by cmap name. <CODE>clearCmap</CODE>,
     * <CODE>loadCmap</CODE> and <CODE>convertCmap</CODE> access it inside
     * <CODE>synchronized (cmaps)</CODE> blocks.
     */
    static final HashMap<String, char[][]> cmaps = new HashMap<String, char[][]>();
    /** Newline sequences for use with loadCmap(): a single line feed, and a
     * carriage return followed by a line feed. Assumes these are the newline
     * sequences of the encoding; it may not work for all CJK encodings.
     */
    public static final byte CRLF_CID_NEWLINE[][] = new byte[][]{{(byte)'\n'}, {(byte)'\r', (byte)'\n'}};
334
335    /** Clears the CJK cmaps from the cache. If <CODE>name</CODE> is the
336     * empty string then all the cache is cleared. Calling this method
337     * has no consequences other than the need to reload the cmap
338     * if needed.
339     * @param name the name of the cmap to clear or all the cmaps if the empty string
340     */
341    public static void clearCmap(String name) {
342        synchronized (cmaps) {
343            if (name.length() == 0)
344                cmaps.clear();
345            else
346                cmaps.remove(name);
347        }
348    }
349
350    /** Loads a CJK cmap to the cache with the option of associating
351     * sequences to the newline.
352     * @param name the CJK cmap name
353     * @param newline the sequences to be replaced by a newline in the resulting CID. See <CODE>CRLF_CID_NEWLINE</CODE>
354     */
355    public static void loadCmap(String name, byte newline[][]) {
356        try {
357            char planes[][] = null;
358            synchronized (cmaps) {
359                planes = cmaps.get(name);
360            }
361            if (planes == null) {
362                planes = readCmap(name, newline);
363                synchronized (cmaps) {
364                    cmaps.put(name, planes);
365                }
366            }
367        }
368        catch (IOException e) {
369            throw new ExceptionConverter(e);
370        }
371    }
372
373    /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE>
374     * to a CID string. This is needed to reach some CJK characters
375     * that don't exist in 16 bit Unicode.</p>
376     * The font to use this result must use the encoding "Identity-H"
377     * or "Identity-V".</p>
378     * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/.
379     * @param name the CJK encoding name
380     * @param seq the <CODE>byte</CODE> array to be decoded
381     * @return the CID string
382     */
383    public static String convertCmap(String name, byte seq[]) {
384        return convertCmap(name, seq, 0, seq.length);
385    }
386
387    /** Converts a <CODE>byte</CODE> array encoded as <CODE>name</CODE>
388     * to a CID string. This is needed to reach some CJK characters
389     * that don't exist in 16 bit Unicode.</p>
390     * The font to use this result must use the encoding "Identity-H"
391     * or "Identity-V".</p>
392     * See ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/adobe/.
393     * @param name the CJK encoding name
394     * @param start the start offset in the data
395     * @param length the number of bytes to convert
396     * @param seq the <CODE>byte</CODE> array to be decoded
397     * @return the CID string
398     */
399    public static String convertCmap(String name, byte seq[], int start, int length) {
400        try {
401            char planes[][] = null;
402            synchronized (cmaps) {
403                planes = cmaps.get(name);
404            }
405            if (planes == null) {
406                planes = readCmap(name, (byte[][])null);
407                synchronized (cmaps) {
408                    cmaps.put(name, planes);
409                }
410            }
411            return decodeSequence(seq, start, length, planes);
412        }
413        catch (IOException e) {
414            throw new ExceptionConverter(e);
415        }
416    }
417
418    static String decodeSequence(byte seq[], int start, int length, char planes[][]) {
419        StringBuffer buf = new StringBuffer();
420        int end = start + length;
421        int currentPlane = 0;
422        for (int k = start; k < end; ++k) {
423            int one = seq[k] & 0xff;
424            char plane[] = planes[currentPlane];
425            int cid = plane[one];
426            if ((cid & 0x8000) == 0) {
427                buf.append((char)cid);
428                currentPlane = 0;
429            }
430            else
431                currentPlane = cid & 0x7fff;
432        }
433        return buf.toString();
434    }
435
436    static char[][] readCmap(String name, byte newline[][]) throws IOException {
437        ArrayList<char[]> planes = new ArrayList<char[]>();
438        planes.add(new char[256]);
439        readCmap(name, planes);
440        if (newline != null) {
441            for (int k = 0; k < newline.length; ++k)
442                encodeSequence(newline[k].length, newline[k], BaseFont.CID_NEWLINE, planes);
443        }
444        char ret[][] = new char[planes.size()][];
445        return planes.toArray(ret);
446    }
447
448    static void readCmap(String name, ArrayList<char[]> planes) throws IOException {
449        String fullName = BaseFont.RESOURCE_PATH + "cmaps/" + name;
450        InputStream in = BaseFont.getResourceStream(fullName);
451        if (in == null)
452            throw new IOException(MessageLocalization.getComposedMessage("the.cmap.1.was.not.found", name));
453        encodeStream(in, planes);
454        in.close();
455    }
456
457    static void encodeStream(InputStream in, ArrayList<char[]> planes) throws IOException {
458        BufferedReader rd = new BufferedReader(new InputStreamReader(in, "iso-8859-1"));
459        String line = null;
460        int state = CIDNONE;
461        byte seqs[] = new byte[7];
462        while ((line = rd.readLine()) != null) {
463            if (line.length() < 6)
464                continue;
465            switch (state) {
466                case CIDNONE: {
467                    if (line.indexOf("begincidrange") >= 0)
468                        state = CIDRANGE;
469                    else if (line.indexOf("begincidchar") >= 0)
470                        state = CIDCHAR;
471                    else if (line.indexOf("usecmap") >= 0) {
472                        StringTokenizer tk = new StringTokenizer(line);
473                        String t = tk.nextToken();
474                        readCmap(t.substring(1), planes);
475                    }
476                    break;
477                }
478                case CIDRANGE: {
479                    if (line.indexOf("endcidrange") >= 0) {
480                        state = CIDNONE;
481                        break;
482                    }
483                    StringTokenizer tk = new StringTokenizer(line);
484                    String t = tk.nextToken();
485                    int size = t.length() / 2 - 1;
486                    long start = Long.parseLong(t.substring(1, t.length() - 1), 16);
487                    t = tk.nextToken();
488                    long end = Long.parseLong(t.substring(1, t.length() - 1), 16);
489                    t = tk.nextToken();
490                    int cid = Integer.parseInt(t);
491                    for (long k = start; k <= end; ++k) {
492                        breakLong(k, size, seqs);
493                        encodeSequence(size, seqs, (char)cid, planes);
494                        ++cid;
495                    }
496                    break;
497                }
498                case CIDCHAR: {
499                    if (line.indexOf("endcidchar") >= 0) {
500                        state = CIDNONE;
501                        break;
502                    }
503                    StringTokenizer tk = new StringTokenizer(line);
504                    String t = tk.nextToken();
505                    int size = t.length() / 2 - 1;
506                    long start = Long.parseLong(t.substring(1, t.length() - 1), 16);
507                    t = tk.nextToken();
508                    int cid = Integer.parseInt(t);
509                    breakLong(start, size, seqs);
510                    encodeSequence(size, seqs, (char)cid, planes);
511                    break;
512                }
513            }
514        }
515    }
516
517    static void breakLong(long n, int size, byte seqs[]) {
518        for (int k = 0; k < size; ++k) {
519            seqs[k] = (byte)(n >> (size - 1 - k) * 8);
520        }
521    }
522
523    static void encodeSequence(int size, byte seqs[], char cid, ArrayList<char[]> planes) {
524        --size;
525        int nextPlane = 0;
526        for (int idx = 0; idx < size; ++idx) {
527            char plane[] = planes.get(nextPlane);
528            int one = seqs[idx] & 0xff;
529            char c = plane[one];
530            if (c != 0 && (c & 0x8000) == 0)
531                throw new RuntimeException(MessageLocalization.getComposedMessage("inconsistent.mapping"));
532            if (c == 0) {
533                planes.add(new char[256]);
534                c = (char)(planes.size() - 1 | 0x8000);
535                plane[one] = c;
536            }
537            nextPlane = c & 0x7fff;
538        }
539        char plane[] = planes.get(nextPlane);
540        int one = seqs[size] & 0xff;
541        char c = plane[one];
542        if ((c & 0x8000) != 0)
543            throw new RuntimeException(MessageLocalization.getComposedMessage("inconsistent.mapping"));
544        plane[one] = cid;
545    }
546
547    /** Adds an extra encoding.
548     * @param name the name of the encoding. The encoding recognition is case insensitive
549     * @param enc the conversion class
550     */
551    @SuppressWarnings("unchecked")
552    public static void addExtraEncoding(String name, ExtraEncoding enc) {
553        synchronized (extraEncodings) { // This serializes concurrent updates
554            HashMap<String, ExtraEncoding> newEncodings = (HashMap<String, ExtraEncoding>)extraEncodings.clone();
555            newEncodings.put(name.toLowerCase(), enc);
556            extraEncodings = newEncodings;  // This swap does not require synchronization with reader
557        }
558    }
559
560    private static class WingdingsConversion implements ExtraEncoding {
561
562        public byte[] charToByte(char char1, String encoding) {
563            if (char1 == ' ')
564                return new byte[]{(byte)char1};
565            else if (char1 >= '\u2701' && char1 <= '\u27BE') {
566                byte v = table[char1 - 0x2700];
567                if (v != 0)
568                    return new byte[]{v};
569            }
570            return new byte[0];
571        }
572
573        public byte[] charToByte(String text, String encoding) {
574            char cc[] = text.toCharArray();
575            byte b[] = new byte[cc.length];
576            int ptr = 0;
577            int len = cc.length;
578            for (int k = 0; k < len; ++k) {
579                char c = cc[k];
580                if (c == ' ')
581                    b[ptr++] = (byte)c;
582                else if (c >= '\u2701' && c <= '\u27BE') {
583                    byte v = table[c - 0x2700];
584                    if (v != 0)
585                        b[ptr++] = v;
586                }
587            }
588            if (ptr == len)
589                return b;
590            byte b2[] = new byte[ptr];
591            System.arraycopy(b, 0, b2, 0, ptr);
592            return b2;
593        }
594
595        public String byteToChar(byte[] b, String encoding) {
596            return null;
597        }
598
599        private final static byte table[] = {
600            0, 35, 34, 0, 0, 0, 41, 62, 81, 42,
601            0, 0, 65, 63, 0, 0, 0, 0, 0, -4,
602            0, 0, 0, -5, 0, 0, 0, 0, 0, 0,
603            86, 0, 88, 89, 0, 0, 0, 0, 0, 0,
604            0, 0, -75, 0, 0, 0, 0, 0, -74, 0,
605            0, 0, -83, -81, -84, 0, 0, 0, 0, 0,
606            0, 0, 0, 124, 123, 0, 0, 0, 84, 0,
607            0, 0, 0, 0, 0, 0, 0, -90, 0, 0,
608            0, 113, 114, 0, 0, 0, 117, 0, 0, 0,
609            0, 0, 0, 125, 126, 0, 0, 0, 0, 0,
610            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
611            0, 0, 0, 0, 0, 0, 0, 0, -116, -115,
612            -114, -113, -112, -111, -110, -109, -108, -107, -127, -126,
613            -125, -124, -123, -122, -121, -120, -119, -118, -116, -115,
614            -114, -113, -112, -111, -110, -109, -108, -107, -24, 0,
615            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616            0, -24, -40, 0, 0, -60, -58, 0, 0, -16,
617            0, 0, 0, 0, 0, 0, 0, 0, 0, -36,
618            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619            0
620        };
621    }
622
623    private static class Cp437Conversion implements ExtraEncoding {
624        private static IntHashtable c2b = new IntHashtable();
625
626        public byte[] charToByte(String text, String encoding) {
627            char cc[] = text.toCharArray();
628            byte b[] = new byte[cc.length];
629            int ptr = 0;
630            int len = cc.length;
631            for (int k = 0; k < len; ++k) {
632                char c = cc[k];
633                if (c < 128)
634                    b[ptr++] = (byte)c;
635                else {
636                    byte v = (byte)c2b.get(c);
637                    if (v != 0)
638                        b[ptr++] = v;
639                }
640            }
641            if (ptr == len)
642                return b;
643            byte b2[] = new byte[ptr];
644            System.arraycopy(b, 0, b2, 0, ptr);
645            return b2;
646        }
647
648        public byte[] charToByte(char char1, String encoding) {
649            if (char1 < 128)
650                return new byte[]{(byte)char1};
651            else {
652                byte v = (byte)c2b.get(char1);
653                if (v != 0)
654                    return new byte[]{v};
655                else
656                    return new byte[0];
657            }
658        }
659
660        public String byteToChar(byte[] b, String encoding) {
661            int len = b.length;
662            char cc[] = new char[len];
663            int ptr = 0;
664            for (int k = 0; k < len; ++k) {
665                int c = b[k] & 0xff;
666                if (c < ' ')
667                    continue;
668                if (c < 128)
669                    cc[ptr++] = (char)c;
670                else {
671                    char v = table[c - 128];
672                    cc[ptr++] = v;
673                }
674            }
675            return new String(cc, 0, ptr);
676        }
677
678        private final static char table[] = {
679            '\u00C7', '\u00FC', '\u00E9', '\u00E2', '\u00E4', '\u00E0', '\u00E5', '\u00E7', '\u00EA', '\u00EB', '\u00E8', '\u00EF', '\u00EE', '\u00EC', '\u00C4', '\u00C5',
680            '\u00C9', '\u00E6', '\u00C6', '\u00F4', '\u00F6', '\u00F2', '\u00FB', '\u00F9', '\u00FF', '\u00D6', '\u00DC', '\u00A2', '\u00A3', '\u00A5', '\u20A7', '\u0192',
681            '\u00E1', '\u00ED', '\u00F3', '\u00FA', '\u00F1', '\u00D1', '\u00AA', '\u00BA', '\u00BF', '\u2310', '\u00AC', '\u00BD', '\u00BC', '\u00A1', '\u00AB', '\u00BB',
682            '\u2591', '\u2592', '\u2593', '\u2502', '\u2524', '\u2561', '\u2562', '\u2556', '\u2555', '\u2563', '\u2551', '\u2557', '\u255D', '\u255C', '\u255B', '\u2510',
683            '\u2514', '\u2534', '\u252C', '\u251C', '\u2500', '\u253C', '\u255E', '\u255F', '\u255A', '\u2554', '\u2569', '\u2566', '\u2560', '\u2550', '\u256C', '\u2567',
684            '\u2568', '\u2564', '\u2565', '\u2559', '\u2558', '\u2552', '\u2553', '\u256B', '\u256A', '\u2518', '\u250C', '\u2588', '\u2584', '\u258C', '\u2590', '\u2580',
685            '\u03B1', '\u00DF', '\u0393', '\u03C0', '\u03A3', '\u03C3', '\u00B5', '\u03C4', '\u03A6', '\u0398', '\u03A9', '\u03B4', '\u221E', '\u03C6', '\u03B5', '\u2229',
686            '\u2261', '\u00B1', '\u2265', '\u2264', '\u2320', '\u2321', '\u00F7', '\u2248', '\u00B0', '\u2219', '\u00B7', '\u221A', '\u207F', '\u00B2', '\u25A0', '\u00A0'
687        };
688
689        static {
690            for (int k = 0; k < table.length; ++k)
691                c2b.put(table[k], k + 128);
692        }
693    }
694
695    private static class SymbolConversion implements ExtraEncoding {
696
697        private static final IntHashtable t1 = new IntHashtable();
698        private static final IntHashtable t2 = new IntHashtable();
699        private IntHashtable translation;
700
701        SymbolConversion(boolean symbol) {
702            if (symbol)
703                translation = t1;
704            else
705                translation = t2;
706        }
707
708        public byte[] charToByte(String text, String encoding) {
709            char cc[] = text.toCharArray();
710            byte b[] = new byte[cc.length];
711            int ptr = 0;
712            int len = cc.length;
713            for (int k = 0; k < len; ++k) {
714                char c = cc[k];
715                byte v = (byte)translation.get(c);
716                if (v != 0)
717                    b[ptr++] = v;
718            }
719            if (ptr == len)
720                return b;
721            byte b2[] = new byte[ptr];
722            System.arraycopy(b, 0, b2, 0, ptr);
723            return b2;
724        }
725
726        public byte[] charToByte(char char1, String encoding) {
727            byte v = (byte)translation.get(char1);
728            if (v != 0)
729                return new byte[]{v};
730            else
731                return new byte[0];
732        }
733
734        public String byteToChar(byte[] b, String encoding) {
735            return null;
736        }
737
738        private final static char table1[] = {
739            ' ','!','\u2200','#','\u2203','%','&','\u220b','(',')','*','+',',','-','.','/',
740            '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
741            '\u2245','\u0391','\u0392','\u03a7','\u0394','\u0395','\u03a6','\u0393','\u0397','\u0399','\u03d1','\u039a','\u039b','\u039c','\u039d','\u039f',
742            '\u03a0','\u0398','\u03a1','\u03a3','\u03a4','\u03a5','\u03c2','\u03a9','\u039e','\u03a8','\u0396','[','\u2234',']','\u22a5','_',
743            '\u0305','\u03b1','\u03b2','\u03c7','\u03b4','\u03b5','\u03d5','\u03b3','\u03b7','\u03b9','\u03c6','\u03ba','\u03bb','\u03bc','\u03bd','\u03bf',
744            '\u03c0','\u03b8','\u03c1','\u03c3','\u03c4','\u03c5','\u03d6','\u03c9','\u03be','\u03c8','\u03b6','{','|','}','~','\0',
745            '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0',
746            '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0',
747            '\u20ac','\u03d2','\u2032','\u2264','\u2044','\u221e','\u0192','\u2663','\u2666','\u2665','\u2660','\u2194','\u2190','\u2191','\u2192','\u2193',
748            '\u00b0','\u00b1','\u2033','\u2265','\u00d7','\u221d','\u2202','\u2022','\u00f7','\u2260','\u2261','\u2248','\u2026','\u2502','\u2500','\u21b5',
749            '\u2135','\u2111','\u211c','\u2118','\u2297','\u2295','\u2205','\u2229','\u222a','\u2283','\u2287','\u2284','\u2282','\u2286','\u2208','\u2209',
750            '\u2220','\u2207','\u00ae','\u00a9','\u2122','\u220f','\u221a','\u2022','\u00ac','\u2227','\u2228','\u21d4','\u21d0','\u21d1','\u21d2','\u21d3',
751            '\u25ca','\u2329','\0','\0','\0','\u2211','\u239b','\u239c','\u239d','\u23a1','\u23a2','\u23a3','\u23a7','\u23a8','\u23a9','\u23aa',
752            '\0','\u232a','\u222b','\u2320','\u23ae','\u2321','\u239e','\u239f','\u23a0','\u23a4','\u23a5','\u23a6','\u23ab','\u23ac','\u23ad','\0'
753        };
754
755        private final static char table2[] = {
756            '\u0020','\u2701','\u2702','\u2703','\u2704','\u260e','\u2706','\u2707','\u2708','\u2709','\u261b','\u261e','\u270C','\u270D','\u270E','\u270F',
757            '\u2710','\u2711','\u2712','\u2713','\u2714','\u2715','\u2716','\u2717','\u2718','\u2719','\u271A','\u271B','\u271C','\u271D','\u271E','\u271F',
758            '\u2720','\u2721','\u2722','\u2723','\u2724','\u2725','\u2726','\u2727','\u2605','\u2729','\u272A','\u272B','\u272C','\u272D','\u272E','\u272F',
759            '\u2730','\u2731','\u2732','\u2733','\u2734','\u2735','\u2736','\u2737','\u2738','\u2739','\u273A','\u273B','\u273C','\u273D','\u273E','\u273F',
760            '\u2740','\u2741','\u2742','\u2743','\u2744','\u2745','\u2746','\u2747','\u2748','\u2749','\u274A','\u274B','\u25cf','\u274D','\u25a0','\u274F',
761            '\u2750','\u2751','\u2752','\u25b2','\u25bc','\u25c6','\u2756','\u25d7','\u2758','\u2759','\u275A','\u275B','\u275C','\u275D','\u275E','\u0000',
762            '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0',
763            '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0','\0',
764            '\u0000','\u2761','\u2762','\u2763','\u2764','\u2765','\u2766','\u2767','\u2663','\u2666','\u2665','\u2660','\u2460','\u2461','\u2462','\u2463',
765            '\u2464','\u2465','\u2466','\u2467','\u2468','\u2469','\u2776','\u2777','\u2778','\u2779','\u277A','\u277B','\u277C','\u277D','\u277E','\u277F',
766            '\u2780','\u2781','\u2782','\u2783','\u2784','\u2785','\u2786','\u2787','\u2788','\u2789','\u278A','\u278B','\u278C','\u278D','\u278E','\u278F',
767            '\u2790','\u2791','\u2792','\u2793','\u2794','\u2192','\u2194','\u2195','\u2798','\u2799','\u279A','\u279B','\u279C','\u279D','\u279E','\u279F',
768            '\u27A0','\u27A1','\u27A2','\u27A3','\u27A4','\u27A5','\u27A6','\u27A7','\u27A8','\u27A9','\u27AA','\u27AB','\u27AC','\u27AD','\u27AE','\u27AF',
769            '\u0000','\u27B1','\u27B2','\u27B3','\u27B4','\u27B5','\u27B6','\u27B7','\u27B8','\u27B9','\u27BA','\u27BB','\u27BC','\u27BD','\u27BE','\u0000'
770        };
771
772        static {
773            for (int k = 0; k < table1.length; ++k) {
774                int v = table1[k];
775                if (v != 0)
776                    t1.put(v, k + 32);
777            }
778            for (int k = 0; k < table2.length; ++k) {
779                int v = table2[k];
780                if (v != 0)
781                    t2.put(v, k + 32);
782            }
783        }
784    }
785
786    private static class SymbolTTConversion implements ExtraEncoding {
787
788        public byte[] charToByte(char char1, String encoding) {
789            if ((char1 & 0xff00) == 0 || (char1 & 0xff00) == 0xf000)
790                return new byte[]{(byte)char1};
791            else
792                return new byte[0];
793        }
794
795        public byte[] charToByte(String text, String encoding) {
796            char ch[] = text.toCharArray();
797            byte b[] = new byte[ch.length];
798            int ptr = 0;
799            int len = ch.length;
800            for (int k = 0; k < len; ++k) {
801                char c = ch[k];
802                if ((c & 0xff00) == 0 || (c & 0xff00) == 0xf000)
803                    b[ptr++] = (byte)c;
804            }
805            if (ptr == len)
806                return b;
807            byte b2[] = new byte[ptr];
808            System.arraycopy(b, 0, b2, 0, ptr);
809            return b2;
810        }
811
812        public String byteToChar(byte[] b, String encoding) {
813            return null;
814        }
815
816    }
817}