001/*
002 * $Id: IanaEncodings.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044
/* The values used in this class are based on class org.apache.xerces.util.EncodingMap
046 * http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/EncodingMap.java?view=markup
047 * This class was originally published under the following license:
048 *
049 * Licensed to the Apache Software Foundation (ASF) under one or more
050 * contributor license agreements.  See the NOTICE file distributed with
051 * this work for additional information regarding copyright ownership.
052 * The ASF licenses this file to You under the Apache License, Version 2.0
053 * (the "License"); you may not use this file except in compliance with
054 * the License.  You may obtain a copy of the License at
055 *
056 *      http://www.apache.org/licenses/LICENSE-2.0
057 *
058 * Unless required by applicable law or agreed to in writing, software
059 * distributed under the License is distributed on an "AS IS" BASIS,
060 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
061 * See the License for the specific language governing permissions and
062 * limitations under the License.
063 */
064package com.itextpdf.text.xml.simpleparser;
065
066import java.util.HashMap;
067import java.util.Map;
068
/**
 * Translates an IANA encoding name to a Java encoding.
 * <p>
 * Lookups are case-insensitive: the requested name is upper-cased before it
 * is matched, so every key registered in the table below must be written in
 * upper case.
 */

public class IanaEncodings {

    /** The object that maps (upper-cased) IANA names to Java encodings. */
    private static final Map<String, String> MAP = new HashMap<String, String>();

    static {
        // add IANA to Java encoding mappings.
        // NOTE: keys must be upper case, see getJavaEncoding(String).
        MAP.put("BIG5", "Big5");
        MAP.put("CSBIG5", "Big5");
        MAP.put("CP037", "CP037");
        MAP.put("IBM037", "CP037");
        MAP.put("CSIBM037", "CP037");
        MAP.put("EBCDIC-CP-US", "CP037");
        MAP.put("EBCDIC-CP-CA", "CP037");
        MAP.put("EBCDIC-CP-NL", "CP037");
        MAP.put("EBCDIC-CP-WT", "CP037");
        MAP.put("IBM277", "CP277");
        MAP.put("CP277", "CP277");
        MAP.put("CSIBM277", "CP277");
        MAP.put("EBCDIC-CP-DK", "CP277");
        MAP.put("EBCDIC-CP-NO", "CP277");
        MAP.put("IBM278", "CP278");
        MAP.put("CP278", "CP278");
        MAP.put("CSIBM278", "CP278");
        MAP.put("EBCDIC-CP-FI", "CP278");
        MAP.put("EBCDIC-CP-SE", "CP278");
        MAP.put("IBM280", "CP280");
        MAP.put("CP280", "CP280");
        MAP.put("CSIBM280", "CP280");
        MAP.put("EBCDIC-CP-IT", "CP280");
        MAP.put("IBM284", "CP284");
        MAP.put("CP284", "CP284");
        MAP.put("CSIBM284", "CP284");
        MAP.put("EBCDIC-CP-ES", "CP284");
        MAP.put("EBCDIC-CP-GB", "CP285");
        MAP.put("IBM285", "CP285");
        MAP.put("CP285", "CP285");
        MAP.put("CSIBM285", "CP285");
        MAP.put("EBCDIC-CP-FR", "CP297");
        MAP.put("IBM297", "CP297");
        MAP.put("CP297", "CP297");
        MAP.put("CSIBM297", "CP297");
        MAP.put("EBCDIC-CP-AR1", "CP420");
        MAP.put("IBM420", "CP420");
        MAP.put("CP420", "CP420");
        MAP.put("CSIBM420", "CP420");
        MAP.put("EBCDIC-CP-HE", "CP424");
        MAP.put("IBM424", "CP424");
        MAP.put("CP424", "CP424");
        MAP.put("CSIBM424", "CP424");
        MAP.put("EBCDIC-CP-CH", "CP500");
        MAP.put("IBM500", "CP500");
        MAP.put("CP500", "CP500");
        MAP.put("CSIBM500", "CP500");
        MAP.put("EBCDIC-CP-BE", "CP500");
        MAP.put("IBM868", "CP868");
        MAP.put("CP868", "CP868");
        MAP.put("CSIBM868", "CP868");
        MAP.put("CP-AR", "CP868");
        MAP.put("IBM869", "CP869");
        MAP.put("CP869", "CP869");
        MAP.put("CSIBM869", "CP869");
        MAP.put("CP-GR", "CP869");
        MAP.put("IBM870", "CP870");
        MAP.put("CP870", "CP870");
        MAP.put("CSIBM870", "CP870");
        MAP.put("EBCDIC-CP-ROECE", "CP870");
        MAP.put("EBCDIC-CP-YU", "CP870");
        MAP.put("IBM871", "CP871");
        MAP.put("CP871", "CP871");
        MAP.put("CSIBM871", "CP871");
        MAP.put("EBCDIC-CP-IS", "CP871");
        MAP.put("IBM918", "CP918");
        MAP.put("CP918", "CP918");
        MAP.put("CSIBM918", "CP918");
        MAP.put("EBCDIC-CP-AR2", "CP918");
        MAP.put("EUC-JP", "EUCJIS");
        // IANA spelling: csEUCPkdFmtJapanese (stored upper-cased so it can be found)
        MAP.put("CSEUCPKDFMTJAPANESE", "EUCJIS");
        MAP.put("EUC-KR", "KSC5601");
        MAP.put("GB2312", "GB2312");
        MAP.put("CSGB2312", "GB2312");
        MAP.put("ISO-2022-JP", "JIS");
        MAP.put("CSISO2022JP", "JIS");
        MAP.put("ISO-2022-KR", "ISO2022KR");
        MAP.put("CSISO2022KR", "ISO2022KR");
        MAP.put("ISO-2022-CN", "ISO2022CN");

        MAP.put("X0201", "JIS0201");
        MAP.put("CSISO13JISC6220JP", "JIS0201");
        MAP.put("X0208", "JIS0208");
        MAP.put("ISO-IR-87", "JIS0208");
        // originally registered as X0208dbiJIS_X0208-1983 (stored upper-cased so it can be found)
        MAP.put("X0208DBIJIS_X0208-1983", "JIS0208");
        MAP.put("CSISO87JISX0208", "JIS0208");
        MAP.put("X0212", "JIS0212");
        MAP.put("ISO-IR-159", "JIS0212");
        MAP.put("CSISO159JISX02121990", "JIS0212");
        MAP.put("SHIFT_JIS", "SJIS");
        MAP.put("CSSHIFT_JIS", "SJIS");
        // IANA spelling: MS_Kanji (stored upper-cased so it can be found)
        MAP.put("MS_KANJI", "SJIS");

        // Add support for Cp1252 and its friends
        MAP.put("WINDOWS-1250", "Cp1250");
        MAP.put("WINDOWS-1251", "Cp1251");
        MAP.put("WINDOWS-1252", "Cp1252");
        MAP.put("WINDOWS-1253", "Cp1253");
        MAP.put("WINDOWS-1254", "Cp1254");
        MAP.put("WINDOWS-1255", "Cp1255");
        MAP.put("WINDOWS-1256", "Cp1256");
        MAP.put("WINDOWS-1257", "Cp1257");
        MAP.put("WINDOWS-1258", "Cp1258");
        MAP.put("TIS-620", "TIS620");

        MAP.put("ISO-8859-1", "ISO8859_1");
        MAP.put("ISO-IR-100", "ISO8859_1");
        MAP.put("ISO_8859-1", "ISO8859_1");
        MAP.put("LATIN1", "ISO8859_1");
        MAP.put("CSISOLATIN1", "ISO8859_1");
        MAP.put("L1", "ISO8859_1");
        MAP.put("IBM819", "ISO8859_1");
        MAP.put("CP819", "ISO8859_1");

        MAP.put("ISO-8859-2", "ISO8859_2");
        MAP.put("ISO-IR-101", "ISO8859_2");
        MAP.put("ISO_8859-2", "ISO8859_2");
        MAP.put("LATIN2", "ISO8859_2");
        MAP.put("CSISOLATIN2", "ISO8859_2");
        MAP.put("L2", "ISO8859_2");

        MAP.put("ISO-8859-3", "ISO8859_3");
        MAP.put("ISO-IR-109", "ISO8859_3");
        MAP.put("ISO_8859-3", "ISO8859_3");
        MAP.put("LATIN3", "ISO8859_3");
        MAP.put("CSISOLATIN3", "ISO8859_3");
        MAP.put("L3", "ISO8859_3");

        MAP.put("ISO-8859-4", "ISO8859_4");
        MAP.put("ISO-IR-110", "ISO8859_4");
        MAP.put("ISO_8859-4", "ISO8859_4");
        MAP.put("LATIN4", "ISO8859_4");
        MAP.put("CSISOLATIN4", "ISO8859_4");
        MAP.put("L4", "ISO8859_4");

        MAP.put("ISO-8859-5", "ISO8859_5");
        MAP.put("ISO-IR-144", "ISO8859_5");
        MAP.put("ISO_8859-5", "ISO8859_5");
        MAP.put("CYRILLIC", "ISO8859_5");
        MAP.put("CSISOLATINCYRILLIC", "ISO8859_5");

        MAP.put("ISO-8859-6", "ISO8859_6");
        MAP.put("ISO-IR-127", "ISO8859_6");
        MAP.put("ISO_8859-6", "ISO8859_6");
        MAP.put("ECMA-114", "ISO8859_6");
        MAP.put("ASMO-708", "ISO8859_6");
        MAP.put("ARABIC", "ISO8859_6");
        MAP.put("CSISOLATINARABIC", "ISO8859_6");

        MAP.put("ISO-8859-7", "ISO8859_7");
        MAP.put("ISO-IR-126", "ISO8859_7");
        MAP.put("ISO_8859-7", "ISO8859_7");
        MAP.put("ELOT_928", "ISO8859_7");
        MAP.put("ECMA-118", "ISO8859_7");
        MAP.put("GREEK", "ISO8859_7");
        MAP.put("CSISOLATINGREEK", "ISO8859_7");
        MAP.put("GREEK8", "ISO8859_7");

        MAP.put("ISO-8859-8", "ISO8859_8");
        MAP.put("ISO-8859-8-I", "ISO8859_8"); // added since this encoding only differs w.r.t. presentation
        MAP.put("ISO-IR-138", "ISO8859_8");
        MAP.put("ISO_8859-8", "ISO8859_8");
        MAP.put("HEBREW", "ISO8859_8");
        MAP.put("CSISOLATINHEBREW", "ISO8859_8");

        MAP.put("ISO-8859-9", "ISO8859_9");
        MAP.put("ISO-IR-148", "ISO8859_9");
        MAP.put("ISO_8859-9", "ISO8859_9");
        MAP.put("LATIN5", "ISO8859_9");
        MAP.put("CSISOLATIN5", "ISO8859_9");
        MAP.put("L5", "ISO8859_9");

        MAP.put("KOI8-R", "KOI8_R");
        MAP.put("CSKOI8-R", "KOI8_R");
        MAP.put("US-ASCII", "ASCII");
        MAP.put("ISO-IR-6", "ASCII");
        MAP.put("ANSI_X3.4-1986", "ASCII");
        MAP.put("ISO_646.IRV:1991", "ASCII");
        MAP.put("ASCII", "ASCII");
        MAP.put("CSASCII", "ASCII");
        MAP.put("ISO646-US", "ASCII");
        MAP.put("US", "ASCII");
        MAP.put("IBM367", "ASCII");
        MAP.put("CP367", "ASCII");
        MAP.put("UTF-8", "UTF8");
        MAP.put("UTF-16", "Unicode");
        MAP.put("UTF-16BE", "UnicodeBig");
        MAP.put("UTF-16LE", "UnicodeLittle");
    }

    /**
     * Gets the java encoding from the IANA encoding. The lookup ignores the
     * case of the given name. If the encoding cannot be found it returns the
     * input unchanged.
     * @param iana the IANA encoding
     * @return the java encoding, or <code>iana</code> itself if no mapping is known
     * @throws NullPointerException if <code>iana</code> is <code>null</code>
     */
    public static String getJavaEncoding(final String iana) {
        // Upper-case with a fixed locale: with the default locale a Turkish
        // environment would turn 'i' into a dotted capital I and the lookup
        // of e.g. "iso-8859-1" would fail.
        final String key = iana.toUpperCase(java.util.Locale.ENGLISH);
        final String javaName = MAP.get(key);
        if (javaName == null) {
            return iana;
        }
        return javaName;
    }
}