001/* 002 * $Id: IanaEncodings.java 4784 2011-03-15 08:33:00Z blowagie $ 003 * 004 * This file is part of the iText (R) project. 005 * Copyright (c) 1998-2011 1T3XT BVBA 006 * Authors: Bruno Lowagie, Paulo Soares, et al. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU Affero General Public License version 3 010 * as published by the Free Software Foundation with the addition of the 011 * following permission added to Section 15 as permitted in Section 7(a): 012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT, 013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS. 014 * 015 * This program is distributed in the hope that it will be useful, but 016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 017 * or FITNESS FOR A PARTICULAR PURPOSE. 018 * See the GNU Affero General Public License for more details. 019 * You should have received a copy of the GNU Affero General Public License 020 * along with this program; if not, see http://www.gnu.org/licenses or write to 021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 022 * Boston, MA, 02110-1301 USA, or download the license from the following URL: 023 * http://itextpdf.com/terms-of-use/ 024 * 025 * The interactive user interfaces in modified source and object code versions 026 * of this program must display Appropriate Legal Notices, as required under 027 * Section 5 of the GNU Affero General Public License. 028 * 029 * In accordance with Section 7(b) of the GNU Affero General Public License, 030 * a covered work must retain the producer line in every PDF that is created 031 * or manipulated using iText. 032 * 033 * You can be released from the requirements of the license by purchasing 034 * a commercial license. Buying such a license is mandatory as soon as you 035 * develop commercial activities involving the iText software without 036 * disclosing the source code of your own applications. 037 * These activities include: offering paid services to customers as an ASP, 038 * serving PDFs on the fly in a web application, shipping iText with a closed 039 * source product. 040 * 041 * For more information, please contact iText Software Corp. at this 042 * address: sales@itextpdf.com 043 */ 044 045/* The values used in this class are based on class org.apache.xercis.util.EncodingMap 046 * http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/EncodingMap.java?view=markup 047 * This class was originally published under the following license: 048 * 049 * Licensed to the Apache Software Foundation (ASF) under one or more 050 * contributor license agreements. See the NOTICE file distributed with 051 * this work for additional information regarding copyright ownership. 052 * The ASF licenses this file to You under the Apache License, Version 2.0 053 * (the "License"); you may not use this file except in compliance with 054 * the License. You may obtain a copy of the License at 055 * 056 * http://www.apache.org/licenses/LICENSE-2.0 057 * 058 * Unless required by applicable law or agreed to in writing, software 059 * distributed under the License is distributed on an "AS IS" BASIS, 060 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 061 * See the License for the specific language governing permissions and 062 * limitations under the License. 063 */ 064package com.itextpdf.text.xml.simpleparser; 065 066import java.util.HashMap; 067import java.util.Map; 068 069/** 070 * Translates a IANA encoding name to a Java encoding. 071 */ 072 073public class IanaEncodings { 074 075 /** The object that maps IANA to Java encodings. */ 076 private static final Map<String, String> MAP = new HashMap<String, String>(); 077 078 static { 079 // add IANA to Java encoding mappings. 080 MAP.put("BIG5", "Big5"); 081 MAP.put("CSBIG5", "Big5"); 082 MAP.put("CP037", "CP037"); 083 MAP.put("IBM037", "CP037"); 084 MAP.put("CSIBM037", "CP037"); 085 MAP.put("EBCDIC-CP-US", "CP037"); 086 MAP.put("EBCDIC-CP-CA", "CP037"); 087 MAP.put("EBCDIC-CP-NL", "CP037"); 088 MAP.put("EBCDIC-CP-WT", "CP037"); 089 MAP.put("IBM277", "CP277"); 090 MAP.put("CP277", "CP277"); 091 MAP.put("CSIBM277", "CP277"); 092 MAP.put("EBCDIC-CP-DK", "CP277"); 093 MAP.put("EBCDIC-CP-NO", "CP277"); 094 MAP.put("IBM278", "CP278"); 095 MAP.put("CP278", "CP278"); 096 MAP.put("CSIBM278", "CP278"); 097 MAP.put("EBCDIC-CP-FI", "CP278"); 098 MAP.put("EBCDIC-CP-SE", "CP278"); 099 MAP.put("IBM280", "CP280"); 100 MAP.put("CP280", "CP280"); 101 MAP.put("CSIBM280", "CP280"); 102 MAP.put("EBCDIC-CP-IT", "CP280"); 103 MAP.put("IBM284", "CP284"); 104 MAP.put("CP284", "CP284"); 105 MAP.put("CSIBM284", "CP284"); 106 MAP.put("EBCDIC-CP-ES", "CP284"); 107 MAP.put("EBCDIC-CP-GB", "CP285"); 108 MAP.put("IBM285", "CP285"); 109 MAP.put("CP285", "CP285"); 110 MAP.put("CSIBM285", "CP285"); 111 MAP.put("EBCDIC-CP-FR", "CP297"); 112 MAP.put("IBM297", "CP297"); 113 MAP.put("CP297", "CP297"); 114 MAP.put("CSIBM297", "CP297"); 115 MAP.put("EBCDIC-CP-AR1", "CP420"); 116 MAP.put("IBM420", "CP420"); 117 MAP.put("CP420", "CP420"); 118 MAP.put("CSIBM420", "CP420"); 119 MAP.put("EBCDIC-CP-HE", "CP424"); 120 MAP.put("IBM424", "CP424"); 121 MAP.put("CP424", "CP424"); 122 MAP.put("CSIBM424", "CP424"); 123 MAP.put("EBCDIC-CP-CH", "CP500"); 124 MAP.put("IBM500", "CP500"); 125 MAP.put("CP500", "CP500"); 126 MAP.put("CSIBM500", "CP500"); 127 MAP.put("EBCDIC-CP-CH", "CP500"); 128 MAP.put("EBCDIC-CP-BE", "CP500"); 129 MAP.put("IBM868", "CP868"); 130 MAP.put("CP868", "CP868"); 131 MAP.put("CSIBM868", "CP868"); 132 MAP.put("CP-AR", "CP868"); 133 MAP.put("IBM869", "CP869"); 134 MAP.put("CP869", "CP869"); 135 MAP.put("CSIBM869", "CP869"); 136 MAP.put("CP-GR", "CP869"); 137 MAP.put("IBM870", "CP870"); 138 MAP.put("CP870", "CP870"); 139 MAP.put("CSIBM870", "CP870"); 140 MAP.put("EBCDIC-CP-ROECE", "CP870"); 141 MAP.put("EBCDIC-CP-YU", "CP870"); 142 MAP.put("IBM871", "CP871"); 143 MAP.put("CP871", "CP871"); 144 MAP.put("CSIBM871", "CP871"); 145 MAP.put("EBCDIC-CP-IS", "CP871"); 146 MAP.put("IBM918", "CP918"); 147 MAP.put("CP918", "CP918"); 148 MAP.put("CSIBM918", "CP918"); 149 MAP.put("EBCDIC-CP-AR2", "CP918"); 150 MAP.put("EUC-JP", "EUCJIS"); 151 MAP.put("CSEUCPkdFmtJapanese", "EUCJIS"); 152 MAP.put("EUC-KR", "KSC5601"); 153 MAP.put("GB2312", "GB2312"); 154 MAP.put("CSGB2312", "GB2312"); 155 MAP.put("ISO-2022-JP", "JIS"); 156 MAP.put("CSISO2022JP", "JIS"); 157 MAP.put("ISO-2022-KR", "ISO2022KR"); 158 MAP.put("CSISO2022KR", "ISO2022KR"); 159 MAP.put("ISO-2022-CN", "ISO2022CN"); 160 161 MAP.put("X0201", "JIS0201"); 162 MAP.put("CSISO13JISC6220JP", "JIS0201"); 163 MAP.put("X0208", "JIS0208"); 164 MAP.put("ISO-IR-87", "JIS0208"); 165 MAP.put("X0208dbiJIS_X0208-1983", "JIS0208"); 166 MAP.put("CSISO87JISX0208", "JIS0208"); 167 MAP.put("X0212", "JIS0212"); 168 MAP.put("ISO-IR-159", "JIS0212"); 169 MAP.put("CSISO159JISX02121990", "JIS0212"); 170 MAP.put("SHIFT_JIS", "SJIS"); 171 MAP.put("CSSHIFT_JIS", "SJIS"); 172 MAP.put("MS_Kanji", "SJIS"); 173 174 // Add support for Cp1252 and its friends 175 MAP.put("WINDOWS-1250", "Cp1250"); 176 MAP.put("WINDOWS-1251", "Cp1251"); 177 MAP.put("WINDOWS-1252", "Cp1252"); 178 MAP.put("WINDOWS-1253", "Cp1253"); 179 MAP.put("WINDOWS-1254", "Cp1254"); 180 MAP.put("WINDOWS-1255", "Cp1255"); 181 MAP.put("WINDOWS-1256", "Cp1256"); 182 MAP.put("WINDOWS-1257", "Cp1257"); 183 MAP.put("WINDOWS-1258", "Cp1258"); 184 MAP.put("TIS-620", "TIS620"); 185 186 MAP.put("ISO-8859-1", "ISO8859_1"); 187 MAP.put("ISO-IR-100", "ISO8859_1"); 188 MAP.put("ISO_8859-1", "ISO8859_1"); 189 MAP.put("LATIN1", "ISO8859_1"); 190 MAP.put("CSISOLATIN1", "ISO8859_1"); 191 MAP.put("L1", "ISO8859_1"); 192 MAP.put("IBM819", "ISO8859_1"); 193 MAP.put("CP819", "ISO8859_1"); 194 195 MAP.put("ISO-8859-2", "ISO8859_2"); 196 MAP.put("ISO-IR-101", "ISO8859_2"); 197 MAP.put("ISO_8859-2", "ISO8859_2"); 198 MAP.put("LATIN2", "ISO8859_2"); 199 MAP.put("CSISOLATIN2", "ISO8859_2"); 200 MAP.put("L2", "ISO8859_2"); 201 202 MAP.put("ISO-8859-3", "ISO8859_3"); 203 MAP.put("ISO-IR-109", "ISO8859_3"); 204 MAP.put("ISO_8859-3", "ISO8859_3"); 205 MAP.put("LATIN3", "ISO8859_3"); 206 MAP.put("CSISOLATIN3", "ISO8859_3"); 207 MAP.put("L3", "ISO8859_3"); 208 209 MAP.put("ISO-8859-4", "ISO8859_4"); 210 MAP.put("ISO-IR-110", "ISO8859_4"); 211 MAP.put("ISO_8859-4", "ISO8859_4"); 212 MAP.put("LATIN4", "ISO8859_4"); 213 MAP.put("CSISOLATIN4", "ISO8859_4"); 214 MAP.put("L4", "ISO8859_4"); 215 216 MAP.put("ISO-8859-5", "ISO8859_5"); 217 MAP.put("ISO-IR-144", "ISO8859_5"); 218 MAP.put("ISO_8859-5", "ISO8859_5"); 219 MAP.put("CYRILLIC", "ISO8859_5"); 220 MAP.put("CSISOLATINCYRILLIC", "ISO8859_5"); 221 222 MAP.put("ISO-8859-6", "ISO8859_6"); 223 MAP.put("ISO-IR-127", "ISO8859_6"); 224 MAP.put("ISO_8859-6", "ISO8859_6"); 225 MAP.put("ECMA-114", "ISO8859_6"); 226 MAP.put("ASMO-708", "ISO8859_6"); 227 MAP.put("ARABIC", "ISO8859_6"); 228 MAP.put("CSISOLATINARABIC", "ISO8859_6"); 229 230 MAP.put("ISO-8859-7", "ISO8859_7"); 231 MAP.put("ISO-IR-126", "ISO8859_7"); 232 MAP.put("ISO_8859-7", "ISO8859_7"); 233 MAP.put("ELOT_928", "ISO8859_7"); 234 MAP.put("ECMA-118", "ISO8859_7"); 235 MAP.put("GREEK", "ISO8859_7"); 236 MAP.put("CSISOLATINGREEK", "ISO8859_7"); 237 MAP.put("GREEK8", "ISO8859_7"); 238 239 MAP.put("ISO-8859-8", "ISO8859_8"); 240 MAP.put("ISO-8859-8-I", "ISO8859_8"); // added since this encoding only differs w.r.t. presentation 241 MAP.put("ISO-IR-138", "ISO8859_8"); 242 MAP.put("ISO_8859-8", "ISO8859_8"); 243 MAP.put("HEBREW", "ISO8859_8"); 244 MAP.put("CSISOLATINHEBREW", "ISO8859_8"); 245 246 MAP.put("ISO-8859-9", "ISO8859_9"); 247 MAP.put("ISO-IR-148", "ISO8859_9"); 248 MAP.put("ISO_8859-9", "ISO8859_9"); 249 MAP.put("LATIN5", "ISO8859_9"); 250 MAP.put("CSISOLATIN5", "ISO8859_9"); 251 MAP.put("L5", "ISO8859_9"); 252 253 MAP.put("KOI8-R", "KOI8_R"); 254 MAP.put("CSKOI8-R", "KOI8_R"); 255 MAP.put("US-ASCII", "ASCII"); 256 MAP.put("ISO-IR-6", "ASCII"); 257 MAP.put("ANSI_X3.4-1986", "ASCII"); 258 MAP.put("ISO_646.IRV:1991", "ASCII"); 259 MAP.put("ASCII", "ASCII"); 260 MAP.put("CSASCII", "ASCII"); 261 MAP.put("ISO646-US", "ASCII"); 262 MAP.put("US", "ASCII"); 263 MAP.put("IBM367", "ASCII"); 264 MAP.put("CP367", "ASCII"); 265 MAP.put("UTF-8", "UTF8"); 266 MAP.put("UTF-16", "Unicode"); 267 MAP.put("UTF-16BE", "UnicodeBig"); 268 MAP.put("UTF-16LE", "UnicodeLittle"); 269 } 270 271 /** 272 * Gets the java encoding from the IANA encoding. If the encoding cannot be found 273 * it returns the input. 274 * @param iana the IANA encoding 275 * @return the java encoding 276 */ 277 public static String getJavaEncoding(final String iana) { 278 String IANA = iana.toUpperCase(); 279 String jdec = MAP.get(IANA); 280 if (jdec == null) 281 jdec = iana; 282 return jdec; 283 } 284}