# Copyright (C) 2001-2006 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

__all__ = [
    'Charset',
    'add_alias',
    'add_charset',
    'add_codec',
    ]

import codecs
import email.base64mime
import email.quoprimime

from email import errors
from email.encoders import encode_7or8bit



# Flags for types of header encodings
QP          = 1 # Quoted-Printable
BASE64      = 2 # Base64
SHORTEST    = 3 # the shorter of QP and base64, but only for headers

# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
MISC_LEN = 7

DEFAULT_CHARSET = 'us-ascii'



# Defaults
CHARSETS = {
    # input        header enc  body enc output conv
    'iso-8859-1':  (QP,        QP,      None),
    'iso-8859-2':  (QP,        QP,      None),
    'iso-8859-3':  (QP,        QP,      None),
    'iso-8859-4':  (QP,        QP,      None),
    # iso-8859-5 is Cyrillic, and not especially used
    # iso-8859-6 is Arabic, also not particularly used
    # iso-8859-7 is Greek, QP will not make it readable
    # iso-8859-8 is Hebrew, QP will not make it readable
    'iso-8859-9':  (QP,        QP,      None),
    'iso-8859-10': (QP,        QP,      None),
    # iso-8859-11 is Thai, QP will not make it readable
    'iso-8859-13': (QP,        QP,      None),
    'iso-8859-14': (QP,        QP,      None),
    'iso-8859-15': (QP,        QP,      None),
    'iso-8859-16': (QP,        QP,      None),
    'windows-1252':(QP,        QP,      None),
    'viscii':      (QP,        QP,      None),
    'us-ascii':    (None,      None,    None),
    'big5':        (BASE64,    BASE64,  None),
    'gb2312':      (BASE64,    BASE64,  None),
    'euc-jp':      (BASE64,    None,    'iso-2022-jp'),
    'shift_jis':   (BASE64,    None,    'iso-2022-jp'),
    'iso-2022-jp': (BASE64,    None,    None),
    'koi8-r':      (BASE64,    BASE64,  None),
    'utf-8':       (SHORTEST,  BASE64, 'utf-8'),
    # We're making this one up to represent raw unencoded 8-bit
    '8bit':        (None,      BASE64, 'utf-8'),
    }

# Aliases for other commonly-used names for character sets.  Map
# them to the real ones used in email.
ALIASES = {
    'latin_1': 'iso-8859-1',
    'latin-1': 'iso-8859-1',
    'latin_2': 'iso-8859-2',
    'latin-2': 'iso-8859-2',
    'latin_3': 'iso-8859-3',
    'latin-3': 'iso-8859-3',
    'latin_4': 'iso-8859-4',
    'latin-4': 'iso-8859-4',
    'latin_5': 'iso-8859-9',
    'latin-5': 'iso-8859-9',
    'latin_6': 'iso-8859-10',
    'latin-6': 'iso-8859-10',
    'latin_7': 'iso-8859-13',
    'latin-7': 'iso-8859-13',
    'latin_8': 'iso-8859-14',
    'latin-8': 'iso-8859-14',
    'latin_9': 'iso-8859-15',
    'latin-9': 'iso-8859-15',
    'latin_10':'iso-8859-16',
    'latin-10':'iso-8859-16',
    'cp949':   'ks_c_5601-1987',
    'euc_jp':  'euc-jp',
    'euc_kr':  'euc-kr',
    'ascii':   'us-ascii',
    }


# Map charsets to their Unicode codec strings.
CODEC_MAP = {
    'gb2312':      'eucgb2312_cn',
    'big5':        'big5_tw',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii':    None,
    }



# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Add character set properties to the global registry.

    charset is the input character set, and must be the canonical name of a
    character set.

    Optional header_enc and body_enc are each either Charset.QP for
    quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
    the shortest of qp or base64 encoding, or None for no encoding.  SHORTEST
    is only valid for header_enc.  They describe how message headers and
    message bodies in the input charset are to be encoded.  Default is no
    encoding.

    Optional output_charset is the character set that the output should be
    in.  Conversions will proceed from input charset, to Unicode, to the
    output charset when the method Charset.convert() is called.  The default
    is to output in the same character set as the input.

    Both input_charset and output_charset must have Unicode codec entries in
    the module's charset-to-codec mapping; use add_codec(charset, codecname)
    to add codecs the module does not know about.  See the codecs module's
    documentation for more information.

    Raises ValueError if body_enc is SHORTEST; in that case the registry is
    left untouched.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    CHARSETS[charset] = (header_enc, body_enc, output_charset)


def add_alias(alias, canonical):
    """Add a character set alias.

    alias is the alias name, e.g. latin-1
    canonical is the character set's canonical name, e.g. iso-8859-1
    """
    ALIASES[alias] = canonical


def add_codec(charset, codecname):
    """Add a codec that maps characters in the given charset to/from Unicode.

    charset is the canonical name of a character set.  codecname is the name
    of a Python codec, as appropriate for the second argument to the unicode()
    built-in, or to the encode() method of a Unicode string.
    """
    CODEC_MAP[charset] = codecname



class Charset:
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to provide
    information on how to use that character set in an email in an
    RFC-compliant way.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:

    input_charset: The initial character set specified.  Common aliases
                   are converted to their `official' email names (e.g. latin_1
                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable), Charset.BASE64 (for
                     base64 encoding), or Charset.SHORTEST for the shortest of
                     QP or BASE64 encoding.  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for the
                   mail message's body, which indeed may be different than the
                   header encoding.  Charset.SHORTEST is not allowed for
                   body_encoding.

    output_charset: Some character sets must be converted before they can be
                    used in email headers or bodies.  If the input_charset is
                    one of them, this attribute will contain the name of the
                    charset output will be converted to.  Otherwise, it will
                    be the same as input_charset.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode.  If no conversion codec is
                 necessary, this attribute will be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset.  If no conversion codec is necessary,
                  this attribute will have the same value as the input_codec.
    """
    def __init__(self, input_charset=DEFAULT_CHARSET):
        # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to
        # unicode because its .lower() is locale insensitive.  If the argument
        # is already a unicode, we leave it at that, but ensure that the
        # charset is ASCII, as the standard (RFC XXX) requires.
        try:
            if isinstance(input_charset, unicode):
                input_charset.encode('ascii')
            else:
                input_charset = unicode(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
        input_charset = input_charset.lower().encode('ascii')
        # Set the input charset after filtering through the aliases and/or codecs
        if not (input_charset in ALIASES or input_charset in CHARSETS):
            try:
                input_charset = codecs.lookup(input_charset).name
            except LookupError:
                # Unknown to the codec registry as well; keep the name as given.
                pass
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # Look up the default encodings and conversion in the CHARSETS
        # registry.  Charsets that are not registered fall back to SHORTEST
        # for headers, BASE64 for bodies, and no conversion.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (SHORTEST, BASE64, None))
        if not conv:
            conv = self.input_charset
        # Set the attributes from the registry defaults.
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't defined for input_charset,
        # guess and try a Unicode codec with the same name as input_codec.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.output_charset)

    def __str__(self):
        """Return the lower-cased input charset name."""
        return self.input_charset.lower()

    __repr__ = __str__

    def __eq__(self, other):
        """Compare case-insensitively against str(other)."""
        return str(self) == str(other).lower()

    def __ne__(self, other):
        """Return the negation of __eq__()."""
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns the email.encoders.encode_7or8bit function otherwise.
        """
        # add_charset() rejects SHORTEST for bodies; this guards against the
        # attribute having been set to SHORTEST by other means.
        assert self.body_encoding != SHORTEST
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def convert(self, s):
        """Convert a string from the input_codec to the output_codec."""
        if self.input_codec != self.output_codec:
            return unicode(s, self.input_codec).encode(self.output_codec)
        else:
            return s

    def to_splittable(self, s):
        """Convert a possibly multibyte string to a safely splittable format.

        Uses the input_codec to try and convert the string to Unicode, so it
        can be safely split on character boundaries (even for multibyte
        characters).

        Returns the string as-is if it isn't known how to convert it to
        Unicode with the input_charset.

        Characters that could not be converted to Unicode will be replaced
        with the Unicode replacement character U+FFFD.
        """
        if isinstance(s, unicode) or self.input_codec is None:
            return s
        try:
            return unicode(s, self.input_codec, 'replace')
        except LookupError:
            # Input codec not installed on system, so return the original
            # string unchanged.
            return s

    def from_splittable(self, ustr, to_output=True):
        """Convert a splittable string back into an encoded string.

        Uses the proper codec to try and convert the string from Unicode back
        into an encoded format.  Return the string as-is if it is not Unicode,
        or if it could not be converted from Unicode.

        Characters that could not be converted from Unicode will be replaced
        with an appropriate character (usually '?').

        If to_output is True (the default), uses output_codec to convert to an
        encoded format.  If to_output is False, uses input_codec.
        """
        if to_output:
            codec = self.output_codec
        else:
            codec = self.input_codec
        if not isinstance(ustr, unicode) or codec is None:
            return ustr
        try:
            return ustr.encode(codec, 'replace')
        except LookupError:
            # Output codec not installed
            return ustr

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is not None, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def encoded_header_len(self, s):
        """Return the length of the encoded header string."""
        cset = self.get_output_charset()
        # The len(s) of a 7bit encoding is len(s)
        if self.header_encoding == BASE64:
            return email.base64mime.base64_len(s) + len(cset) + MISC_LEN
        elif self.header_encoding == QP:
            return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN
        elif self.header_encoding == SHORTEST:
            lenb64 = email.base64mime.base64_len(s)
            lenqp = email.quoprimime.header_quopri_len(s)
            return min(lenb64, lenqp) + len(cset) + MISC_LEN
        else:
            return len(s)

    def header_encode(self, s, convert=False):
        """Header-encode a string, optionally converting it to output_charset.

        If convert is True, the string will be converted from the input
        charset to the output charset automatically.  This is not useful for
        multibyte character sets, which have line length issues (multibyte
        characters must be split on a character, not a byte boundary); use the
        high-level Header class to deal with these issues.  convert defaults
        to False.

        The type of encoding (base64 or quoted-printable) will be based on
        self.header_encoding.
        """
        cset = self.get_output_charset()
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.header_encoding == BASE64:
            return email.base64mime.header_encode(s, cset)
        elif self.header_encoding == QP:
            return email.quoprimime.header_encode(s, cset, maxlinelen=None)
        elif self.header_encoding == SHORTEST:
            lenb64 = email.base64mime.base64_len(s)
            lenqp = email.quoprimime.header_quopri_len(s)
            # Ties go to quoted-printable.
            if lenb64 < lenqp:
                return email.base64mime.header_encode(s, cset)
            else:
                return email.quoprimime.header_encode(s, cset, maxlinelen=None)
        else:
            return s

    def body_encode(self, s, convert=True):
        """Body-encode a string and convert it to output_charset.

        If convert is True (the default), the string will be converted from
        the input charset to output charset automatically.  Unlike
        header_encode(), there are no issues with byte boundaries and
        multibyte charsets in email bodies, so this is usually pretty safe.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.
        """
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        # The flags are plain integers, so compare by value rather than by
        # identity, as header_encode() and get_body_encoding() do.
        if self.body_encoding == BASE64:
            return email.base64mime.body_encode(s)
        elif self.body_encoding == QP:
            return email.quoprimime.body_encode(s)
        else:
            return s