def STORECHAR32(ch, byteorder):
    """Return one 32-bit code unit as a list of four byte values.

    ``byteorder == 'little'`` selects little-endian output; any other value
    selects big-endian.  Only the low 32 bits of *ch* are stored.
    """
    order = 'little' if byteorder == 'little' else 'big'
    return list((ch & 0xFFFFFFFF).to_bytes(4, order))


def PyUnicode_EncodeUTF32(s, size, errors, byteorder='little'):
    """Encode the string *s* to UTF-32 and return the bytes as a list of ints.

    *byteorder* is ``'little'`` or ``'big'`` (no BOM is written) or
    ``'native'`` (a BOM is written, followed by data in ``sys.byteorder``).
    Any other value encodes in ``sys.byteorder`` without a BOM.

    *size* is only used to detect empty input.  *errors* is accepted for
    interface compatibility but is not consulted: every character, including
    a lone surrogate, is written as its code point.
    """
    out = []
    order = sys.byteorder

    if byteorder == 'native':
        out.extend(STORECHAR32(0xFEFF, order))
    elif byteorder in ('little', 'big'):
        order = byteorder

    if size == 0:
        # Keep the BOM (if any): an empty string still encodes to a bare BOM
        # in the BOM-writing variant.
        return out

    for c in s:
        # UTF-32 needs no surrogate pairs; each code point is one unit.
        out.extend(STORECHAR32(ord(c), order))

    return out


def utf_32_encode(obj, errors='strict'):
    """Encode *obj* as native-order UTF-32 with a BOM; return (bytes, len(obj))."""
    res = PyUnicode_EncodeUTF32(obj, len(obj), errors, 'native')
    return bytes(res), len(obj)


def utf_32_le_encode(obj, errors='strict'):
    """Encode *obj* as little-endian UTF-32 without a BOM; return (bytes, len(obj))."""
    res = PyUnicode_EncodeUTF32(obj, len(obj), errors, 'little')
    return bytes(res), len(obj)


def utf_32_be_encode(obj, errors='strict'):
    """Encode *obj* as big-endian UTF-32 without a BOM; return (bytes, len(obj))."""
    res = PyUnicode_EncodeUTF32(obj, len(obj), errors, 'big')
    return bytes(res), len(obj)


def _utf32_decode_error(errors, data, start, end, reason):
    """Apply the *errors* policy to the undecodable span ``data[start:end]``.

    Returns ``(replacement_text, resume_position)``.  Raises
    UnicodeDecodeError for ``'strict'`` (or ``None``); any name other than
    ``'replace'``/``'ignore'`` is resolved through ``codecs.lookup_error``,
    which raises LookupError for unknown handlers.
    """
    if errors == 'replace':
        return '\ufffd', end
    if errors == 'ignore':
        return '', end

    exc = UnicodeDecodeError('utf-32', bytes(data), start, end, reason)
    if errors is None or errors == 'strict':
        raise exc

    # Imported lazily so this helper does not depend on a module-level import.
    import codecs
    replacement, resume = codecs.lookup_error(errors)(exc)
    if resume < 0:
        # Handlers may return a position relative to the end of the input.
        resume += len(data)
    if not 0 <= resume <= len(data):
        raise IndexError(
            "position %d from error handler out of bounds" % resume)
    return replacement, resume


def _utf32_decode_span(data, start, size, errors, byteorder, final):
    """Decode ``data[start:size]`` as UTF-32; return ``(pieces, end_position)``.

    Positions (including those reported in UnicodeDecodeError) are offsets
    into *data* itself, so a caller that skipped a BOM still gets absolute
    offsets.  An incomplete trailing unit is left unconsumed unless *final*
    is true, in which case it is reported as ``'truncated data'``.
    """
    order = 'little' if byteorder == 'little' else 'big'
    pieces = []
    pos = start

    while pos < size:
        if size - pos < 4:
            if not final:
                # More input may follow; leave the partial unit for next time.
                break
            replacement, pos = _utf32_decode_error(
                errors, data, pos, size, 'truncated data')
        else:
            ch = int.from_bytes(data[pos:pos + 4], order)
            if ch <= 0x10FFFF:
                pieces.append(chr(ch))
                pos += 4
                continue
            replacement, pos = _utf32_decode_error(
                errors, data, pos, pos + 4,
                'codepoint not in range(0x110000)')
        if replacement:
            pieces.append(replacement)

    return pieces, pos


def PyUnicode_DecodeUTF32Stateful(data, size, errors, byteorder='little', final=0):
    """Decode the first *size* bytes of *data* as UTF-32.

    *byteorder* ``'little'`` selects little-endian; any other value selects
    big-endian.  No BOM handling is done here.

    Returns ``(pieces, consumed, 0)`` where *pieces* is a list of strings to
    be joined by the caller and *consumed* is the number of bytes used.  The
    third element is always 0.  With a false *final*, an incomplete trailing
    unit is not consumed; with a true *final* it is handled according to
    *errors* and counted as consumed.

    NOTE: code points in the surrogate range are decoded as-is rather than
    rejected.
    """
    pieces, consumed = _utf32_decode_span(
        data, 0, size, errors, byteorder, final)
    return pieces, consumed, 0


def utf_32_decode(data, errors='strict', final=0):
    """Decode UTF-32 with BOM detection; return ``(text, consumed)``.

    A leading little- or big-endian BOM selects the byte order and is
    consumed without producing output.  Without a BOM the data is decoded in
    the platform's native byte order (``sys.byteorder``).
    """
    start = 0
    byteorder = sys.byteorder

    if len(data) >= 4:
        head = data[0:4]
        if head == b'\xff\xfe\x00\x00':
            byteorder, start = 'little', 4
        elif head == b'\x00\x00\xfe\xff':
            byteorder, start = 'big', 4

    # Decode in place (rather than slicing off the BOM) so that error
    # offsets refer to the caller's original buffer.
    pieces, consumed = _utf32_decode_span(
        data, start, len(data), errors, byteorder, final)
    return ''.join(pieces), consumed


def utf_32_le_decode(data, errors='strict', final=0):
    """Decode little-endian UTF-32 (no BOM handling); return ``(text, consumed)``."""
    pieces, consumed, _ = PyUnicode_DecodeUTF32Stateful(
        data, len(data), errors, 'little', final)
    return ''.join(pieces), consumed


def utf_32_be_decode(data, errors='strict', final=0):
    """Decode big-endian UTF-32 (no BOM handling); return ``(text, consumed)``."""
    pieces, consumed, _ = PyUnicode_DecodeUTF32Stateful(
        data, len(data), errors, 'big', final)
    return ''.join(pieces), consumed

# ----------------------------------------------------------------------
a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@ -281,8 +281,6 @@ def handler2(exc): b"g[<252><223>]" ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", @@ -684,8 +682,6 @@ def test_badandgoodsurrogateescapeexceptions(self): ("\udc80", 2) ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_badandgoodsurrogatepassexceptions(self): surrogatepass_errors = codecs.lookup_error('surrogatepass') # "surrogatepass" complains about a non-exception passed in diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 0cd6db234c7..fabf74fd9e8 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -390,7 +390,6 @@ def test_bug1098990_b(self): ill_formed_sequence_replace = "\ufffd" - @unittest.expectedFailure # TODO: RUSTPYTHON def test_lone_surrogates(self): self.assertRaises(UnicodeEncodeError, "\ud800".encode, self.encoding) self.assertEqual("[\uDC80]".encode(self.encoding, "backslashreplace"), @@ -466,7 +465,7 @@ class UTF32Test(ReadTest, unittest.TestCase): b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m' b'\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_only_one_bom(self): _,_,reader,writer = codecs.lookup(self.encoding) # encode some stream @@ -482,7 +481,7 @@ def test_only_one_bom(self): f = reader(s) self.assertEqual(f.read(), "spamspam") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? 
def test_badbom(self): s = io.BytesIO(4*b"\xff") f = codecs.getreader(self.encoding)(s) @@ -492,7 +491,7 @@ def test_badbom(self): f = codecs.getreader(self.encoding)(s) self.assertRaises(UnicodeDecodeError, f.read) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_partial(self): self.check_partial( "\x00\xff\u0100\uffff\U00010000", @@ -531,19 +530,17 @@ def test_handlers(self): self.assertEqual(('', 1), codecs.utf_32_decode(b'\x01', 'ignore', True)) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_errors(self): self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode, b"\xff", "strict", True) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_decoder_state(self): self.check_state_handling_decode(self.encoding, "spamspam", self.spamle) self.check_state_handling_decode(self.encoding, "spamspam", self.spambe) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_issue8941(self): # Issue #8941: insufficient result allocation when decoding into # surrogate pairs on UCS-2 builds. @@ -555,39 +552,49 @@ def test_issue8941(self): codecs.utf_32_decode(encoded_be)[0]) @unittest.expectedFailure # TODO: RUSTPYTHON + def test_lone_surrogates(self): + return super().test_lone_surrogates() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_bug1098990_a(self): return super().test_bug1098990_a() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? 
def test_bug1098990_b(self): return super().test_bug1098990_b() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_bug1175396(self): return super().test_bug1175396() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_mixed_readline_and_read(self): return super().test_mixed_readline_and_read() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_readline(self): return super().test_readline() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? 
def test_readlinequeue(self): return super().test_readlinequeue() + + + + + + + class UTF32LETest(ReadTest, unittest.TestCase): encoding = "utf-32-le" ill_formed_sequence = b"\x80\xdc\x00\x00" - @unittest.expectedFailure # TODO: RUSTPYTHON def test_partial(self): self.check_partial( "\x00\xff\u0100\uffff\U00010000", @@ -615,16 +622,13 @@ def test_partial(self): ] ) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple(self): self.assertEqual("\U00010203".encode(self.encoding), b"\x03\x02\x01\x00") - @unittest.expectedFailure # TODO: RUSTPYTHON def test_errors(self): self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode, b"\xff", "strict", True) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_issue8941(self): # Issue #8941: insufficient result allocation when decoding into # surrogate pairs on UCS-2 builds. @@ -633,39 +637,21 @@ def test_issue8941(self): codecs.utf_32_le_decode(encoded)[0]) @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bug1098990_a(self): - return super().test_bug1098990_a() + def test_lone_surrogates(self): + return super().test_lone_surrogates() + - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bug1098990_b(self): - return super().test_bug1098990_b() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bug1175396(self): - return super().test_bug1175396() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_incremental_surrogatepass(self): - return super().test_incremental_surrogatepass() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_mixed_readline_and_read(self): - return super().test_mixed_readline_and_read() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_readline(self): - return super().test_readline() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_readlinequeue(self): - return super().test_readlinequeue() class UTF32BETest(ReadTest, unittest.TestCase): encoding = "utf-32-be" ill_formed_sequence = b"\x00\x00\xdc\x80" - @unittest.expectedFailure # TODO: 
RUSTPYTHON def test_partial(self): self.check_partial( "\x00\xff\u0100\uffff\U00010000", @@ -693,16 +679,13 @@ def test_partial(self): ] ) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_simple(self): self.assertEqual("\U00010203".encode(self.encoding), b"\x00\x01\x02\x03") - @unittest.expectedFailure # TODO: RUSTPYTHON def test_errors(self): self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode, b"\xff", "strict", True) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_issue8941(self): # Issue #8941: insufficient result allocation when decoding into # surrogate pairs on UCS-2 builds. @@ -711,32 +694,15 @@ def test_issue8941(self): codecs.utf_32_be_decode(encoded)[0]) @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bug1098990_a(self): - return super().test_bug1098990_a() + def test_lone_surrogates(self): + return super().test_lone_surrogates() + - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bug1098990_b(self): - return super().test_bug1098990_b() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_bug1175396(self): - return super().test_bug1175396() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_incremental_surrogatepass(self): - return super().test_incremental_surrogatepass() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_mixed_readline_and_read(self): - return super().test_mixed_readline_and_read() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_readline(self): - return super().test_readline() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_readlinequeue(self): - return super().test_readlinequeue() class UTF16Test(ReadTest, unittest.TestCase): @@ -773,7 +739,7 @@ def test_badbom(self): f = codecs.getreader(self.encoding)(s) self.assertRaises(UnicodeDecodeError, f.read) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data def 
test_partial(self): self.check_partial( "\x00\xff\u0100\uffff\U00010000", @@ -795,7 +761,7 @@ def test_partial(self): ] ) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range def test_handlers(self): self.assertEqual(('\ufffd', 1), codecs.utf_16_decode(b'\x01', 'replace', True)) @@ -840,15 +806,20 @@ def test_invalid_modes(self): str(cm.exception)) @unittest.expectedFailure # TODO: RUSTPYTHON + def test_lone_surrogates(self): + return super().test_lone_surrogates() + + @unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() + class UTF16LETest(ReadTest, unittest.TestCase): encoding = "utf-16-le" ill_formed_sequence = b"\x80\xdc" - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data def test_partial(self): self.check_partial( "\x00\xff\u0100\uffff\U00010000", @@ -891,14 +862,19 @@ def test_nonbmp(self): "\U00010203") @unittest.expectedFailure # TODO: RUSTPYTHON + def test_lone_surrogates(self): + return super().test_lone_surrogates() + + @unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() + class UTF16BETest(ReadTest, unittest.TestCase): encoding = "utf-16-be" ill_formed_sequence = b"\xdc\x80" - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data def test_partial(self): self.check_partial( "\x00\xff\u0100\uffff\U00010000", @@ -941,9 +917,14 @@ def test_nonbmp(self): "\U00010203") @unittest.expectedFailure # TODO: RUSTPYTHON + def test_lone_surrogates(self): + return super().test_lone_surrogates() + + 
@unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'utf-16' codec can't decode bytes in position 0-1: unexpected end of data def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() + class UTF8Test(ReadTest, unittest.TestCase): encoding = "utf-8" ill_formed_sequence = b"\xed\xb2\x80" @@ -1069,7 +1050,7 @@ def test_ascii(self): b'+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU' b'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1 def test_partial(self): self.check_partial( 'a+-b\x00c\x80d\u0100e\U00010000f', @@ -1181,13 +1162,16 @@ def test_lone_surrogates(self): def test_bug1175396(self): return super().test_bug1175396() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: expected at least 5 arguments, got 1 + def test_readline(self): + return super().test_readline() + + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: utf_7_decode() takes from 1 to 2 positional arguments but 3 were given def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_readline(self): - return super().test_readline() + + class UTF16ExTest(unittest.TestCase): @@ -1312,7 +1296,7 @@ def test_raw(self): if b != b'\\': self.assertEqual(decode(b + b'0'), (b + b'0', 2)) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; + (b'[]', 4) def test_escape(self): decode = codecs.escape_decode check = coding_checker(self, decode) @@ -2293,7 +2277,7 @@ def test_basic(self): class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: big5 def test_basics(self): s = "abc123" # all codecs should be able 
to encode these for encoding in all_unicode_encodings: @@ -2413,7 +2397,7 @@ def test_basics_capi(self): self.assertEqual(decodedresult, s, "encoding=%r" % encoding) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: big5 def test_seek(self): # all codecs should be able to encode these s = "%s\n%s\n" % (100*"abc123", 100*"def456") @@ -2429,7 +2413,7 @@ def test_seek(self): data = reader.read() self.assertEqual(s, data) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: big5 def test_bad_decode_args(self): for encoding in all_unicode_encodings: decoder = codecs.getdecoder(encoding) @@ -2437,7 +2421,7 @@ def test_bad_decode_args(self): if encoding not in ("idna", "punycode"): self.assertRaises(TypeError, decoder, 42) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: big5 def test_bad_encode_args(self): for encoding in all_unicode_encodings: encoder = codecs.getencoder(encoding) @@ -2449,7 +2433,7 @@ def test_encoding_map_type_initialized(self): table_type = type(cp1140.encoding_table) self.assertEqual(table_type, table_type) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: big5 def test_decoder_state(self): # Check that getstate() and setstate() handle the state properly u = "abc123" @@ -2460,7 +2444,7 @@ def test_decoder_state(self): class CharmapTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range def test_decode_with_string_map(self): self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "strict", "abc"), @@ -2516,7 +2500,7 @@ def test_decode_with_string_map(self): ("", len(allbytes)) ) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: 
RUSTPYTHON; AssertionError: UnicodeDecodeError not raised by charmap_decode def test_decode_with_int2str_map(self): self.assertEqual( codecs.charmap_decode(b"\x00\x01\x02", "strict", @@ -2633,7 +2617,7 @@ def test_decode_with_int2str_map(self): b"\x00\x01\x02", "strict", {0: "A", 1: 'Bb', 2: 999999999} ) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: character mapping must be in range(65536) def test_decode_with_int2int_map(self): a = ord('a') b = ord('b') @@ -2726,7 +2710,7 @@ def test_streamreaderwriter(self): class TypesTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_decode_unicode(self): # Most decoders don't accept unicode input decoders = [ @@ -2918,14 +2902,16 @@ def test_partial(self): ] ) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; IndexError: index out of range def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'unicodeescape' codec can't decode bytes in position 72-75: truncated \uXXXX escape def test_readline(self): return super().test_readline() + + class RawUnicodeEscapeTest(ReadTest, unittest.TestCase): encoding = "raw-unicode-escape" @@ -2979,7 +2965,7 @@ def test_decode_errors(self): self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10)) self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10)) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; - \ def test_partial(self): self.check_partial( "\x00\t\n\r\\\xff\uffff\U00010000", @@ -3009,15 +2995,17 @@ def test_partial(self): ] ) - @unittest.expectedFailure # TODO: RUSTPYTHON + 
@unittest.expectedFailure # TODO: RUSTPYTHON; - \ def test_incremental_surrogatepass(self): return super().test_incremental_surrogatepass() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; UnicodeDecodeError: 'rawunicodeescape' codec can't decode bytes in position 72-76: truncated \uXXXX def test_readline(self): return super().test_readline() + + class EscapeEncodeTest(unittest.TestCase): def test_escape_encode(self): @@ -3059,7 +3047,7 @@ def test_ascii(self): self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"), b"foo\x80bar") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; Result: FAILURE def test_charmap(self): # bad byte: \xa5 is unmapped in iso-8859-3 self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"), @@ -3074,7 +3062,7 @@ def test_latin1(self): class BomTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_seek0(self): data = "1234567890" tests = ("utf-16", @@ -3253,7 +3241,7 @@ def test_binary_to_text_denylists_text_transforms(self): bad_input.decode("rot_13") self.assertIsNone(failure.exception.__cause__) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'error' object has no attribute '__notes__'. Did you mean: '__ne__'? 
@unittest.skipUnless(zlib, "Requires zlib support") def test_custom_zlib_error_is_noted(self): # Check zlib codec gives a good error for malformed input @@ -3350,7 +3338,6 @@ def raise_obj(self, *args, **kwds): # Helper to dynamically change the object raised by a test codec raise self.obj_to_raise - @unittest.expectedFailure # TODO: RUSTPYTHON def check_note(self, obj_to_raise, msg, exc_type=RuntimeError): self.obj_to_raise = obj_to_raise self.set_codec(self.raise_obj, self.raise_obj) @@ -3363,55 +3350,55 @@ def check_note(self, obj_to_raise, msg, exc_type=RuntimeError): with self.assertNoted("decoding", exc_type, msg): codecs.decode(b"bytes input", self.codec_name) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_raise_by_type(self): self.check_note(RuntimeError, "") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_raise_by_value(self): msg = "This should be noted" self.check_note(RuntimeError(msg), msg) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'MyRuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_raise_grandchild_subclass_exact_size(self): msg = "This should be noted" class MyRuntimeError(RuntimeError): __slots__ = () self.check_note(MyRuntimeError(msg), msg, MyRuntimeError) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'MyRuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? 
def test_raise_subclass_with_weakref_support(self): msg = "This should be noted" class MyRuntimeError(RuntimeError): pass self.check_note(MyRuntimeError(msg), msg, MyRuntimeError) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'CustomInit' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_init_override(self): class CustomInit(RuntimeError): def __init__(self): pass self.check_note(CustomInit, "") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'CustomNew' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_new_override(self): class CustomNew(RuntimeError): def __new__(cls): return super().__new__(cls) self.check_note(CustomNew, "") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_instance_attribute(self): msg = "This should be noted" exc = RuntimeError(msg) exc.attr = 1 self.check_note(exc, "^{}$".format(msg)) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? def test_non_str_arg(self): self.check_note(RuntimeError(1), "1") - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'? 
def test_multiple_args(self): msg_re = r"^\('a', 'b', 'c'\)$" self.check_note(RuntimeError('a', 'b', 'c'), msg_re) @@ -3428,7 +3415,7 @@ def test_codec_lookup_failure(self): with self.assertRaisesRegex(LookupError, msg): codecs.decode(b"bytes input", self.codec_name) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: "^'exception_notes_test' encoder returned 'str' instead of 'bytes'; use codecs.encode\(\) to encode to arbitrary types$" does not match "'exception_notes_test' encoder returned 'str' instead of 'bytes'; use codecs.encode() to encode arbitrary types" def test_unflagged_non_text_codec_handling(self): # The stdlib non-text codecs are now marked so they're # pre-emptively skipped by the text model related methods @@ -3464,14 +3451,14 @@ def decode_to_bytes(*args, **kwds): class CodePageTest(unittest.TestCase): CP_UTF8 = 65001 - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_invalid_code_page(self): self.assertRaises(ValueError, codecs.code_page_encode, -1, 'a') self.assertRaises(ValueError, codecs.code_page_decode, -1, b'a') self.assertRaises(OSError, codecs.code_page_encode, 123, 'a') self.assertRaises(OSError, codecs.code_page_decode, 123, b'a') - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_code_page_name(self): self.assertRaisesRegex(UnicodeEncodeError, 'cp932', codecs.code_page_encode, 932, '\xff') @@ -3538,7 +3525,7 @@ def check_encode(self, cp, tests): self.assertRaises(UnicodeEncodeError, text.encode, f'cp{cp}', errors) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_cp932(self): self.check_encode(932, ( ('abc', 'strict', b'abc'), @@ -3573,7 +3560,7 @@ def test_cp932(self): (b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'), )) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def 
test_cp1252(self): self.check_encode(1252, ( ('abc', 'strict', b'abc'), @@ -3647,7 +3634,7 @@ def test_cp20106(self): (b'(\xbf)', 'surrogatepass', None), )) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_cp_utf7(self): cp = 65000 self.check_encode(cp, ( @@ -3668,7 +3655,7 @@ def test_cp_utf7(self): (b'[\xff]', 'strict', '[\xff]'), )) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_multibyte_encoding(self): self.check_decode(932, ( (b'\x84\xe9\x80', 'ignore', '\u9a3e'), @@ -3683,7 +3670,7 @@ def test_multibyte_encoding(self): ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'), )) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_code_page_decode_flags(self): # Issue #36312: For some code pages (e.g. UTF-7) flags for # MultiByteToWideChar() must be set to 0. @@ -3703,7 +3690,7 @@ def test_code_page_decode_flags(self): self.assertEqual(codecs.code_page_decode(42, b'abc'), ('\uf061\uf062\uf063', 3)) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_incremental(self): decoded = codecs.code_page_decode(932, b'\x82', 'strict', False) self.assertEqual(decoded, ('', 0)) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 15491560b52..5fd011360f0 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -780,8 +780,8 @@ def test_closefd_attr(self): file = self.open(f.fileno(), "r", encoding="utf-8", closefd=False) self.assertEqual(file.buffer.raw.closefd, False) + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: filter ('', ResourceWarning) did not catch any warning @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; cyclic GC not supported, causes file locking') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_garbage_collection(self): # FileIO objects are collected, and collecting them flushes # all data to 
disk. @@ -1803,8 +1803,8 @@ def test_misbehaved_io_read(self): # checking this is not so easy. self.assertRaises(OSError, bufio.read, 10) + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: filter ('', ResourceWarning) did not catch any warning @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; cyclic GC not supported, causes file locking') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_garbage_collection(self): # C BufferedReader objects are collected. # The Python version has __del__, so it ends into gc.garbage instead @@ -1839,14 +1839,14 @@ def test_bad_readinto_type(self): bufio.readline() self.assertIsInstance(cm.exception.__cause__, TypeError) - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_error_through_destructor(self): - return super().test_error_through_destructor() - - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_pickling_subclass(self): return super().test_pickling_subclass() + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'NoneType' object has no attribute 'exc_type' + def test_error_through_destructor(self): + return super().test_error_through_destructor() + class PyBufferedReaderTest(BufferedReaderTest): tp = pyio.BufferedReader @@ -2161,8 +2161,8 @@ def test_initialization(self): self.assertRaises(ValueError, bufio.__init__, rawio, buffer_size=-1) self.assertRaises(ValueError, bufio.write, b"def") + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: filter ('', ResourceWarning) did not catch any warning @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; cyclic GC not supported, causes file locking') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_garbage_collection(self): # C BufferedWriter objects are collected, and collecting them flushes # all data to disk. 
@@ -2185,14 +2185,14 @@ def test_args_error(self): with self.assertRaisesRegex(TypeError, "BufferedWriter"): self.tp(self.BytesIO(), 1024, 1024, 1024) - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_error_through_destructor(self): - return super().test_error_through_destructor() - - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_pickling_subclass(self): return super().test_pickling_subclass() + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'NoneType' object has no attribute 'exc_type' + def test_error_through_destructor(self): + return super().test_error_through_destructor() + class PyBufferedWriterTest(BufferedWriterTest): tp = pyio.BufferedWriter @@ -2669,8 +2669,8 @@ def test_interleaved_readline_write(self): class CBufferedRandomTest(BufferedRandomTest, SizeofTest): tp = io.BufferedRandom + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: filter ('', ResourceWarning) did not catch any warning @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; cyclic GC not supported, causes file locking') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_garbage_collection(self): CBufferedReaderTest.test_garbage_collection(self) CBufferedWriterTest.test_garbage_collection(self) @@ -2680,14 +2680,14 @@ def test_args_error(self): with self.assertRaisesRegex(TypeError, "BufferedRandom"): self.tp(self.BytesIO(), 1024, 1024, 1024) - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_error_through_destructor(self): - return super().test_error_through_destructor() - - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON def test_pickling_subclass(self): return super().test_pickling_subclass() + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'NoneType' object has no attribute 'exc_type' + def test_error_through_destructor(self): + return super().test_error_through_destructor() + class 
PyBufferedRandomTest(BufferedRandomTest): tp = pyio.BufferedRandom @@ -2847,6 +2847,7 @@ def setUp(self): def tearDown(self): os_helper.unlink(os_helper.TESTFN) + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: UnicodeEncodeError not raised def test_constructor(self): r = self.BytesIO(b"\xc3\xa9\n\n") b = self.BufferedReader(r, 1000) @@ -3069,6 +3070,7 @@ def test_encoding_errors_writing(self): t.flush() self.assertEqual(b.getvalue(), b"abc?def\n") + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_newlines(self): input_lines = [ "unix\n", "windows\r\n", "os9\r", "last\n", "nonl" ] @@ -3340,7 +3342,7 @@ def test_seek_and_tell_with_data(data, min_pos=0): finally: StatefulIncrementalDecoder.codecEnabled = 0 - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; LookupError: unknown encoding: euc_jp def test_multibyte_seek_and_tell(self): f = self.open(os_helper.TESTFN, "w", encoding="euc_jp") f.write("AB\n\u3046\u3048\n") @@ -3387,7 +3389,7 @@ def test_seek_with_encoder_state(self): self.assertEqual(f.readline(), "\u00e6\u0300\u0300") f.close() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? 
def test_encoded_writes(self): data = "1234567890" tests = ("utf-16", @@ -3526,7 +3528,6 @@ def test_issue2282(self): self.assertEqual(buffer.seekable(), txt.seekable()) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_append_bom(self): # The BOM is not written again when appending to a non-empty file filename = os_helper.TESTFN @@ -3542,7 +3543,6 @@ def test_append_bom(self): with self.open(filename, 'rb') as f: self.assertEqual(f.read(), 'aaaxxx'.encode(charset)) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_seek_bom(self): # Same test, but when seeking manually filename = os_helper.TESTFN @@ -3558,7 +3558,6 @@ def test_seek_bom(self): with self.open(filename, 'rb') as f: self.assertEqual(f.read(), 'bbbzzz'.encode(charset)) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_seek_append_bom(self): # Same test, but first seek to the start and then to the end filename = os_helper.TESTFN @@ -3826,7 +3825,7 @@ def __del__(self): """.format(iomod=iomod, kwargs=kwargs) return assert_python_ok("-c", code) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: 'LookupError: unknown encoding: ascii' not found in "Exception ignored in: \nAttributeError: 'NoneType' object has no attribute 'TextIOWrapper'\n" def test_create_at_shutdown_without_encoding(self): rc, out, err = self._check_create_at_shutdown() if err: @@ -3836,7 +3835,7 @@ def test_create_at_shutdown_without_encoding(self): else: self.assertEqual("ok", out.decode().strip()) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b"Exception ignored in: \nAttributeError: 'NoneType' object has no attribute 'TextIOWrapper'\n" is not false def test_create_at_shutdown_with_encoding(self): rc, out, err = self._check_create_at_shutdown(encoding='utf-8', errors='strict') @@ -4108,7 +4107,7 @@ class CTextIOWrapperTest(TextIOWrapperTest): io = io shutdown_error = "LookupError: unknown 
encoding: ascii" - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: ValueError not raised by read def test_initialization(self): r = self.BytesIO(b"\xc3\xa9\n\n") b = self.BufferedReader(r, 1000) @@ -4119,8 +4118,8 @@ def test_initialization(self): t = self.TextIOWrapper.__new__(self.TextIOWrapper) self.assertRaises(Exception, repr, t) + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: filter ('', ResourceWarning) did not catch any warning @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; cyclic GC not supported, causes file locking') - @unittest.expectedFailure # TODO: RUSTPYTHON def test_garbage_collection(self): # C TextIOWrapper objects are collected, and collecting them flushes # all data to disk. @@ -4184,7 +4183,7 @@ def write(self, data): t.write("x"*chunk_size) self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; RuntimeError: reentrant call inside textio def test_issue119506(self): chunk_size = 8192 @@ -4207,78 +4206,74 @@ def write(self, data): self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size], buf._write_stack) - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_constructor(self): - return super().test_constructor() + # TODO: RUSTPYTHON; euc_jis_2004 encoding not supported + @unittest.expectedFailure + def test_seek_with_encoder_state(self): + return super().test_seek_with_encoder_state() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_newlines(self): - return super().test_newlines() + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_pickling_subclass(self): + return super().test_pickling_subclass() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; + + def test_reconfigure_newline(self): + return super().test_reconfigure_newline() + + @unittest.expectedFailure # TODO: RUSTPYTHON; + 
['AAA\nBB\x00B\nCCC\r', 'DDD\r', 'EEE\r', '\nFFF\r', '\nGGG'] def test_newlines_input(self): return super().test_newlines_input() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; + strict + def test_reconfigure_defaults(self): + return super().test_reconfigure_defaults() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: LookupError not raised def test_non_text_encoding_codecs_are_rejected(self): return super().test_non_text_encoding_codecs_are_rejected() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_reconfigure_defaults(self): - return super().test_reconfigure_defaults() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: Regex didn't match: "<(_io\\.)?TextIOWrapper name='dummy' mode='r' encoding='utf-8'>" not found in "<_io.TextIOWrapper name='dummy' encoding='utf-8'>" + def test_repr(self): + return super().test_repr() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_reconfigure_encoding_read(self): - return super().test_reconfigure_encoding_read() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: RuntimeError not raised + def test_recursive_repr(self): + return super().test_recursive_repr() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: UnicodeEncodeError not raised def test_reconfigure_errors(self): return super().test_reconfigure_errors() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_reconfigure_line_buffering(self): - return super().test_reconfigure_line_buffering() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: UnsupportedOperation not raised + def test_reconfigure_encoding_read(self): + return super().test_reconfigure_encoding_read() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_reconfigure_locale(self): - return super().test_reconfigure_locale() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'' != b'1' + def 
test_reconfigure_write_through(self): + return super().test_reconfigure_write_through() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_reconfigure_newline(self): - return super().test_reconfigure_newline() + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'' != b'AB\nC' + def test_reconfigure_line_buffering(self): + return super().test_reconfigure_line_buffering() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'' != b'abc\xe9\n' def test_reconfigure_write(self): return super().test_reconfigure_write() - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'\xef\xbb\xbfaaa\xef\xbb\xbfxxx' != b'\xef\xbb\xbfaaaxxx' + def test_append_bom(self): + return super().test_append_bom() + + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'foo\n\xef\xbb\xbf\xc3\xa9\n' != b'foo\n\xc3\xa9\n' def test_reconfigure_write_fromascii(self): return super().test_reconfigure_write_fromascii() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_reconfigure_write_through(self): - return super().test_reconfigure_write_through() - - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: 'NoneType' object has no attribute 'exc_type' def test_error_through_destructor(self): return super().test_error_through_destructor() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_repr(self): - return super().test_repr() - - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_recursive_repr(self): - return super().test_recursive_repr() - - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_pickling_subclass(self): - return super().test_pickling_subclass() - - # TODO: RUSTPYTHON; euc_jis_2004 encoding not supported - @unittest.expectedFailure - def test_seek_with_encoder_state(self): - return super().test_seek_with_encoder_state() + @unittest.expectedFailure # TODO: 
RUSTPYTHON; LookupError: unknown encoding: locale + def test_reconfigure_locale(self): + return super().test_reconfigure_locale() class PyTextIOWrapperTest(TextIOWrapperTest): @@ -4289,10 +4284,6 @@ class PyTextIOWrapperTest(TextIOWrapperTest): def test_constructor(self): return super().test_constructor() - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_newlines(self): - return super().test_newlines() - # TODO: RUSTPYTHON; euc_jis_2004 encoding not supported @unittest.expectedFailure def test_seek_with_encoder_state(self): @@ -4376,7 +4367,7 @@ def _decode_bytewise(s): self.assertEqual(decoder.decode(input), "abc") self.assertEqual(decoder.newlines, None) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AttributeError: module 'codecs' has no attribute 'utf_32_ex_decode'. Did you mean: 'utf_16_ex_decode'? def test_newline_decoder(self): encodings = ( # None meaning the IncrementalNewlineDecoder takes unicode input @@ -4797,7 +4788,7 @@ def test_check_encoding_warning(self): self.assertTrue( warnings[1].startswith(b":8: EncodingWarning: ")) - @unittest.expectedFailure # TODO: RUSTPYTHON + @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: b'locale' != b'utf-8' def test_text_encoding(self): # PEP 597, bpo-47000. 
io.text_encoding() returns "locale" or "utf-8" # based on sys.flags.utf8_mode diff --git a/Lib/test/test_json/test_unicode.py b/Lib/test/test_json/test_unicode.py index 2118c9827ea..c1fba019ccc 100644 --- a/Lib/test/test_json/test_unicode.py +++ b/Lib/test/test_json/test_unicode.py @@ -94,7 +94,6 @@ def test_bytes_encode(self): self.assertRaises(TypeError, self.dumps, b"hi") self.assertRaises(TypeError, self.dumps, [b"hi"]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_bytes_decode(self): for encoding, bom in [ ('utf-8', codecs.BOM_UTF8), diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 9eee1797d48..d95c7857d98 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1291,7 +1291,6 @@ def test_universal_newlines_communicate_stdin_stdout_stderr(self): # to stderr at exit of subprocess. self.assertTrue(stderr.startswith("eline2\neline6\neline7\n")) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_universal_newlines_communicate_encodings(self): # Check that universal newlines mode works for various encodings, # in particular for encodings in the UTF-16 and UTF-32 families. diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 2281f48ce25..9d6d39307ff 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -4094,7 +4094,6 @@ def f(): e[:1] = (f() for i in range(2)) class IOTest(unittest.TestCase): - @unittest.expectedFailure # TODO: RUSTPYTHON def test_encoding(self): # Test encoding issues. 
elem = ET.Element("tag") diff --git a/crates/vm/src/stdlib/codecs.rs b/crates/vm/src/stdlib/codecs.rs index 1b728386671..bc9029cb71a 100644 --- a/crates/vm/src/stdlib/codecs.rs +++ b/crates/vm/src/stdlib/codecs.rs @@ -705,5 +705,28 @@ mod _codecs { fn utf_16_ex_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { delegate_pycodecs!(utf_16_ex_decode, args, vm) } - // TODO: utf-32 functions + #[pyfunction] + fn utf_32_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { + delegate_pycodecs!(utf_32_encode, args, vm) + } + #[pyfunction] + fn utf_32_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { + delegate_pycodecs!(utf_32_decode, args, vm) + } + #[pyfunction] + fn utf_32_le_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { + delegate_pycodecs!(utf_32_le_encode, args, vm) + } + #[pyfunction] + fn utf_32_le_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { + delegate_pycodecs!(utf_32_le_decode, args, vm) + } + #[pyfunction] + fn utf_32_be_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { + delegate_pycodecs!(utf_32_be_encode, args, vm) + } + #[pyfunction] + fn utf_32_be_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult { + delegate_pycodecs!(utf_32_be_decode, args, vm) + } }