diff --git a/Lib/_pycodecs.py b/Lib/_pycodecs.py
index 4068bd56693..98dec3c614d 100644
--- a/Lib/_pycodecs.py
+++ b/Lib/_pycodecs.py
@@ -22,10 +22,10 @@
 
    The builtin Unicode codecs use the following interface:
 
-     <encoding>_encode(Unicode_object[,errors='strict']) -> 
+     <encoding>_encode(Unicode_object[,errors='strict']) ->
          (string object, bytes consumed)
 
-     <encoding>_decode(char_buffer_obj[,errors='strict']) -> 
+     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
         (Unicode object, bytes consumed)
 
    <encoding>_encode() interfaces also accept non-Unicode object as
@@ -44,48 +44,76 @@
 From PyPy v1.0.0
 
 """
-#from unicodecodec import *
-
-__all__ = ['register', 'lookup', 'lookup_error', 'register_error', 'encode', 'decode',
-           'latin_1_encode', 'mbcs_decode', 'readbuffer_encode', 'escape_encode',
-           'utf_8_decode', 'raw_unicode_escape_decode', 'utf_7_decode',
-           'unicode_escape_encode', 'latin_1_decode', 'utf_16_decode',
-           'unicode_escape_decode', 'ascii_decode', 'charmap_encode', 'charmap_build',
-           'unicode_internal_encode', 'unicode_internal_decode', 'utf_16_ex_decode',
-           'escape_decode', 'charmap_decode', 'utf_7_encode', 'mbcs_encode',
-           'ascii_encode', 'utf_16_encode', 'raw_unicode_escape_encode', 'utf_8_encode',
-           'utf_16_le_encode', 'utf_16_be_encode', 'utf_16_le_decode', 'utf_16_be_decode',
-           'utf_32_ex_decode',]
+# from unicodecodec import *
+
+__all__ = [
+    "register",
+    "lookup",
+    "lookup_error",
+    "register_error",
+    "encode",
+    "decode",
+    "latin_1_encode",
+    "mbcs_decode",
+    "readbuffer_encode",
+    "escape_encode",
+    "utf_8_decode",
+    "raw_unicode_escape_decode",
+    "utf_7_decode",
+    "unicode_escape_encode",
+    "latin_1_decode",
+    "utf_16_decode",
+    "unicode_escape_decode",
+    "ascii_decode",
+    "charmap_encode",
+    "charmap_build",
+    "unicode_internal_encode",
+    "unicode_internal_decode",
+    "utf_16_ex_decode",
+    "escape_decode",
+    "charmap_decode",
+    "utf_7_encode",
+    "mbcs_encode",
+    "ascii_encode",
+    "utf_16_encode",
+    "raw_unicode_escape_encode",
+    "utf_8_encode",
+    "utf_16_le_encode",
+    "utf_16_be_encode",
+    "utf_16_le_decode",
+    "utf_16_be_decode",
+    "utf_32_ex_decode",
+]
 
 import sys
 import warnings
 from _codecs import *
 
 
-def latin_1_encode( obj, errors='strict'):
-    """None
-    """
+def latin_1_encode(obj, errors="strict"):
+    """Encode obj via PyUnicode_EncodeLatin1; return (bytes, len(obj))."""
     res = PyUnicode_EncodeLatin1(obj, len(obj), errors)
     res = bytes(res)
     return res, len(obj)
+
+
 # XXX MBCS codec might involve ctypes ?
 def mbcs_decode():
-    """None
-    """
+    """Placeholder for the MBCS decoder: not implemented, returns None."""
     pass
 
-def readbuffer_encode( obj, errors='strict'):
-    """None
-    """
+
+def readbuffer_encode(obj, errors="strict"):
+    """Return (bytes, len(obj)); a str is encoded with str.encode(), else bytes(obj)."""
     if isinstance(obj, str):
         res = obj.encode()
     else:
         res = bytes(obj)
     return res, len(obj)
 
-def escape_encode( obj, errors='strict'):
-    """None
-    """
+
+def escape_encode(obj, errors="strict"):
+    """None"""
     if not isinstance(obj, bytes):
         raise TypeError("must be bytes")
     s = repr(obj).encode()
@@ -94,85 +122,88 @@ def escape_encode( obj, errors='strict'):
         v = v.replace(b"'", b"\\'").replace(b'\\"', b'"')
     return v, len(obj)
 
-def raw_unicode_escape_decode( data, errors='strict', final=False):
-    """None
-    """
-    res = PyUnicode_DecodeRawUnicodeEscape(data, len(data), errors, final)
-    res = ''.join(res)
-    return res, len(data)
 
-def utf_7_decode( data, errors='strict', final=False):
-    """None
-    """
+def raw_unicode_escape_decode(data, errors="strict", final=True):
+    """Decode via PyUnicode_DecodeRawUnicodeEscape; return (str, consumed)."""
+    res, consumed = PyUnicode_DecodeRawUnicodeEscape(data, len(data), errors, final)
+    res = "".join(res)
+    return res, consumed
+
+
+def utf_7_decode(data, errors="strict", final=False):
+    """Decode via PyUnicode_DecodeUTF7; return (str, consumed)."""
     res, consumed = PyUnicode_DecodeUTF7(data, len(data), errors, final)
-    res = ''.join(res)
+    res = "".join(res)
     return res, consumed
 
-def unicode_escape_encode( obj, errors='strict'):
-    """None
-    """
+
+def unicode_escape_encode(obj, errors="strict"):
+    """Encode via unicodeescape_string; return (bytes, len(obj)). errors is unused."""
     res = unicodeescape_string(obj, len(obj), 0)
-    res = b''.join(res)
+    res = b"".join(res)
     return res, len(obj)
 
-def latin_1_decode( data, errors='strict'):
-    """None
-    """
+
+def latin_1_decode(data, errors="strict"):
+    """Decode via PyUnicode_DecodeLatin1; return (str, len(data))."""
     res = PyUnicode_DecodeLatin1(data, len(data), errors)
-    res = ''.join(res)
+    res = "".join(res)
     return res, len(data)
 
-def utf_16_decode( data, errors='strict', final=False):
-    """None
-    """
+
+def utf_16_decode(data, errors="strict", final=False):
+    """Decode UTF-16 with byteorder 'native'; return (str, consumed)."""
     consumed = len(data)
     if final:
         consumed = 0
-    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'native', final)
-    res = ''.join(res)
+    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(
+        data, len(data), errors, "native", final
+    )
+    res = "".join(res)
     return res, consumed
 
-def unicode_escape_decode( data, errors='strict', final=False):
-    """None
-    """
-    res = PyUnicode_DecodeUnicodeEscape(data, len(data), errors, final)
-    res = ''.join(res)
-    return res, len(data)
+
+def unicode_escape_decode(data, errors="strict", final=True):
+    """Decode via PyUnicode_DecodeUnicodeEscape; return (str, consumed)."""
+    res, consumed = PyUnicode_DecodeUnicodeEscape(data, len(data), errors, final)
+    res = "".join(res)
+    return res, consumed
 
 
-def ascii_decode( data, errors='strict'):
-    """None
-    """
+def ascii_decode(data, errors="strict"):
+    """Decode via PyUnicode_DecodeASCII; return (str, len(data))."""
     res = PyUnicode_DecodeASCII(data, len(data), errors)
-    res = ''.join(res)
+    res = "".join(res)
     return res, len(data)
 
-def charmap_encode(obj, errors='strict', mapping='latin-1'):
-    """None
-    """
+
+def charmap_encode(obj, errors="strict", mapping="latin-1"):
+    """Encode via PyUnicode_EncodeCharmap with mapping; return (bytes, len(obj))."""
 
     res = PyUnicode_EncodeCharmap(obj, len(obj), mapping, errors)
     res = bytes(res)
     return res, len(obj)
 
+
 def charmap_build(s):
     return {ord(c): i for i, c in enumerate(s)}
 
+
 if sys.maxunicode == 65535:
     unicode_bytes = 2
 else:
     unicode_bytes = 4
 
-def unicode_internal_encode( obj, errors='strict'):
-    """None
-    """
+
+def unicode_internal_encode(obj, errors="strict"):
+    """None"""
     if type(obj) == str:
         p = bytearray()
         t = [ord(x) for x in obj]
         for i in t:
             b = bytearray()
             for j in range(unicode_bytes):
-                b.append(i%256)
+                b.append(i % 256)
                 i >>= 8
             if sys.byteorder == "big":
                 b.reverse()
@@ -180,12 +211,12 @@ def unicode_internal_encode( obj, errors='strict'):
         res = bytes(p)
         return res, len(res)
     else:
-        res = "You can do better than this" # XXX make this right
+        res = "You can do better than this"  # XXX make this right
         return res, len(res)
 
-def unicode_internal_decode( unistr, errors='strict'):
-    """None
-    """
+
+def unicode_internal_decode(unistr, errors="strict"):
+    """None"""
     if type(unistr) == str:
         return unistr, len(unistr)
     else:
@@ -199,232 +230,281 @@ def unicode_internal_decode( unistr, errors='strict'):
             start = 0
             stop = unicode_bytes
             step = 1
-        while i < len(unistr)-unicode_bytes+1:
+        while i < len(unistr) - unicode_bytes + 1:
             t = 0
             h = 0
             for j in range(start, stop, step):
-                t += ord(unistr[i+j])<<(h*8)
+                t += ord(unistr[i + j]) << (h * 8)
                 h += 1
             i += unicode_bytes
             p += chr(t)
-        res = ''.join(p)
+        res = "".join(p)
         return res, len(res)
 
-def utf_16_ex_decode( data, errors='strict', byteorder=0, final=0):
-    """None
-    """
+
+def utf_16_ex_decode(data, errors="strict", byteorder=0, final=0):
+    """Decode UTF-16 (byteorder 0: native, -1: little, else big); return (str, consumed, byteorder)."""
     if byteorder == 0:
-        bm = 'native'
+        bm = "native"
     elif byteorder == -1:
-        bm = 'little'
+        bm = "little"
     else:
-        bm = 'big'
+        bm = "big"
     consumed = len(data)
     if final:
         consumed = 0
-    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, bm, final)
-    res = ''.join(res)
+    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(
+        data, len(data), errors, bm, final
+    )
+    res = "".join(res)
     return res, consumed, byteorder
 
-def utf_32_ex_decode( data, errors='strict', byteorder=0, final=0):
-    """None
-    """
+
+def utf_32_ex_decode(data, errors="strict", byteorder=0, final=0):
+    """Decode UTF-32, checking for a BOM when byteorder is 0; return (str, consumed, byteorder)."""
     if byteorder == 0:
         if len(data) < 4:
             if final and len(data):
-                if sys.byteorder == 'little':
-                    bm = 'little'
+                if sys.byteorder == "little":
+                    bm = "little"
                 else:
-                    bm = 'big'
+                    bm = "big"
                 res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
                     data, len(data), errors, bm, final
                 )
-                return ''.join(res), consumed, 0
-            return '', 0, 0
-        if data[0:4] == b'\xff\xfe\x00\x00':
+                return "".join(res), consumed, 0
+            return "", 0, 0
+        if data[0:4] == b"\xff\xfe\x00\x00":
             res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
-                data[4:], len(data) - 4, errors, 'little', final
+                data[4:], len(data) - 4, errors, "little", final
             )
-            return ''.join(res), consumed + 4, -1
-        if data[0:4] == b'\x00\x00\xfe\xff':
+            return "".join(res), consumed + 4, -1
+        if data[0:4] == b"\x00\x00\xfe\xff":
             res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
-                data[4:], len(data) - 4, errors, 'big', final
+                data[4:], len(data) - 4, errors, "big", final
             )
-            return ''.join(res), consumed + 4, 1
-        if sys.byteorder == 'little':
-            bm = 'little'
+            return "".join(res), consumed + 4, 1
+        if sys.byteorder == "little":
+            bm = "little"
         else:
-            bm = 'big'
-        res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data, len(data), errors, bm, final)
-        return ''.join(res), consumed, 0
+            bm = "big"
+        res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+            data, len(data), errors, bm, final
+        )
+        return "".join(res), consumed, 0
 
     if byteorder == -1:
-        res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data, len(data), errors, 'little', final)
-        return ''.join(res), consumed, -1
+        res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+            data, len(data), errors, "little", final
+        )
+        return "".join(res), consumed, -1
+
+    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+        data, len(data), errors, "big", final
+    )
+    return "".join(res), consumed, 1
+
 
-    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data, len(data), errors, 'big', final)
-    return ''.join(res), consumed, 1
+def _is_hex_digit(b):
+    return (
+        0x30 <= b <= 0x39  # 0-9
+        or 0x41 <= b <= 0x46  # A-F
+        or 0x61 <= b <= 0x66  # a-f
+    )
 
-# XXX needs error messages when the input is invalid
-def escape_decode(data, errors='strict'):
-    """None
-    """
+
+def escape_decode(data, errors="strict"):
+    if isinstance(data, str):
+        data = data.encode("latin-1")
     l = len(data)
     i = 0
     res = bytearray()
     while i < l:
-        
-        if data[i] == '\\':
+        if data[i] == 0x5C:  # '\\'
             i += 1
             if i >= l:
                 raise ValueError("Trailing \\ in string")
-            else:
-                if data[i] == '\\':
-                    res += b'\\'
-                elif data[i] == 'n':
-                    res += b'\n'
-                elif data[i] == 't':
-                    res += b'\t'
-                elif data[i] == 'r':
-                    res += b'\r'
-                elif data[i] == 'b':
-                    res += b'\b'
-                elif data[i] == '\'':
-                    res += b'\''
-                elif data[i] == '\"':
-                    res += b'\"'
-                elif data[i] == 'f':
-                    res += b'\f'
-                elif data[i] == 'a':
-                    res += b'\a'
-                elif data[i] == 'v':
-                    res += b'\v'
-                elif '0' <= data[i] <= '9':
-                    # emulate a strange wrap-around behavior of CPython:
-                    # \400 is the same as \000 because 0400 == 256
-                    octal = data[i:i+3]
-                    res.append(int(octal, 8) & 0xFF)
-                    i += 2
-                elif data[i] == 'x':
-                    hexa = data[i+1:i+3]
-                    res.append(int(hexa, 16))
+            ch = data[i]
+            if ch == 0x5C:
+                res.append(0x5C)  # \\
+            elif ch == 0x27:
+                res.append(0x27)  # \'
+            elif ch == 0x22:
+                res.append(0x22)  # \"
+            elif ch == 0x61:
+                res.append(0x07)  # \a
+            elif ch == 0x62:
+                res.append(0x08)  # \b
+            elif ch == 0x66:
+                res.append(0x0C)  # \f
+            elif ch == 0x6E:
+                res.append(0x0A)  # \n
+            elif ch == 0x72:
+                res.append(0x0D)  # \r
+            elif ch == 0x74:
+                res.append(0x09)  # \t
+            elif ch == 0x76:
+                res.append(0x0B)  # \v
+            elif ch == 0x0A:
+                pass  # \<newline> continuation
+            elif 0x30 <= ch <= 0x37:  # \0-\7 octal
+                val = ch - 0x30
+                if i + 1 < l and 0x30 <= data[i + 1] <= 0x37:
+                    i += 1
+                    val = (val << 3) | (data[i] - 0x30)
+                    if i + 1 < l and 0x30 <= data[i + 1] <= 0x37:
+                        i += 1
+                        val = (val << 3) | (data[i] - 0x30)
+                res.append(val & 0xFF)
+            elif ch == 0x78:  # \x hex
+                hex_count = 0
+                for j in range(1, 3):
+                    if i + j < l and _is_hex_digit(data[i + j]):
+                        hex_count += 1
+                    else:
+                        break
+                if hex_count < 2:
+                    if errors == "strict":
+                        raise ValueError("invalid \\x escape at position %d" % (i - 1))
+                    elif errors == "replace":
+                        res.append(0x3F)  # '?'
+                    i += hex_count
+                else:
+                    res.append(int(bytes(data[i + 1 : i + 3]), 16))
                     i += 2
+            else:
+                import warnings
+
+                warnings.warn(
+                    '"\\%c" is an invalid escape sequence' % ch
+                    if 0x20 <= ch < 0x7F
+                    else '"\\x%02x" is an invalid escape sequence' % ch,
+                    DeprecationWarning,
+                    stacklevel=2,
+                )
+                res.append(0x5C)
+                res.append(ch)
         else:
             res.append(data[i])
         i += 1
-    res = bytes(res)
-    return res, len(res)
+    return bytes(res), l
+
 
-def charmap_decode( data, errors='strict', mapping=None):
-    """None
-    """
+def charmap_decode(data, errors="strict", mapping=None):
+    """Decode via PyUnicode_DecodeCharmap with mapping; return (str, len(data))."""
     res = PyUnicode_DecodeCharmap(data, len(data), mapping, errors)
-    res = ''.join(res)
+    res = "".join(res)
     return res, len(data)
 
 
-def utf_7_encode( obj, errors='strict'):
-    """None
-    """
+def utf_7_encode(obj, errors="strict"):
+    """Encode via PyUnicode_EncodeUTF7; return (bytes, len(obj))."""
     res = PyUnicode_EncodeUTF7(obj, len(obj), 0, 0, errors)
-    res = b''.join(res)
+    res = b"".join(res)
     return res, len(obj)
 
-def mbcs_encode( obj, errors='strict'):
-    """None
-    """
+
+def mbcs_encode(obj, errors="strict"):
+    """Placeholder for the MBCS encoder: not implemented, returns None."""
     pass
+
+
 ##    return (PyUnicode_EncodeMBCS(
-##                             (obj), 
+##                             (obj),
 ##                             len(obj),
 ##                             errors),
 ##                  len(obj))
-    
 
-def ascii_encode( obj, errors='strict'):
-    """None
-    """
+
+def ascii_encode(obj, errors="strict"):
+    """Encode via PyUnicode_EncodeASCII; return (bytes, len(obj))."""
     res = PyUnicode_EncodeASCII(obj, len(obj), errors)
     res = bytes(res)
     return res, len(obj)
 
-def utf_16_encode( obj, errors='strict'):
-    """None
-    """
-    res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'native')
+
+def utf_16_encode(obj, errors="strict"):
+    """Encode via PyUnicode_EncodeUTF16 with byteorder 'native'; return (bytes, len(obj))."""
+    res = PyUnicode_EncodeUTF16(obj, len(obj), errors, "native")
     res = bytes(res)
     return res, len(obj)
 
-def raw_unicode_escape_encode( obj, errors='strict'):
-    """None
-    """
+
+def raw_unicode_escape_encode(obj, errors="strict"):
+    """Encode via PyUnicode_EncodeRawUnicodeEscape; return (bytes, len(obj)). errors is unused."""
     res = PyUnicode_EncodeRawUnicodeEscape(obj, len(obj))
     res = bytes(res)
     return res, len(obj)
 
-def utf_16_le_encode( obj, errors='strict'):
-    """None
-    """
-    res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'little')
+
+def utf_16_le_encode(obj, errors="strict"):
+    """Encode via PyUnicode_EncodeUTF16 with byteorder 'little'; return (bytes, len(obj))."""
+    res = PyUnicode_EncodeUTF16(obj, len(obj), errors, "little")
     res = bytes(res)
     return res, len(obj)
 
-def utf_16_be_encode( obj, errors='strict'):
-    """None
-    """
-    res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'big')
+
+def utf_16_be_encode(obj, errors="strict"):
+    """Encode via PyUnicode_EncodeUTF16 with byteorder 'big'; return (bytes, len(obj))."""
+    res = PyUnicode_EncodeUTF16(obj, len(obj), errors, "big")
     res = bytes(res)
     return res, len(obj)
 
-def utf_16_le_decode(data, errors='strict', final=0):
-    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'little', final)
-    res = ''.join(res)
+
+def utf_16_le_decode(data, errors="strict", final=0):
+    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(
+        data, len(data), errors, "little", final
+    )
+    res = "".join(res)
     return res, consumed
 
-def utf_16_be_decode(data, errors='strict', final=0):
-    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'big', final)
-    res = ''.join(res)
+
+def utf_16_be_decode(data, errors="strict", final=0):
+    res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(
+        data, len(data), errors, "big", final
+    )
+    res = "".join(res)
     return res, consumed
 
 
 def STORECHAR32(ch, byteorder):
     """Store a 32-bit character as 4 bytes in the specified byte order."""
-    b0 = ch & 0xff
-    b1 = (ch >> 8) & 0xff
-    b2 = (ch >> 16) & 0xff
-    b3 = (ch >> 24) & 0xff
-    if byteorder == 'little':
+    b0 = ch & 0xFF
+    b1 = (ch >> 8) & 0xFF
+    b2 = (ch >> 16) & 0xFF
+    b3 = (ch >> 24) & 0xFF
+    if byteorder == "little":
         return [b0, b1, b2, b3]
     else:  # big-endian
         return [b3, b2, b1, b0]
 
 
-def PyUnicode_EncodeUTF32(s, size, errors, byteorder='little'):
+def PyUnicode_EncodeUTF32(s, size, errors, byteorder="little"):
     """Encode a Unicode string to UTF-32."""
     p = []
     bom = sys.byteorder
 
-    if byteorder == 'native':
+    if byteorder == "native":
         bom = sys.byteorder
         # Add BOM for native encoding
         p += STORECHAR32(0xFEFF, bom)
 
-    if byteorder == 'little':
-        bom = 'little'
-    elif byteorder == 'big':
-        bom = 'big'
+    if byteorder == "little":
+        bom = "little"
+    elif byteorder == "big":
+        bom = "big"
 
     pos = 0
     while pos < len(s):
         ch = ord(s[pos])
         if 0xD800 <= ch <= 0xDFFF:
-            if errors == 'surrogatepass':
+            if errors == "surrogatepass":
                 p += STORECHAR32(ch, bom)
                 pos += 1
             else:
                 res, pos = unicode_call_errorhandler(
-                    errors, 'utf-32', 'surrogates not allowed',
-                    s, pos, pos + 1, False)
+                    errors, "utf-32", "surrogates not allowed", s, pos, pos + 1, False
+                )
                 for c in res:
                     p += STORECHAR32(ord(c), bom)
         else:
@@ -434,26 +514,26 @@ def PyUnicode_EncodeUTF32(s, size, errors, byteorder='little'):
     return p
 
 
-def utf_32_encode(obj, errors='strict'):
+def utf_32_encode(obj, errors="strict"):
     """UTF-32 encoding with BOM."""
-    encoded = PyUnicode_EncodeUTF32(obj, len(obj), errors, 'native')
+    encoded = PyUnicode_EncodeUTF32(obj, len(obj), errors, "native")
     return bytes(encoded), len(obj)
 
 
-def utf_32_le_encode(obj, errors='strict'):
+def utf_32_le_encode(obj, errors="strict"):
     """UTF-32 little-endian encoding without BOM."""
-    encoded = PyUnicode_EncodeUTF32(obj, len(obj), errors, 'little')
+    encoded = PyUnicode_EncodeUTF32(obj, len(obj), errors, "little")
     return bytes(encoded), len(obj)
 
 
-def utf_32_be_encode(obj, errors='strict'):
+def utf_32_be_encode(obj, errors="strict"):
     """UTF-32 big-endian encoding without BOM."""
-    res = PyUnicode_EncodeUTF32(obj, len(obj), errors, 'big')
+    res = PyUnicode_EncodeUTF32(obj, len(obj), errors, "big")
     res = bytes(res)
     return res, len(obj)
 
 
-def PyUnicode_DecodeUTF32Stateful(data, size, errors, byteorder='little', final=0):
+def PyUnicode_DecodeUTF32Stateful(data, size, errors, byteorder="little", final=0):
     """Decode UTF-32 encoded bytes to Unicode string."""
     if size == 0:
         return [], 0, 0
@@ -463,28 +543,44 @@ def PyUnicode_DecodeUTF32Stateful(data, size, errors, byteorder='little', final=
     aligned_size = (size // 4) * 4
 
     while pos + 3 < aligned_size:
-        if byteorder == 'little':
-            ch = data[pos] | (data[pos+1] << 8) | (data[pos+2] << 16) | (data[pos+3] << 24)
+        if byteorder == "little":
+            ch = (
+                data[pos]
+                | (data[pos + 1] << 8)
+                | (data[pos + 2] << 16)
+                | (data[pos + 3] << 24)
+            )
         else:  # big-endian
-            ch = (data[pos] << 24) | (data[pos+1] << 16) | (data[pos+2] << 8) | data[pos+3]
+            ch = (
+                (data[pos] << 24)
+                | (data[pos + 1] << 16)
+                | (data[pos + 2] << 8)
+                | data[pos + 3]
+            )
 
         # Validate code point
         if ch > 0x10FFFF:
-            if errors == 'strict':
-                raise UnicodeDecodeError('utf-32', bytes(data), pos, pos+4,
-                                        'codepoint not in range(0x110000)')
-            elif errors == 'replace':
-                result.append('\ufffd')
+            if errors == "strict":
+                raise UnicodeDecodeError(
+                    "utf-32",
+                    bytes(data),
+                    pos,
+                    pos + 4,
+                    "codepoint not in range(0x110000)",
+                )
+            elif errors == "replace":
+                result.append("\ufffd")
             # 'ignore' - skip this character
             pos += 4
         elif 0xD800 <= ch <= 0xDFFF:
-            if errors == 'surrogatepass':
+            if errors == "surrogatepass":
                 result.append(chr(ch))
                 pos += 4
             else:
-                msg = 'code point in surrogate code point range(0xd800, 0xe000)'
+                msg = "code point in surrogate code point range(0xd800, 0xe000)"
                 res, pos = unicode_call_errorhandler(
-                    errors, 'utf-32', msg, data, pos, pos + 4, True)
+                    errors, "utf-32", msg, data, pos, pos + 4, True
+                )
                 result.append(res)
         else:
             result.append(chr(ch))
@@ -494,47 +590,57 @@ def PyUnicode_DecodeUTF32Stateful(data, size, errors, byteorder='little', final=
     if pos < size:
         if final:
             res, pos = unicode_call_errorhandler(
-                errors, 'utf-32', 'truncated data',
-                data, pos, size, True)
+                errors, "utf-32", "truncated data", data, pos, size, True
+            )
             if res:
                 result.append(res)
 
     return result, pos, 0
 
 
-def utf_32_decode(data, errors='strict', final=0):
+def utf_32_decode(data, errors="strict", final=0):
     """UTF-32 decoding with BOM detection."""
     if len(data) >= 4:
         # Check for BOM
-        if data[0:4] == b'\xff\xfe\x00\x00':
+        if data[0:4] == b"\xff\xfe\x00\x00":
             # UTF-32 LE BOM
-            res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data[4:], len(data)-4, errors, 'little', final)
-            res = ''.join(res)
+            res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+                data[4:], len(data) - 4, errors, "little", final
+            )
+            res = "".join(res)
             return res, consumed + 4
-        elif data[0:4] == b'\x00\x00\xfe\xff':
+        elif data[0:4] == b"\x00\x00\xfe\xff":
             # UTF-32 BE BOM
-            res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data[4:], len(data)-4, errors, 'big', final)
-            res = ''.join(res)
+            res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+                data[4:], len(data) - 4, errors, "big", final
+            )
+            res = "".join(res)
             return res, consumed + 4
 
     # Default to little-endian if no BOM
-    byteorder = 'little' if sys.byteorder == 'little' else 'big'
-    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data, len(data), errors, byteorder, final)
-    res = ''.join(res)
+    byteorder = "little" if sys.byteorder == "little" else "big"
+    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+        data, len(data), errors, byteorder, final
+    )
+    res = "".join(res)
     return res, consumed
 
 
-def utf_32_le_decode(data, errors='strict', final=0):
+def utf_32_le_decode(data, errors="strict", final=0):
     """UTF-32 little-endian decoding without BOM."""
-    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data, len(data), errors, 'little', final)
-    res = ''.join(res)
+    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+        data, len(data), errors, "little", final
+    )
+    res = "".join(res)
     return res, consumed
 
 
-def utf_32_be_decode(data, errors='strict', final=0):
+def utf_32_be_decode(data, errors="strict", final=0):
     """UTF-32 big-endian decoding without BOM."""
-    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(data, len(data), errors, 'big', final)
-    res = ''.join(res)
+    res, consumed, _ = PyUnicode_DecodeUTF32Stateful(
+        data, len(data), errors, "big", final
+    )
+    res = "".join(res)
     return res, consumed
 
 
@@ -543,9 +649,9 @@ def utf_32_be_decode(data, errors='strict', final=0):
 ##import sys
 ##""" Python implementation of CPythons builtin unicode codecs.
 ##
-##    Generally the functions in this module take a list of characters an returns 
+##    Generally the functions in this module take a list of characters and return
 ##    a list of characters.
-##    
+##
 ##    For use in the PyPy project"""
 
 
@@ -555,62 +661,211 @@ def utf_32_be_decode(data, errors='strict', final=0):
 ##         1 - special
 ##         2 - whitespace (optional)
 ##         3 - RFC2152 Set O (optional)
-    
+
 utf7_special = [
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
-    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3,
-    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    2,
+    2,
+    1,
+    1,
+    2,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    2,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    0,
+    0,
+    0,
+    3,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    3,
+    3,
+    3,
+    3,
+    0,
+    3,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    3,
+    1,
+    3,
+    3,
+    3,
+    3,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    3,
+    3,
+    3,
+    1,
+    1,
 ]
-unicode_latin1 = [None]*256
+unicode_latin1 = [None] * 256
 
 
 def SPECIAL(c, encodeO, encodeWS):
     c = ord(c)
-    return (c>127 or utf7_special[c] == 1) or \
-            (encodeWS and (utf7_special[(c)] == 2)) or \
-            (encodeO and (utf7_special[(c)] == 3))
+    return (
+        (c > 127 or utf7_special[c] == 1)
+        or (encodeWS and (utf7_special[(c)] == 2))
+        or (encodeO and (utf7_special[(c)] == 3))
+    )
+
+
 def B64(n):
-    return bytes([b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]])
+    return bytes(
+        [
+            b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[
+                (n) & 0x3F
+            ]
+        ]
+    )
+
+
 def B64CHAR(c):
-    return (c.isalnum() or (c) == b'+' or (c) == b'/')
+    return c.isalnum() or (c) == b"+" or (c) == b"/"
+
+
 def UB64(c):
-    if (c) == b'+' :
-        return 62 
-    elif (c) == b'/':
-        return 63 
-    elif (c) >= b'a':
-        return ord(c) - 71 
-    elif (c) >= b'A':
-        return ord(c) - 65 
-    else: 
+    if (c) == b"+":
+        return 62
+    elif (c) == b"/":
+        return 63
+    elif (c) >= b"a":
+        return ord(c) - 71
+    elif (c) >= b"A":
+        return ord(c) - 65
+    else:
         return ord(c) + 4
 
-def ENCODE( ch, bits) :
+
+def ENCODE(ch, bits):
     out = []
-    while (bits >= 6):
-        out +=  B64(ch >> (bits-6))
-        bits -= 6 
+    while bits >= 6:
+        out += B64(ch >> (bits - 6))
+        bits -= 6
     return out, bits
 
+
 def _IS_BASE64(ch):
-    return (ord('A') <= ch <= ord('Z')) or (ord('a') <= ch <= ord('z')) or \
-           (ord('0') <= ch <= ord('9')) or ch == ord('+') or ch == ord('/')
+    return (
+        (ord("A") <= ch <= ord("Z"))
+        or (ord("a") <= ch <= ord("z"))
+        or (ord("0") <= ch <= ord("9"))
+        or ch == ord("+")
+        or ch == ord("/")
+    )
+
 
 def _FROM_BASE64(ch):
-    if ch == ord('+'): return 62
-    if ch == ord('/'): return 63
-    if ch >= ord('a'): return ch - 71
-    if ch >= ord('A'): return ch - 65
-    if ch >= ord('0'): return ch - ord('0') + 52
+    if ch == ord("+"):
+        return 62
+    if ch == ord("/"):
+        return 63
+    if ch >= ord("a"):
+        return ch - 71
+    if ch >= ord("A"):
+        return ch - 65
+    if ch >= ord("0"):
+        return ch - ord("0") + 52
     return -1
 
+
 def _DECODE_DIRECT(ch):
-    return ch <= 127 and ch != ord('+')
+    return ch <= 127 and ch != ord("+")
+
 
 def PyUnicode_DecodeUTF7(s, size, errors, final=False):
     if size == 0:
@@ -633,12 +888,16 @@ def PyUnicode_DecodeUTF7(s, size, errors, final=False):
                 base64bits += 6
                 i += 1
                 if base64bits >= 16:
-                    outCh = (base64buffer >> (base64bits - 16)) & 0xffff
+                    outCh = (base64buffer >> (base64bits - 16)) & 0xFFFF
                     base64bits -= 16
                     base64buffer &= (1 << base64bits) - 1
                     if surrogate:
                         if 0xDC00 <= outCh <= 0xDFFF:
-                            ch2 = 0x10000 + ((surrogate - 0xD800) << 10) + (outCh - 0xDC00)
+                            ch2 = (
+                                0x10000
+                                + ((surrogate - 0xD800) << 10)
+                                + (outCh - 0xDC00)
+                            )
                             p.append(chr(ch2))
                             surrogate = 0
                             continue
@@ -656,7 +915,8 @@ def PyUnicode_DecodeUTF7(s, size, errors, final=False):
                         i += 1
                         errmsg = "partial character in shift sequence"
                         out, i = unicode_call_errorhandler(
-                            errors, 'utf-7', errmsg, s, startinpos, i)
+                            errors, "utf-7", errmsg, s, startinpos, i
+                        )
                         p.append(out)
                         continue
                     else:
@@ -664,25 +924,27 @@ def PyUnicode_DecodeUTF7(s, size, errors, final=False):
                             i += 1
                             errmsg = "non-zero padding bits in shift sequence"
                             out, i = unicode_call_errorhandler(
-                                errors, 'utf-7', errmsg, s, startinpos, i)
+                                errors, "utf-7", errmsg, s, startinpos, i
+                            )
                             p.append(out)
                             continue
                 if surrogate and _DECODE_DIRECT(ch):
                     p.append(chr(surrogate))
                 surrogate = 0
-                if ch == ord('-'):
+                if ch == ord("-"):
                     i += 1
-        elif ch == ord('+'):
+        elif ch == ord("+"):
             startinpos = i
             i += 1
-            if i < size and s[i] == ord('-'):
+            if i < size and s[i] == ord("-"):
                 i += 1
-                p.append('+')
+                p.append("+")
             elif i < size and not _IS_BASE64(s[i]):
                 i += 1
                 errmsg = "ill-formed sequence"
                 out, i = unicode_call_errorhandler(
-                    errors, 'utf-7', errmsg, s, startinpos, i)
+                    errors, "utf-7", errmsg, s, startinpos, i
+                )
                 p.append(out)
             else:
                 inShift = True
@@ -698,7 +960,8 @@ def PyUnicode_DecodeUTF7(s, size, errors, final=False):
             i += 1
             errmsg = "unexpected special character"
             out, i = unicode_call_errorhandler(
-                errors, 'utf-7', errmsg, s, startinpos, i)
+                errors, "utf-7", errmsg, s, startinpos, i
+            )
             p.append(out)
 
     if inShift and not final:
@@ -708,11 +971,13 @@ def PyUnicode_DecodeUTF7(s, size, errors, final=False):
         if surrogate or base64bits >= 6 or (base64bits > 0 and base64buffer != 0):
             errmsg = "unterminated shift sequence"
             out, i = unicode_call_errorhandler(
-                errors, 'utf-7', errmsg, s, startinpos, size)
+                errors, "utf-7", errmsg, s, startinpos, size
+            )
             p.append(out)
 
     return p, size
 
+
 def _ENCODE_DIRECT(ch, encodeSetO, encodeWhiteSpace):
     c = ord(ch) if isinstance(ch, str) else ch
     if c > 127:
@@ -725,6 +990,7 @@ def _ENCODE_DIRECT(ch, encodeSetO, encodeWhiteSpace):
         return not encodeSetO
     return False
 
+
 def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors):
     inShift = False
     base64bits = 0
@@ -741,8 +1007,8 @@ def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors):
                     base64buffer = 0
                     base64bits = 0
                 inShift = False
-                if B64CHAR(ch) or ch == '-':
-                    out.append(b'-')
+                if B64CHAR(ch) or ch == "-":
+                    out.append(b"-")
                 out.append(bytes([ch_ord]))
             else:
                 # encode character in base64
@@ -765,12 +1031,12 @@ def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors):
                     base64bits -= 6
                 base64buffer &= (1 << base64bits) - 1 if base64bits else 0
         else:
-            if ch == '+':
-                out.append(b'+-')
+            if ch == "+":
+                out.append(b"+-")
             elif _ENCODE_DIRECT(ch, encodeSetO, encodeWhiteSpace):
                 out.append(bytes([ch_ord]))
             else:
-                out.append(b'+')
+                out.append(b"+")
                 inShift = True
                 # encode character in base64
                 if ch_ord >= 0x10000:
@@ -795,95 +1061,96 @@ def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors):
                     if i + 1 < size:
                         ch2 = s[i + 1]
                         if _ENCODE_DIRECT(ch2, encodeSetO, encodeWhiteSpace):
-                            if B64CHAR(ch2) or ch2 == '-':
-                                out.append(b'-')
+                            if B64CHAR(ch2) or ch2 == "-":
+                                out.append(b"-")
                             inShift = False
                     else:
-                        out.append(b'-')
+                        out.append(b"-")
                         inShift = False
 
     if base64bits:
         out.append(B64(base64buffer << (6 - base64bits)))
     if inShift:
-        out.append(b'-')
+        out.append(b"-")
 
     return out
 
-unicode_empty = ''
 
-def unicodeescape_string(s, size, quotes):
+unicode_empty = ""
 
+
+def unicodeescape_string(s, size, quotes):
     p = []
-    if (quotes) :
-        if (s.find('\'') != -1 and s.find('"') == -1):
+    if quotes:
+        if s.find("'") != -1 and s.find('"') == -1:
             p.append(b'"')
         else:
-            p.append(b'\'')
+            p.append(b"'")
     pos = 0
-    while (pos < size):
+    while pos < size:
         ch = s[pos]
-        #/* Escape quotes */
-        if (quotes and (ch == p[1] or ch == '\\')):
-            p.append(b'\\%c' % ord(ch))
+        # /* Escape quotes */
+        if quotes and (ch == p[1] or ch == "\\"):
+            p.append(b"\\%c" % ord(ch))
             pos += 1
             continue
 
-#ifdef Py_UNICODE_WIDE
-        #/* Map 21-bit characters to '\U00xxxxxx' */
-        elif (ord(ch) >= 0x10000):
-            p.append(b'\\U%08x' % ord(ch))
+        # ifdef Py_UNICODE_WIDE
+        # /* Map 21-bit characters to '\U00xxxxxx' */
+        elif ord(ch) >= 0x10000:
+            p.append(b"\\U%08x" % ord(ch))
             pos += 1
-            continue        
-#endif
-        #/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
-        elif (ord(ch) >= 0xD800 and ord(ch) < 0xDC00):
+            continue
+        # endif
+        # /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
+        elif ord(ch) >= 0xD800 and ord(ch) < 0xDC00:
             pos += 1
             ch2 = s[pos]
-            
-            if (ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
+
+            if ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF:
                 ucs = (((ord(ch) & 0x03FF) << 10) | (ord(ch2) & 0x03FF)) + 0x00010000
-                p.append(b'\\U%08x' % ucs)
+                p.append(b"\\U%08x" % ucs)
                 pos += 1
                 continue
-           
-            #/* Fall through: isolated surrogates are copied as-is */
+
+            # /* Fall through: isolated surrogates are copied as-is */
             pos -= 1
-            
-        #/* Map 16-bit characters to '\uxxxx' */
-        if (ord(ch) >= 256):
-            p.append(b'\\u%04x' % ord(ch))
-            
-        #/* Map special whitespace to '\t', \n', '\r' */
-        elif (ch == '\t'):
-            p.append(b'\\t')
-        
-        elif (ch == '\n'):
-            p.append(b'\\n')
-
-        elif (ch == '\r'):
-            p.append(b'\\r')
-
-        elif (ch == '\\'):
-            p.append(b'\\\\')
-
-        #/* Map non-printable US ASCII to '\xhh' */
-        elif (ch < ' ' or ch >= chr(0x7F)) :
-            p.append(b'\\x%02x' % ord(ch))
-        #/* Copy everything else as-is */
+
+        # /* Map 16-bit characters to '\uxxxx' */
+        if ord(ch) >= 256:
+            p.append(b"\\u%04x" % ord(ch))
+
+        # /* Map special whitespace to '\t', \n', '\r' */
+        elif ch == "\t":
+            p.append(b"\\t")
+
+        elif ch == "\n":
+            p.append(b"\\n")
+
+        elif ch == "\r":
+            p.append(b"\\r")
+
+        elif ch == "\\":
+            p.append(b"\\\\")
+
+        # /* Map non-printable US ASCII to '\xhh' */
+        elif ch < " " or ch >= chr(0x7F):
+            p.append(b"\\x%02x" % ord(ch))
+        # /* Copy everything else as-is */
         else:
             p.append(bytes([ord(ch)]))
         pos += 1
-    if (quotes):
+    if quotes:
         p.append(p[0])
     return p
 
-def PyUnicode_DecodeASCII(s, size, errors):
 
-#    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
-    if (size == 1 and ord(s) < 128) :
+def PyUnicode_DecodeASCII(s, size, errors):
+    #    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
+    if size == 1 and ord(s) < 128:
         return [chr(ord(s))]
-    if (size == 0):
-        return [''] #unicode('')
+    if size == 0:
+        return [""]  # unicode('')
     p = []
     pos = 0
     while pos < len(s):
@@ -892,54 +1159,50 @@ def PyUnicode_DecodeASCII(s, size, errors):
             p += chr(c)
             pos += 1
         else:
-            
             res = unicode_call_errorhandler(
-                    errors, "ascii", "ordinal not in range(128)",
-                    s,  pos, pos+1)
+                errors, "ascii", "ordinal not in range(128)", s, pos, pos + 1
+            )
             p += res[0]
             pos = res[1]
     return p
 
-def PyUnicode_EncodeASCII(p, size, errors):
 
+def PyUnicode_EncodeASCII(p, size, errors):
     return unicode_encode_ucs1(p, size, errors, 128)
 
-def PyUnicode_AsASCIIString(unistr):
 
+def PyUnicode_AsASCIIString(unistr):
     if not type(unistr) == str:
         raise TypeError
-    return PyUnicode_EncodeASCII(unistr,
-                                 len(unistr),
-                                None)
+    return PyUnicode_EncodeASCII(unistr, len(unistr), None)
 
-def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=True):
 
-    bo = 0       #/* assume native ordering by default */
+def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder="native", final=True):
+    bo = 0  # /* assume native ordering by default */
     consumed = 0
     errmsg = ""
 
-    if sys.byteorder == 'little':
+    if sys.byteorder == "little":
         ihi = 1
         ilo = 0
     else:
         ihi = 0
         ilo = 1
-    
 
-    #/* Unpack UTF-16 encoded data */
+    # /* Unpack UTF-16 encoded data */
 
-##    /* Check for BOM marks (U+FEFF) in the input and adjust current
-##       byte order setting accordingly. In native mode, the leading BOM
-##       mark is skipped, in all other modes, it is copied to the output
-##       stream as-is (giving a ZWNBSP character). */
+    ##    /* Check for BOM marks (U+FEFF) in the input and adjust current
+    ##       byte order setting accordingly. In native mode, the leading BOM
+    ##       mark is skipped, in all other modes, it is copied to the output
+    ##       stream as-is (giving a ZWNBSP character). */
     q = 0
     p = []
-    if byteorder == 'native':
-        if (size >= 2):
+    if byteorder == "native":
+        if size >= 2:
             bom = (s[ihi] << 8) | s[ilo]
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if sys.byteorder == 'little':
-                if (bom == 0xFEFF):
+            # ifdef BYTEORDER_IS_LITTLE_ENDIAN
+            if sys.byteorder == "little":
+                if bom == 0xFEFF:
                     q += 2
                     bo = -1
                 elif bom == 0xFFFE:
@@ -952,126 +1215,125 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru
                 elif bom == 0xFFFE:
                     q += 2
                     bo = -1
-    elif byteorder == 'little':
+    elif byteorder == "little":
         bo = -1
     else:
         bo = 1
-        
-    if (size == 0):
-        return [''], 0, bo
-    
-    if (bo == -1):
-        #/* force LE */
+
+    if size == 0:
+        return [""], 0, bo
+
+    if bo == -1:
+        # /* force LE */
         ihi = 1
         ilo = 0
 
-    elif (bo == 1):
-        #/* force BE */
+    elif bo == 1:
+        # /* force BE */
         ihi = 0
         ilo = 1
 
-    while (q < len(s)):
-
-        #/* remaining bytes at the end? (size should be even) */
-        if (len(s) - q < 2):
+    while q < len(s):
+        # /* remaining bytes at the end? (size should be even) */
+        if len(s) - q < 2:
             if not final:
                 break
             res, q = unicode_call_errorhandler(
-                errors, 'utf-16', "truncated data",
-                s, q, len(s), True)
+                errors, "utf-16", "truncated data", s, q, len(s), True
+            )
             p.append(res)
             break
 
-        ch = (s[q+ihi] << 8) | s[q+ilo]
+        ch = (s[q + ihi] << 8) | s[q + ilo]
 
-        if (ch < 0xD800 or ch > 0xDFFF):
+        if ch < 0xD800 or ch > 0xDFFF:
             p.append(chr(ch))
             q += 2
             continue
 
-        #/* UTF-16 code pair: high surrogate */
-        if (0xD800 <= ch <= 0xDBFF):
-            if (q + 4 <= len(s)):
-                ch2 = (s[q+2+ihi] << 8) | s[q+2+ilo]
-                if (0xDC00 <= ch2 <= 0xDFFF):
+        # /* UTF-16 code pair: high surrogate */
+        if 0xD800 <= ch <= 0xDBFF:
+            if q + 4 <= len(s):
+                ch2 = (s[q + 2 + ihi] << 8) | s[q + 2 + ilo]
+                if 0xDC00 <= ch2 <= 0xDFFF:
                     # Valid surrogate pair - always assemble
                     p.append(chr((((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000))
                     q += 4
                     continue
                 else:
                     # High surrogate followed by non-low-surrogate
-                    if errors == 'surrogatepass':
+                    if errors == "surrogatepass":
                         p.append(chr(ch))
                         q += 2
                         continue
                     res, q = unicode_call_errorhandler(
-                        errors, 'utf-16', "illegal UTF-16 surrogate",
-                        s, q, q + 2, True)
+                        errors, "utf-16", "illegal UTF-16 surrogate", s, q, q + 2, True
+                    )
                     p.append(res)
             else:
                 # High surrogate at end of data
                 if not final:
                     break
-                if errors == 'surrogatepass':
+                if errors == "surrogatepass":
                     p.append(chr(ch))
                     q += 2
                     continue
                 res, q = unicode_call_errorhandler(
-                    errors, 'utf-16', "unexpected end of data",
-                    s, q, len(s), True)
+                    errors, "utf-16", "unexpected end of data", s, q, len(s), True
+                )
                 p.append(res)
         else:
             # Low surrogate without preceding high surrogate
-            if errors == 'surrogatepass':
+            if errors == "surrogatepass":
                 p.append(chr(ch))
                 q += 2
                 continue
             res, q = unicode_call_errorhandler(
-                errors, 'utf-16', "illegal encoding",
-                s, q, q + 2, True)
+                errors, "utf-16", "illegal encoding", s, q, q + 2, True
+            )
             p.append(res)
 
     return p, q, bo
 
+
 # moved out of local scope, especially because it didn't
 # have any nested variables.
 
+
 def STORECHAR(CH, byteorder):
-    hi = (CH >> 8) & 0xff
-    lo = CH & 0xff
-    if byteorder == 'little':
+    hi = (CH >> 8) & 0xFF
+    lo = CH & 0xFF
+    if byteorder == "little":
         return [lo, hi]
     else:
         return [hi, lo]
 
-def PyUnicode_EncodeUTF16(s, size, errors, byteorder='little'):
 
-#    /* Offsets from p for storing byte pairs in the right order. */
+def PyUnicode_EncodeUTF16(s, size, errors, byteorder="little"):
+    #    /* Offsets from p for storing byte pairs in the right order. */
 
-        
     p = []
     bom = sys.byteorder
-    if (byteorder == 'native'):
-        
+    if byteorder == "native":
         bom = sys.byteorder
         p += STORECHAR(0xFEFF, bom)
-        
-    if (byteorder == 'little' ):
-        bom = 'little'
-    elif (byteorder == 'big'):
-        bom = 'big'
+
+    if byteorder == "little":
+        bom = "little"
+    elif byteorder == "big":
+        bom = "big"
 
     pos = 0
     while pos < len(s):
         ch = ord(s[pos])
         if 0xD800 <= ch <= 0xDFFF:
-            if errors == 'surrogatepass':
+            if errors == "surrogatepass":
                 p += STORECHAR(ch, bom)
                 pos += 1
             else:
                 res, pos = unicode_call_errorhandler(
-                    errors, 'utf-16', 'surrogates not allowed',
-                    s, pos, pos + 1, False)
+                    errors, "utf-16", "surrogates not allowed", s, pos, pos + 1, False
+                )
                 for c in res:
                     cp = ord(c)
                     cp2 = 0
@@ -1097,123 +1359,149 @@ def PyUnicode_EncodeUTF16(s, size, errors, byteorder='little'):
 def PyUnicode_DecodeMBCS(s, size, errors):
     pass
 
+
 def PyUnicode_EncodeMBCS(p, size, errors):
     pass
 
-def unicode_call_errorhandler(errors,  encoding, 
-                reason, input, startinpos, endinpos, decode=True):
-    
+
+def unicode_call_errorhandler(
+    errors, encoding, reason, input, startinpos, endinpos, decode=True
+):
     errorHandler = lookup_error(errors)
     if decode:
-        exceptionObject = UnicodeDecodeError(encoding, input, startinpos, endinpos, reason)
+        exceptionObject = UnicodeDecodeError(
+            encoding, input, startinpos, endinpos, reason
+        )
     else:
-        exceptionObject = UnicodeEncodeError(encoding, input, startinpos, endinpos, reason)
+        exceptionObject = UnicodeEncodeError(
+            encoding, input, startinpos, endinpos, reason
+        )
     res = errorHandler(exceptionObject)
-    if isinstance(res, tuple) and isinstance(res[0], (str, bytes)) and isinstance(res[1], int):
+    if (
+        isinstance(res, tuple)
+        and isinstance(res[0], (str, bytes))
+        and isinstance(res[1], int)
+    ):
         newpos = res[1]
-        if (newpos < 0):
+        if newpos < 0:
             newpos = len(input) + newpos
         if newpos < 0 or newpos > len(input):
-            raise IndexError( "position %d from error handler out of bounds" % newpos)
+            raise IndexError("position %d from error handler out of bounds" % newpos)
         return res[0], newpos
     else:
-        raise TypeError("encoding error handler must return (unicode, int) tuple, not %s" % repr(res))
+        raise TypeError(
+            "encoding error handler must return (unicode, int) tuple, not %s"
+            % repr(res)
+        )
+
+
+# /* --- Latin-1 Codec ------------------------------------------------------ */
 
-#/* --- Latin-1 Codec ------------------------------------------------------ */
 
 def PyUnicode_DecodeLatin1(s, size, errors):
-    #/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
-##    if (size == 1):
-##        return [PyUnicode_FromUnicode(s, 1)]
+    # /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
+    ##    if (size == 1):
+    ##        return [PyUnicode_FromUnicode(s, 1)]
     pos = 0
     p = []
-    while (pos < size):
+    while pos < size:
         p += chr(s[pos])
         pos += 1
     return p
 
+
 def unicode_encode_ucs1(p, size, errors, limit):
-    
     if limit == 256:
         reason = "ordinal not in range(256)"
         encoding = "latin-1"
     else:
         reason = "ordinal not in range(128)"
         encoding = "ascii"
-    
-    if (size == 0):
+
+    if size == 0:
         return []
     res = bytearray()
     pos = 0
     while pos < len(p):
-    #for ch in p:
+        # for ch in p:
         ch = p[pos]
-        
+
         if ord(ch) < limit:
             res.append(ord(ch))
             pos += 1
         else:
-            #/* startpos for collecting unencodable chars */
-            collstart = pos 
-            collend = pos+1 
+            # /* startpos for collecting unencodable chars */
+            collstart = pos
+            collend = pos + 1
             while collend < len(p) and ord(p[collend]) >= limit:
                 collend += 1
-            x = unicode_call_errorhandler(errors, encoding, reason, p, collstart, collend, False)
+            x = unicode_call_errorhandler(
+                errors, encoding, reason, p, collstart, collend, False
+            )
             replacement = x[0]
             if isinstance(replacement, bytes):
                 res += replacement
             else:
                 res += replacement.encode()
             pos = x[1]
-    
+
     return res
 
+
 def PyUnicode_EncodeLatin1(p, size, errors):
     res = unicode_encode_ucs1(p, size, errors, 256)
     return res
 
-hexdigits = [ord(hex(i)[-1]) for i in range(16)]+[ord(hex(i)[-1].upper()) for i in range(10, 16)]
+
+hexdigits = [ord(hex(i)[-1]) for i in range(16)] + [
+    ord(hex(i)[-1].upper()) for i in range(10, 16)
+]
+
 
 def hex_number_end(s, pos, digits):
     target_end = pos + digits
-    while pos < target_end and pos < len(s) and s[pos] in hexdigits: 
+    while pos < target_end and pos < len(s) and s[pos] in hexdigits:
         pos += 1
     return pos
 
+
 def hexescape(s, pos, digits, message, errors):
     ch = 0
     p = []
     number_end = hex_number_end(s, pos, digits)
     if number_end - pos != digits:
-        x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2, number_end)
+        x = unicode_call_errorhandler(
+            errors, "unicodeescape", message, s, pos - 2, number_end
+        )
         p.append(x[0])
         pos = x[1]
     else:
-        ch = int(s[pos:pos+digits], 16)
-        #/* when we get here, ch is a 32-bit unicode character */
+        ch = int(s[pos : pos + digits], 16)
+        # /* when we get here, ch is a 32-bit unicode character */
         if ch <= sys.maxunicode:
             p.append(chr(ch))
             pos += digits
 
-        elif (ch <= 0x10ffff):
+        elif ch <= 0x10FFFF:
             ch -= 0x10000
             p.append(chr(0xD800 + (ch >> 10)))
-            p.append(chr(0xDC00 +  (ch & 0x03FF)))
+            p.append(chr(0xDC00 + (ch & 0x03FF)))
             pos += digits
         else:
             message = "illegal Unicode character"
-            x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2,
-                    pos+digits)
+            x = unicode_call_errorhandler(
+                errors, "unicodeescape", message, s, pos - 2, pos + digits
+            )
             p.append(x[0])
             pos = x[1]
     res = p
     return res, pos
 
+
 def PyUnicode_DecodeUnicodeEscape(s, size, errors, final):
+    if size == 0:
+        return "", 0
 
-    if (size == 0):
-        return ''
-    
     if isinstance(s, str):
         s = s.encode()
 
@@ -1221,129 +1509,166 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final):
 
     p = []
     pos = 0
-    while (pos < size): 
-##        /* Non-escape characters are interpreted as Unicode ordinals */
-        if (chr(s[pos]) != '\\') :
+    while pos < size:
+        ##        /* Non-escape characters are interpreted as Unicode ordinals */
+        if s[pos] != ord("\\"):
             p.append(chr(s[pos]))
             pos += 1
             continue
-##        /* \ - Escapes */
-        else:
-            pos += 1
-            if pos >= len(s):
-                errmessage = "\\ at end of string"
-                unicode_call_errorhandler(errors, "unicodeescape", errmessage, s, pos-1, size)
-            ch = chr(s[pos])
-            pos += 1
-    ##        /* \x escapes */
-            if   ch == '\n': pass
-            elif ch == '\\': p += '\\'
-            elif ch == '\'': p += '\''
-            elif ch == '\"': p += '\"' 
-            elif ch == 'b' : p += '\b' 
-            elif ch == 'f' : p += '\014' #/* FF */
-            elif ch == 't' : p += '\t' 
-            elif ch == 'n' : p += '\n'
-            elif ch == 'r' : p += '\r' 
-            elif ch == 'v' : p += '\013' #break; /* VT */
-            elif ch == 'a' : p += '\007' # break; /* BEL, not classic C */
-            elif '0' <= ch <= '7':
-                x = ord(ch) - ord('0')
-                if pos < size:
-                    ch = chr(s[pos])
-                    if '0' <= ch <= '7':
-                        pos += 1
-                        x = (x<<3) + ord(ch) - ord('0')
-                        if pos < size:
-                            ch = chr(s[pos])
-                            if '0' <= ch <= '7':
-                                pos += 1
-                                x = (x<<3) + ord(ch) - ord('0')
-                p.append(chr(x))
-    ##        /* hex escapes */
-    ##        /* \xXX */
-            elif ch == 'x':
+        ##        /* \ - Escapes */
+        escape_start = pos
+        pos += 1
+        if pos >= size:
+            if not final:
+                pos = escape_start
+                break
+            errmessage = "\\ at end of string"
+            unicode_call_errorhandler(
+                errors, "unicodeescape", errmessage, s, pos - 1, size
+            )
+            break
+        ch = chr(s[pos])
+        pos += 1
+        ##        /* \x escapes */
+        if ch == "\n":
+            pass
+        elif ch == "\\":
+            p += "\\"
+        elif ch == "'":
+            p += "'"
+        elif ch == '"':
+            p += '"'
+        elif ch == "b":
+            p += "\b"
+        elif ch == "f":
+            p += "\014"  # /* FF */
+        elif ch == "t":
+            p += "\t"
+        elif ch == "n":
+            p += "\n"
+        elif ch == "r":
+            p += "\r"
+        elif ch == "v":
+            p += "\013"  # break; /* VT */
+        elif ch == "a":
+            p += "\007"  # break; /* BEL, not classic C */
+        elif "0" <= ch <= "7":
+            x = ord(ch) - ord("0")
+            if pos < size:
+                ch = chr(s[pos])
+                if "0" <= ch <= "7":
+                    pos += 1
+                    x = (x << 3) + ord(ch) - ord("0")
+                    if pos < size:
+                        ch = chr(s[pos])
+                        if "0" <= ch <= "7":
+                            pos += 1
+                            x = (x << 3) + ord(ch) - ord("0")
+            p.append(chr(x))
+        ##        /* hex escapes */
+        ##        /* \xXX */
+        elif ch in ("x", "u", "U"):
+            if ch == "x":
                 digits = 2
                 message = "truncated \\xXX escape"
-                x = hexescape(s, pos, digits, message, errors)
-                p += x[0]
-                pos = x[1]
-    
-         #   /* \uXXXX */
-            elif ch == 'u':
+            elif ch == "u":
                 digits = 4
                 message = "truncated \\uXXXX escape"
+            else:
+                digits = 8
+                message = "truncated \\UXXXXXXXX escape"
+            number_end = hex_number_end(s, pos, digits)
+            if number_end - pos != digits:
+                if not final:
+                    pos = escape_start
+                    break
                 x = hexescape(s, pos, digits, message, errors)
                 p += x[0]
                 pos = x[1]
-    
-          #  /* \UXXXXXXXX */
-            elif ch == 'U':
-                digits = 8
-                message = "truncated \\UXXXXXXXX escape"
+            else:
                 x = hexescape(s, pos, digits, message, errors)
                 p += x[0]
                 pos = x[1]
-##        /* \N{name} */
-            elif ch == 'N':
-                message = "malformed \\N character escape"
-                # pos += 1
-                look = pos
-                try:
-                    import unicodedata
-                except ImportError:
-                    message = "\\N escapes not supported (can't load unicodedata module)"
-                    unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, size)
-                if look < size and chr(s[look]) == '{':
-                    #/* look for the closing brace */
-                    while (look < size and chr(s[look]) != '}'):
-                        look += 1
-                    if (look > pos+1 and look < size and chr(s[look]) == '}'):
-                        #/* found a name.  look it up in the unicode database */
-                        message = "unknown Unicode character name"
-                        st = s[pos+1:look]
-                        try:
-                            chr_codec = unicodedata.lookup("%s" % st)
-                        except LookupError as e:
-                            x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
-                        else:
-                            x = chr_codec, look + 1 
-                        p.append(x[0])
-                        pos = x[1]
-                    else:        
-                        x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
-                else:        
-                    x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1)
+        ##        /* \N{name} */
+        elif ch == "N":
+            message = "malformed \\N character escape"
+            look = pos
+            try:
+                import unicodedata
+            except ImportError:
+                message = "\\N escapes not supported (can't load unicodedata module)"
+                unicode_call_errorhandler(
+                    errors, "unicodeescape", message, s, pos - 1, size
+                )
+                continue
+            if look < size and chr(s[look]) == "{":
+                # /* look for the closing brace */
+                while look < size and chr(s[look]) != "}":
+                    look += 1
+                if look > pos + 1 and look < size and chr(s[look]) == "}":
+                    # /* found a name.  look it up in the unicode database */
+                    message = "unknown Unicode character name"
+                    st = s[pos + 1 : look]
+                    try:
+                        chr_codec = unicodedata.lookup("%s" % st)
+                    except LookupError as e:
+                        x = unicode_call_errorhandler(
+                            errors, "unicodeescape", message, s, pos - 1, look + 1
+                        )
+                    else:
+                        x = chr_codec, look + 1
+                    p.append(x[0])
+                    pos = x[1]
+                else:
+                    if not final:
+                        pos = escape_start
+                        break
+                    x = unicode_call_errorhandler(
+                        errors, "unicodeescape", message, s, pos - 1, look + 1
+                    )
+                    p.append(x[0])
+                    pos = x[1]
             else:
-                if not found_invalid_escape:
-                    found_invalid_escape = True
-                    warnings.warn("invalid escape sequence '\\%c'" % ch, DeprecationWarning, 2)
-                p.append('\\')
-                p.append(ch)
-    return p
+                if not final:
+                    pos = escape_start
+                    break
+                x = unicode_call_errorhandler(
+                    errors, "unicodeescape", message, s, pos - 1, look + 1
+                )
+                p.append(x[0])
+                pos = x[1]
+        else:
+            if not found_invalid_escape:
+                found_invalid_escape = True
+                warnings.warn(
+                    "invalid escape sequence '\\%c'" % ch, DeprecationWarning, 2
+                )
+            p.append("\\")
+            p.append(ch)
+    return p, pos
+
 
 def PyUnicode_EncodeRawUnicodeEscape(s, size):
-    
-    if (size == 0):
-        return b''
+    if size == 0:
+        return b""
 
     p = bytearray()
     for ch in s:
-#       /* Map 32-bit characters to '\Uxxxxxxxx' */
-        if (ord(ch) >= 0x10000):
-            p += b'\\U%08x' % ord(ch)
-        elif (ord(ch) >= 256) :
-#       /* Map 16-bit characters to '\uxxxx' */
-            p += b'\\u%04x' % (ord(ch))
-#       /* Copy everything else as-is */
+        #       /* Map 32-bit characters to '\Uxxxxxxxx' */
+        if ord(ch) >= 0x10000:
+            p += b"\\U%08x" % ord(ch)
+        elif ord(ch) >= 256:
+            #       /* Map 16-bit characters to '\uxxxx' */
+            p += b"\\u%04x" % (ord(ch))
+        #       /* Copy everything else as-is */
         else:
             p.append(ord(ch))
-    
-    #p += '\0'
+
+    # p += '\0'
     return p
 
-def charmapencode_output(c, mapping):
 
+def charmapencode_output(c, mapping):
     rep = mapping[c]
     if isinstance(rep, int):
         if rep < 256:
@@ -1359,27 +1684,34 @@ def charmapencode_output(c, mapping):
     else:
         raise TypeError("character mapping must return integer, None or str")
 
-def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'):
 
-##    /* the following variable is used for caching string comparisons
-##     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
-##     * 3=ignore, 4=xmlcharrefreplace */
+def PyUnicode_EncodeCharmap(p, size, mapping="latin-1", errors="strict"):
+    ##    /* the following variable is used for caching string comparisons
+    ##     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
+    ##     * 3=ignore, 4=xmlcharrefreplace */
 
-#    /* Default to Latin-1 */
-    if mapping == 'latin-1':
+    #    /* Default to Latin-1 */
+    if mapping == "latin-1":
         return PyUnicode_EncodeLatin1(p, size, errors)
-    if (size == 0):
-        return b''
+    if size == 0:
+        return b""
     inpos = 0
     res = []
-    while (inpos<size):
-        #/* try to encode it */
+    while inpos < size:
+        # /* try to encode it */
         try:
             x = charmapencode_output(ord(p[inpos]), mapping)
             res += x
         except KeyError:
-            x = unicode_call_errorhandler(errors, "charmap",
-            "character maps to <undefined>", p, inpos, inpos+1, False)
+            x = unicode_call_errorhandler(
+                errors,
+                "charmap",
+                "character maps to <undefined>",
+                p,
+                inpos,
+                inpos + 1,
+                False,
+            )
             replacement = x[0]
             if isinstance(replacement, bytes):
                 res += list(replacement)
@@ -1388,119 +1720,120 @@ def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'):
                     for y in replacement:
                         res += charmapencode_output(ord(y), mapping)
                 except KeyError:
-                    raise UnicodeEncodeError("charmap", p, inpos, inpos+1,
-                                            "character maps to <undefined>")
+                    raise UnicodeEncodeError(
+                        "charmap", p, inpos, inpos + 1, "character maps to <undefined>"
+                    )
         inpos += 1
     return res
 
-def PyUnicode_DecodeCharmap(s, size, mapping, errors):
 
-##    /* Default to Latin-1 */
-    if (mapping == None):
+def PyUnicode_DecodeCharmap(s, size, mapping, errors):
+    ##    /* Default to Latin-1 */
+    if mapping == None:
         return PyUnicode_DecodeLatin1(s, size, errors)
 
-    if (size == 0):
-        return ''
+    if size == 0:
+        return ""
     p = []
     inpos = 0
-    while (inpos< len(s)):
-        
-        #/* Get mapping (char ordinal -> integer, Unicode char or None) */
+    while inpos < len(s):
+        # /* Get mapping (char ordinal -> integer, Unicode char or None) */
         ch = s[inpos]
         try:
             x = mapping[ch]
             if isinstance(x, int):
-                if x < 65536:
+                if x == 0xFFFE:
+                    raise KeyError
+                if 0 <= x <= 0x10FFFF:
                     p += chr(x)
                 else:
-                    raise TypeError("character mapping must be in range(65536)")
+                    raise TypeError(
+                        "character mapping must be in range(0x%x)" % (0x110000,)
+                    )
             elif isinstance(x, str):
+                if len(x) == 1 and x == "\ufffe":
+                    raise KeyError
                 p += x
-            elif not x:
+            elif x is None:
                 raise KeyError
             else:
                 raise TypeError
-        except KeyError:
-            x = unicode_call_errorhandler(errors, "charmap",
-                "character maps to <undefined>", s, inpos, inpos+1)
+        except (KeyError, IndexError):
+            x = unicode_call_errorhandler(
+                errors, "charmap", "character maps to <undefined>", s, inpos, inpos + 1
+            )
             p += x[0]
         inpos += 1
     return p
 
-def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final):
 
-    if (size == 0):
-        return ''
+def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final):
+    if size == 0:
+        return "", 0
 
     if isinstance(s, str):
         s = s.encode()
 
     pos = 0
     p = []
-    while (pos < len(s)):
-        ch = chr(s[pos])
-    #/* Non-escape characters are interpreted as Unicode ordinals */
-        if (ch != '\\'):
-            p.append(ch)
+    while pos < len(s):
+        # /* Non-escape characters are interpreted as Unicode ordinals */
+        if s[pos] != ord("\\"):
+            p.append(chr(s[pos]))
             pos += 1
-            continue        
+            continue
         startinpos = pos
-##      /* \u-escapes are only interpreted iff the number of leading
-##         backslashes is odd */
+        p_len_before = len(p)
+        ##      /* \u-escapes are only interpreted iff the number of leading
+        ##         backslashes is odd */
         bs = pos
         while pos < size:
-            if (s[pos] != ord('\\')):
+            if s[pos] != ord("\\"):
                 break
             p.append(chr(s[pos]))
             pos += 1
-    
-        if (pos >= size):
+
+        if pos >= size:
+            if not final:
+                del p[p_len_before:]
+                pos = startinpos
             break
-        if (((pos - bs) & 1) == 0 or
-            (s[pos] != ord('u') and s[pos] != ord('U'))) :
+        if ((pos - bs) & 1) == 0 or (s[pos] != ord("u") and s[pos] != ord("U")):
             p.append(chr(s[pos]))
             pos += 1
             continue
-        
+
         p.pop(-1)
-        if s[pos] == ord('u'):
-            count = 4 
-        else: 
-            count = 8
+        count = 4 if s[pos] == ord("u") else 8
         pos += 1
 
-        #/* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
+        # /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
         number_end = hex_number_end(s, pos, count)
         if number_end - pos != count:
+            if not final:
+                del p[p_len_before:]
+                pos = startinpos
+                break
             res = unicode_call_errorhandler(
-                    errors, "rawunicodeescape", "truncated \\uXXXX",
-                    s, pos-2, number_end)
+                errors, "rawunicodeescape", "truncated \\uXXXX", s, pos - 2, number_end
+            )
             p.append(res[0])
             pos = res[1]
         else:
-            x = int(s[pos:pos+count], 16)
-    #ifndef Py_UNICODE_WIDE
-            if sys.maxunicode > 0xffff:
-                if (x > sys.maxunicode):
-                    res = unicode_call_errorhandler(
-                        errors, "rawunicodeescape", "\\Uxxxxxxxx out of range",
-                        s, pos-2, pos+count)
-                    pos = res[1]
-                    p.append(res[0])
-                else:
-                    p.append(chr(x))
-                    pos += count
+            x = int(s[pos : pos + count], 16)
+            if x > sys.maxunicode:
+                res = unicode_call_errorhandler(
+                    errors,
+                    "rawunicodeescape",
+                    "\\Uxxxxxxxx out of range",
+                    s,
+                    pos - 2,
+                    pos + count,
+                )
+                pos = res[1]
+                p.append(res[0])
             else:
-                if (x > 0x10000):
-                    res = unicode_call_errorhandler(
-                        errors, "rawunicodeescape", "\\Uxxxxxxxx out of range",
-                        s, pos-2, pos+count)
-                    pos = res[1]
-                    p.append(res[0])
-
-    #endif
-                else:
-                    p.append(chr(x))
-                    pos += count
+                p.append(chr(x))
+                pos += count
 
-    return p
+    return p, pos
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index e08db712a6f..254b6c7fcc9 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -335,7 +335,7 @@ def read_stringnl(f, decode=True, stripquotes=True, *, encoding='latin-1'):
     ValueError: no newline found when trying to read stringnl
 
     Embedded escapes are undone in the result.
-    >>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'")) # TODO: RUSTPYTHON # doctest: +EXPECTED_FAILURE
+    >>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'"))
     'a\n\\b\x00c\td'
     """
 
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index 1bb74c6d969..8352b69f7b2 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -1620,7 +1620,6 @@ def test_pickling(self):
             self.assertEqual(orig, derived)
         self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_compat_unpickle(self):
         tests = [
             b"cdatetime\ndate\n(S'\\x07\\xdf\\x0b\\x1b'\ntR.",
@@ -2407,7 +2406,6 @@ def test_pickling_subclass_datetime(self):
             self.assertEqual(orig, derived)
             self.assertTrue(isinstance(derived, SubclassDatetime))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_compat_unpickle(self):
         tests = [
             b'cdatetime\ndatetime\n('
@@ -3768,7 +3766,6 @@ def test_pickling_subclass_time(self):
             self.assertEqual(orig, derived)
             self.assertTrue(isinstance(derived, SubclassTime))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_compat_unpickle(self):
         tests = [
             (b"cdatetime\ntime\n(S'\\x14;\\x10\\x00\\x10\\x00'\ntR.",
@@ -4186,7 +4183,6 @@ def test_pickling(self):
             self.assertEqual(derived.tzname(), 'cookie')
         self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_compat_unpickle(self):
         tests = [
             b"cdatetime\ntime\n(S'\\x05\\x06\\x07\\x01\\xe2@'\n"
@@ -4652,7 +4648,6 @@ def test_pickling(self):
             self.assertEqual(derived.tzname(), 'cookie')
         self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
 
-    @unittest.expectedFailure # TODO: RUSTPYTHON
     def test_compat_unpickle(self):
         tests = [
             b'cdatetime\ndatetime\n'
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 232121b6210..85364299f0a 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1239,7 +1239,6 @@ def test_raw(self):
             if b != b'\\':
                 self.assertEqual(decode(b + b'0'), (b + b'0', 2))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; + (b'[]', 4)
     def test_escape(self):
         decode = codecs.escape_decode
         check = coding_checker(self, decode)
@@ -1296,7 +1295,6 @@ def test_warnings(self):
                 r'"\\501" is an invalid octal escape sequence'):
             self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; ValueError: not raised by escape_decode
     def test_errors(self):
         decode = codecs.escape_decode
         self.assertRaises(ValueError, decode, br"\x")
@@ -2387,7 +2385,6 @@ def test_decoder_state(self):
 
 
 class CharmapTest(unittest.TestCase):
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; IndexError: index out of range
     def test_decode_with_string_map(self):
         self.assertEqual(
             codecs.charmap_decode(b"\x00\x01\x02", "strict", "abc"),
@@ -2443,7 +2440,6 @@ def test_decode_with_string_map(self):
             ("", len(allbytes))
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AssertionError: UnicodeDecodeError not raised by charmap_decode
     def test_decode_with_int2str_map(self):
         self.assertEqual(
             codecs.charmap_decode(b"\x00\x01\x02", "strict",
@@ -2560,7 +2556,6 @@ def test_decode_with_int2str_map(self):
             b"\x00\x01\x02", "strict", {0: "A", 1: 'Bb', 2: 999999999}
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; TypeError: character mapping must be in range(65536)
     def test_decode_with_int2int_map(self):
         a = ord('a')
         b = ord('b')
@@ -2805,7 +2800,6 @@ def test_decode_errors(self):
         self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
         self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AssertionError: '\x00\t\n\r\\' != '\x00\t\n\r'
     def test_partial(self):
         self.check_partial(
             "\x00\t\n\r\\\xff\uffff\U00010000",
@@ -2849,7 +2843,6 @@ def test_partial(self):
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; UnicodeDecodeError: 'unicodeescape' codec can't decode bytes in position 72-75: truncated \uXXXX escape
     def test_readline(self):
         return super().test_readline()
 
@@ -2908,7 +2901,6 @@ def test_decode_errors(self):
         self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
         self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; - \
     def test_partial(self):
         self.check_partial(
             "\x00\t\n\r\\\xff\uffff\U00010000",
@@ -2938,11 +2930,9 @@ def test_partial(self):
             ]
         )
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; - \
     def test_incremental_surrogatepass(self):
         return super().test_incremental_surrogatepass()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; UnicodeDecodeError: 'rawunicodeescape' codec can't decode bytes in position 72-76: truncated \uXXXX
     def test_readline(self):
         return super().test_readline()
 
@@ -2990,7 +2980,6 @@ def test_ascii(self):
         self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
                          b"foo\x80bar")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; Result: FAILURE
     def test_charmap(self):
         # bad byte: \xa5 is unmapped in iso-8859-3
         self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
@@ -3183,7 +3172,6 @@ def test_binary_to_text_denylists_text_transforms(self):
                     bad_input.decode("rot_13")
                 self.assertIsNone(failure.exception.__cause__)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'error' object has no attribute '__notes__'. Did you mean: '__ne__'?
     @unittest.skipUnless(zlib, "Requires zlib support")
     def test_custom_zlib_error_is_noted(self):
         # Check zlib codec gives a good error for malformed input
@@ -3192,7 +3180,6 @@ def test_custom_zlib_error_is_noted(self):
             codecs.decode(b"hello", "zlib_codec")
         self.assertEqual(msg, failure.exception.__notes__[0])
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; - AttributeError: 'Error' object has no attribute '__notes__'
     def test_custom_hex_error_is_noted(self):
         # Check hex codec gives a good error for malformed input
         import binascii
@@ -3292,55 +3279,46 @@ def check_note(self, obj_to_raise, msg, exc_type=RuntimeError):
         with self.assertNoted("decoding", exc_type, msg):
             codecs.decode(b"bytes input", self.codec_name)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_raise_by_type(self):
         self.check_note(RuntimeError, "")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_raise_by_value(self):
         msg = "This should be noted"
         self.check_note(RuntimeError(msg), msg)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'MyRuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_raise_grandchild_subclass_exact_size(self):
         msg = "This should be noted"
         class MyRuntimeError(RuntimeError):
             __slots__ = ()
         self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'MyRuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_raise_subclass_with_weakref_support(self):
         msg = "This should be noted"
         class MyRuntimeError(RuntimeError):
             pass
         self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'CustomInit' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_init_override(self):
         class CustomInit(RuntimeError):
             def __init__(self):
                 pass
         self.check_note(CustomInit, "")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'CustomNew' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_new_override(self):
         class CustomNew(RuntimeError):
             def __new__(cls):
                 return super().__new__(cls)
         self.check_note(CustomNew, "")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_instance_attribute(self):
         msg = "This should be noted"
         exc = RuntimeError(msg)
         exc.attr = 1
         self.check_note(exc, "^{}$".format(msg))
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_non_str_arg(self):
         self.check_note(RuntimeError(1), "1")
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AttributeError: 'RuntimeError' object has no attribute '__notes__'. Did you mean: '__ne__'?
     def test_multiple_args(self):
         msg_re = r"^\('a', 'b', 'c'\)$"
         self.check_note(RuntimeError('a', 'b', 'c'), msg_re)
@@ -3357,7 +3335,6 @@ def test_codec_lookup_failure(self):
         with self.assertRaisesRegex(LookupError, msg):
             codecs.decode(b"bytes input", self.codec_name)
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AssertionError: "^'exception_notes_test' encoder returned 'str' instead of 'bytes'; use codecs.encode\(\) to encode to arbitrary types$" does not match "'exception_notes_test' encoder returned 'str' instead of 'bytes'; use codecs.encode() to encode arbitrary types"
     def test_unflagged_non_text_codec_handling(self):
         # The stdlib non-text codecs are now marked so they're
         # pre-emptively skipped by the text model related methods
@@ -3985,7 +3962,6 @@ def test_rot13_func(self):
 
 class CodecNameNormalizationTest(unittest.TestCase):
     """Test codec name normalization"""
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; AssertionError: Tuples differ: (1, 2, 3, 4) != (None, None, None, None)
     def test_codecs_lookup(self):
         FOUND = (1, 2, 3, 4)
         NOT_FOUND = (None, None, None, None)
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index 1a14024db08..865406738e5 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -73,15 +73,12 @@ def loads(self, buf, **kwds):
         u = self.unpickler(f, **kwds)
         return u.load()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_badly_escaped_string(self):
         return super().test_badly_escaped_string()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_correctly_quoted_string(self):
         return super().test_correctly_quoted_string()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_load_python2_str_as_bytes(self):
         return super().test_load_python2_str_as_bytes()
 
@@ -182,7 +179,6 @@ def loads(self, buf, **kwds):
     test_find_class = None
     test_custom_find_class = None
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_badly_escaped_string(self):
         return super().test_badly_escaped_string()
 
@@ -202,7 +198,6 @@ def test_bytes_memoization(self):
     def test_c_methods(self):
         return super().test_c_methods()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_correctly_quoted_string(self):
         return super().test_correctly_quoted_string()
 
@@ -210,7 +205,6 @@ def test_correctly_quoted_string(self):
     def test_in_band_buffers(self):
         return super().test_in_band_buffers()
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON
     def test_load_python2_str_as_bytes(self):
         return super().test_load_python2_str_as_bytes()
 
diff --git a/crates/vm/src/codecs.rs b/crates/vm/src/codecs.rs
index cdae4c2ba13..cca33eba2e1 100644
--- a/crates/vm/src/codecs.rs
+++ b/crates/vm/src/codecs.rs
@@ -220,10 +220,11 @@ impl CodecsRegistry {
     }
 
     pub(crate) fn register_manual(&self, name: &str, codec: PyCodec) -> PyResult<()> {
+        let name = normalize_encoding_name(name);
         self.inner
             .write()
             .search_cache
-            .insert(name.to_owned(), codec);
+            .insert(name.into_owned(), codec);
         Ok(())
     }
 
@@ -283,7 +284,9 @@ impl CodecsRegistry {
         vm: &VirtualMachine,
     ) -> PyResult {
         let codec = self.lookup(encoding, vm)?;
-        codec.encode(obj, errors, vm)
+        codec.encode(obj, errors, vm).inspect_err(|exc| {
+            Self::add_codec_note(exc, "encoding", encoding, vm);
+        })
     }
 
     pub fn decode(
@@ -294,7 +297,9 @@ impl CodecsRegistry {
         vm: &VirtualMachine,
     ) -> PyResult {
         let codec = self.lookup(encoding, vm)?;
-        codec.decode(obj, errors, vm)
+        codec.decode(obj, errors, vm).inspect_err(|exc| {
+            Self::add_codec_note(exc, "decoding", encoding, vm);
+        })
     }
 
     pub fn encode_text(
@@ -306,12 +311,15 @@ impl CodecsRegistry {
     ) -> PyResult<PyBytesRef> {
         let codec = self._lookup_text_encoding(encoding, "codecs.encode()", vm)?;
         codec
-            .encode(obj.into(), errors, vm)?
+            .encode(obj.into(), errors, vm)
+            .inspect_err(|exc| {
+                Self::add_codec_note(exc, "encoding", encoding, vm);
+            })?
             .downcast()
             .map_err(|obj| {
                 vm.new_type_error(format!(
                     "'{}' encoder returned '{}' instead of 'bytes'; use codecs.encode() to \
-                     encode arbitrary types",
+                     encode to arbitrary types",
                     encoding,
                     obj.class().name(),
                 ))
@@ -326,20 +334,55 @@ impl CodecsRegistry {
         vm: &VirtualMachine,
     ) -> PyResult<PyStrRef> {
         let codec = self._lookup_text_encoding(encoding, "codecs.decode()", vm)?;
-        codec.decode(obj, errors, vm)?.downcast().map_err(|obj| {
-            vm.new_type_error(format!(
-                "'{}' decoder returned '{}' instead of 'str'; use codecs.decode() \
-                 to encode arbitrary types",
-                encoding,
-                obj.class().name(),
-            ))
-        })
+        codec
+            .decode(obj, errors, vm)
+            .inspect_err(|exc| {
+                Self::add_codec_note(exc, "decoding", encoding, vm);
+            })?
+            .downcast()
+            .map_err(|obj| {
+                vm.new_type_error(format!(
+                    "'{}' decoder returned '{}' instead of 'str'; use codecs.decode() to \
+                 decode to arbitrary types",
+                    encoding,
+                    obj.class().name(),
+                ))
+            })
+    }
+
+    fn add_codec_note(
+        exc: &crate::builtins::PyBaseExceptionRef,
+        operation: &str,
+        encoding: &str,
+        vm: &VirtualMachine,
+    ) {
+        let note = format!("{operation} with '{encoding}' codec failed");
+        let _ = vm.call_method(exc.as_object(), "add_note", (vm.ctx.new_str(note),));
     }
 
     pub fn register_error(&self, name: String, handler: PyObjectRef) -> Option<PyObjectRef> {
         self.inner.write().errors.insert(name, handler)
     }
 
+    pub fn unregister_error(&self, name: &str, vm: &VirtualMachine) -> PyResult<bool> {
+        const BUILTIN_ERROR_HANDLERS: &[&str] = &[
+            "strict",
+            "ignore",
+            "replace",
+            "xmlcharrefreplace",
+            "backslashreplace",
+            "namereplace",
+            "surrogatepass",
+            "surrogateescape",
+        ];
+        if BUILTIN_ERROR_HANDLERS.contains(&name) {
+            return Err(vm.new_value_error(format!(
+                "cannot un-register built-in error handler '{name}'"
+            )));
+        }
+        Ok(self.inner.write().errors.remove(name).is_some())
+    }
+
     pub fn lookup_error_opt(&self, name: &str) -> Option<PyObjectRef> {
         self.inner.read().errors.get(name).cloned()
     }
@@ -351,19 +394,28 @@ impl CodecsRegistry {
 }
 
 fn normalize_encoding_name(encoding: &str) -> Cow<'_, str> {
-    if let Some(i) = encoding.find(|c: char| c == ' ' || c.is_ascii_uppercase()) {
-        let mut out = encoding.as_bytes().to_owned();
-        for byte in &mut out[i..] {
-            if *byte == b' ' {
-                *byte = b'-';
-            } else {
-                byte.make_ascii_lowercase();
+    // _Py_normalize_encoding: collapse non-alphanumeric/non-dot chars into
+    // single underscore, strip non-ASCII, lowercase ASCII letters.
+    let needs_transform = encoding
+        .bytes()
+        .any(|b| b.is_ascii_uppercase() || !b.is_ascii_alphanumeric() && b != b'.');
+    if !needs_transform {
+        return encoding.into();
+    }
+    let mut out = String::with_capacity(encoding.len());
+    let mut punct = false;
+    for c in encoding.chars() {
+        if c.is_ascii_alphanumeric() || c == '.' {
+            if punct && !out.is_empty() {
+                out.push('_');
             }
+            out.push(c.to_ascii_lowercase());
+            punct = false;
+        } else {
+            punct = true;
         }
-        String::from_utf8(out).unwrap().into()
-    } else {
-        encoding.into()
     }
+    out.into()
 }
 
 #[derive(Eq, PartialEq)]
@@ -416,7 +468,7 @@ impl StandardEncoding {
             } else {
                 None
             }
-        } else if encoding == "CP_UTF8" {
+        } else if encoding == "cp65001" {
             Some(Self::Utf8)
         } else {
             None
diff --git a/crates/vm/src/stdlib/codecs.rs b/crates/vm/src/stdlib/codecs.rs
index f1fdbf1bdcd..161876c965a 100644
--- a/crates/vm/src/stdlib/codecs.rs
+++ b/crates/vm/src/stdlib/codecs.rs
@@ -98,6 +98,21 @@ mod _codecs {
         vm.state.codec_registry.lookup_error(name.as_str(), vm)
     }
 
+    #[pyfunction]
+    fn _unregister_error(errors: PyStrRef, vm: &VirtualMachine) -> PyResult<bool> {
+        if errors.as_wtf8().as_bytes().contains(&0) {
+            return Err(cstring_error(vm));
+        }
+        if !errors.as_wtf8().is_utf8() {
+            return Err(vm.new_unicode_encode_error(
+                "'utf-8' codec can't encode character: surrogates not allowed".to_owned(),
+            ));
+        }
+        vm.state
+            .codec_registry
+            .unregister_error(errors.as_str(), vm)
+    }
+
     type EncodeResult = PyResult<(Vec<u8>, usize)>;
 
     #[derive(FromArgs)]