diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index d381bfe1e03..1f3fc4864cd 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -178,6 +178,7 @@ PYTHONHASHSEED PYTHONHOME PYTHONINSPECT PYTHONINTMAXSTRDIGITS +PYTHONNODEBUGRANGES PYTHONNOUSERSITE PYTHONOPTIMIZE PYTHONPATH diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 804cce1dba4..f2ef233a59a 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -425,8 +425,6 @@ def test_co_positions_artificial_instructions(self): ] ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_endline_and_columntable_none_when_no_debug_ranges(self): # Make sure that if `-X no_debug_ranges` is used, there is # minimal debug info @@ -442,8 +440,6 @@ def f(): """) assert_python_ok('-X', 'no_debug_ranges', '-c', code) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_endline_and_columntable_none_when_no_debug_ranges_env(self): # Same as above but using the environment variable opt out. 
code = textwrap.dedent(""" diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 9d95903d526..f2ec8344b2f 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -218,8 +218,6 @@ def test_base_exception(self): lst = traceback.format_exception_only(e.__class__, e) self.assertEqual(lst, ['KeyboardInterrupt\n']) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_only_bad__str__(self): class X(Exception): def __str__(self): @@ -238,8 +236,6 @@ def test_format_exception_group_without_show_group(self): err = traceback.format_exception_only(eg) self.assertEqual(err, ['ExceptionGroup: A (1 sub-exception)\n']) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group(self): eg = ExceptionGroup('A', [ValueError('B')]) err = traceback.format_exception_only(eg, show_group=True) @@ -248,8 +244,6 @@ def test_format_exception_group(self): ' ValueError: B\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_base_exception_group(self): eg = BaseExceptionGroup('A', [BaseException('B')]) err = traceback.format_exception_only(eg, show_group=True) @@ -258,8 +252,6 @@ def test_format_base_exception_group(self): ' BaseException: B\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_with_note(self): exc = ValueError('B') exc.add_note('Note') @@ -271,8 +263,6 @@ def test_format_exception_group_with_note(self): ' Note\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_explicit_class(self): eg = ExceptionGroup('A', [ValueError('B')]) err = traceback.format_exception_only(ExceptionGroup, eg, show_group=True) @@ -281,8 +271,6 @@ def test_format_exception_group_explicit_class(self): ' ValueError: B\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_multiple_exceptions(self): eg = ExceptionGroup('A', [ValueError('B'), TypeError('C')]) err = traceback.format_exception_only(eg, 
show_group=True) @@ -292,8 +280,6 @@ def test_format_exception_group_multiple_exceptions(self): ' TypeError: C\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_multiline_messages(self): eg = ExceptionGroup('A\n1', [ValueError('B\n2')]) err = traceback.format_exception_only(eg, show_group=True) @@ -303,8 +289,6 @@ def test_format_exception_group_multiline_messages(self): ' 2\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_multiline2_messages(self): exc = ValueError('B\n\n2\n') exc.add_note('\nC\n\n3') @@ -323,8 +307,6 @@ def test_format_exception_group_multiline2_messages(self): ' IndexError: D\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_syntax_error(self): exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) eg = ExceptionGroup('A\n1', [exc]) @@ -336,8 +318,6 @@ def test_format_exception_group_syntax_error(self): ' SyntaxError: error\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_nested_with_notes(self): exc = IndexError('D') exc.add_note('Note\nmultiline') @@ -358,8 +338,6 @@ def test_format_exception_group_nested_with_notes(self): ' TypeError: F\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_with_tracebacks(self): def f(): try: @@ -385,8 +363,6 @@ def g(): ' TypeError: g\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_with_cause(self): def f(): try: @@ -404,8 +380,6 @@ def f(): ' ValueError: 0\n', ]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_group_syntax_error_with_custom_values(self): # See https://github.com/python/cpython/issues/128894 for exc in [ @@ -550,16 +524,12 @@ def test_print_exception_exc(self): traceback.print_exception(Exception("projector"), file=output) self.assertEqual(output.getvalue(), "Exception: projector\n") - # TODO: RUSTPYTHON - @unittest.expectedFailure def 
test_print_last(self): with support.swap_attr(sys, 'last_exc', ValueError(42)): output = StringIO() traceback.print_last(file=output) self.assertEqual(output.getvalue(), "ValueError: 42\n") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_format_exception_exc(self): e = Exception("projector") output = traceback.format_exception(e) @@ -598,8 +568,6 @@ def test_exception_is_None(self): self.assertEqual( traceback.format_exception_only(None, None), [NONE_EXC_STRING]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_signatures(self): self.assertEqual( str(inspect.signature(traceback.print_exception)), @@ -2286,8 +2254,6 @@ def test_print_exception_bad_type_capi(self): 'Exception expected for value, int found\n') ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_print_exception_bad_type_python(self): msg = "Exception expected for value, int found" with self.assertRaisesRegex(TypeError, msg): @@ -2366,8 +2332,6 @@ def test_simple(self): self.assertTrue(lines[1].startswith(' File')) self.assertIn('1/0 # Marker', lines[2]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_cause(self): def inner_raise(): try: @@ -2501,8 +2465,6 @@ def test_message_none(self): err = self.get_report(Exception('')) self.assertIn('Exception\n', err) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_syntax_error_various_offsets(self): for offset in range(-5, 10): for add in [0, 2]: @@ -2525,8 +2487,6 @@ def test_syntax_error_various_offsets(self): exp = "\n".join(expected) self.assertEqual(exp, err) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_with_note(self): e = ValueError(123) vanilla = self.get_report(e) @@ -2545,8 +2505,6 @@ def test_exception_with_note(self): del e.__notes__ self.assertEqual(self.get_report(e), vanilla) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_with_invalid_notes(self): e = ValueError(123) vanilla = self.get_report(e) @@ -2604,8 +2562,6 @@ def __getattr__(self, name): 
self.get_report(e), vanilla + "Ignored error getting __notes__: ValueError('no __notes__')\n") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_with_multiple_notes(self): for e in [ValueError(42), SyntaxError('bad syntax')]: with self.subTest(e=e): @@ -2685,8 +2641,6 @@ def __str__(self): exp = f'<unknown>.{X.__qualname__}: I am X\n' self.assertEqual(exp, err) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_bad__str__(self): class X(Exception): def __str__(self): @@ -2861,8 +2815,6 @@ def exc(): report = self.get_report(exc) self.assertEqual(report, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_group_width_limit(self): excs = [] for i in range(1000): @@ -2907,8 +2859,6 @@ def test_exception_group_width_limit(self): report = self.get_report(eg) self.assertEqual(report, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_group_depth_limit(self): exc = TypeError('bad type') for i in range(1000): @@ -3371,8 +3321,6 @@ def test_basics(self): self.assertNotEqual(f, object()) self.assertEqual(f, ALWAYS_EQ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_lazy_lines(self): linecache.clearcache() f = traceback.FrameSummary("f", 1, "dummy", lookup_line=False) @@ -3509,8 +3457,6 @@ def some_inner(): s.format(), [f'{__file__}:{some_inner.__code__.co_firstlineno + 1}']) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_dropping_frames(self): def f(): 1/0 @@ -3611,13 +3557,9 @@ def do_test_smoke(self, exc, expected_type_str): self.assertEqual(expected_type_str, exc.exc_type_str) self.assertEqual(str(exc_obj), str(exc)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_smoke_builtin(self): self.do_test_smoke(ValueError(42), 'ValueError') - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_smoke_user_exception(self): class MyException(Exception): pass @@ -3630,8 +3572,6 @@ class MyException(Exception): 'test_smoke_user_exception.<locals>.MyException')
self.do_test_smoke(MyException('bad things happened'), expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_from_exception(self): # Check all the parameters are accepted. def foo(): @@ -3657,8 +3597,6 @@ def foo(): self.assertEqual(type(exc_obj).__name__, exc.exc_type_str) self.assertEqual(str(exc_obj), str(exc)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_cause(self): try: try: @@ -3683,8 +3621,6 @@ def test_cause(self): self.assertEqual(type(exc_obj).__name__, exc.exc_type_str) self.assertEqual(str(exc_obj), str(exc)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_context(self): try: try: @@ -3734,8 +3670,6 @@ def f(): self.assertIn( "RecursionError: maximum recursion depth exceeded", res[-1]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_compact_with_cause(self): try: try: @@ -3758,8 +3692,6 @@ def test_compact_with_cause(self): self.assertEqual(type(exc_obj).__name__, exc.exc_type_str) self.assertEqual(str(exc_obj), str(exc)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_compact_no_cause(self): try: try: @@ -3782,8 +3714,6 @@ def test_compact_no_cause(self): self.assertEqual(type(exc_obj).__name__, exc.exc_type_str) self.assertEqual(str(exc_obj), str(exc)) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_no_save_exc_type(self): try: 1/0 @@ -3912,8 +3842,6 @@ def test_lookup_lines(self): linecache.updatecache('/foo.py', globals()) self.assertEqual(exc.stack[0].line, "import sys") - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_locals(self): linecache.updatecache('/foo.py', globals()) e = Exception("uh oh") @@ -3962,8 +3890,6 @@ def f(): 'ZeroDivisionError: division by zero', '']) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_dont_swallow_cause_or_context_of_falsey_exception(self): # see gh-132308: Ensure that __cause__ or __context__ attributes of exceptions # that evaluate as falsey are included in the output. 
For falsey term, @@ -4034,8 +3960,6 @@ def test_exception_group_format_exception_only(self): self.assertEqual(formatted, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_exception_group_format_exception_onlyi_recursive(self): teg = traceback.TracebackException.from_exception(self.eg) formatted = ''.join(teg.format_exception_only(show_group=True)).split('\n') @@ -4100,8 +4024,6 @@ def test_exception_group_format(self): self.assertEqual(formatted, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_max_group_width(self): excs1 = [] excs2 = [] @@ -4140,8 +4062,6 @@ def test_max_group_width(self): self.assertEqual(formatted, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_max_group_depth(self): exc = TypeError('bad type') for i in range(3): @@ -4191,8 +4111,6 @@ def test_comparison(self): self.assertNotEqual(exc, object()) self.assertEqual(exc, ALWAYS_EQ) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_dont_swallow_subexceptions_of_falsey_exceptiongroup(self): # see gh-132308: Ensure that subexceptions of exception groups # that evaluate as falsey are displayed in the output. 
For falsey term, @@ -4230,8 +4148,6 @@ def callable(): ) return result_lines[0] - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getattr_suggestions(self): class Substitution: noise = more_noise = a = bc = None @@ -4275,8 +4191,6 @@ class CaseChangeOverSubstitution: actual = self.get_suggestion(cls(), 'bluch') self.assertIn(suggestion, actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getattr_suggestions_underscored(self): class A: bluch = None @@ -4330,8 +4244,6 @@ class A: actual = self.get_suggestion(A(), 'bluch') self.assertNotIn("blech", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getattr_suggestions_no_args(self): class A: blech = None @@ -4349,8 +4261,6 @@ def __getattr__(self, attr): actual = self.get_suggestion(A(), 'bluch') self.assertIn("blech", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_getattr_suggestions_invalid_args(self): class NonStringifyClass: __str__ = None @@ -4392,8 +4302,6 @@ def __dir__(self): self.assertNotIn("blech", actual) self.assertNotIn("oh no!", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_attribute_error_with_non_string_candidates(self): class T: bluch = 1 @@ -4567,8 +4475,6 @@ def raise_attribute_error_with_bad_name(): ) self.assertNotIn("?", result_lines[-1]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_suggestions(self): def Substitution(): noise = more_noise = a = bc = None @@ -4609,24 +4515,18 @@ def EliminationOverAddition(): actual = self.get_suggestion(func) self.assertIn(suggestion, actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_suggestions_from_globals(self): def func(): print(global_for_suggestio) actual = self.get_suggestion(func) self.assertIn("'global_for_suggestions'?", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_suggestions_from_builtins(self): def func(): print(ZeroDivisionErrrrr) actual = self.get_suggestion(func) 
self.assertIn("'ZeroDivisionError'?", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_suggestions_from_builtins_when_builtins_is_module(self): def func(): custom_globals = globals().copy() @@ -4635,8 +4535,6 @@ def func(): actual = self.get_suggestion(func) self.assertIn("'ZeroDivisionError'?", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_suggestions_with_non_string_candidates(self): def func(): abc = 1 @@ -4785,8 +4683,6 @@ def func(): actual = self.get_suggestion(func) self.assertNotIn("blech", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_with_instance(self): class A: def __init__(self): @@ -4843,8 +4739,6 @@ def func(): actual = self.get_suggestion(func) self.assertNotIn("something", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_for_stdlib_modules(self): def func(): stream = io.StringIO() @@ -4852,8 +4746,6 @@ def func(): actual = self.get_suggestion(func) self.assertIn("forget to import 'io'", actual) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_name_error_for_private_stdlib_modules(self): def func(): stream = _io.StringIO() @@ -4898,8 +4790,6 @@ def test_all(self): expected.add(name) self.assertCountEqual(traceback.__all__, expected) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_levenshtein_distance(self): # copied from _testinternalcapi.test_edit_cost # to also exercise the Python implementation diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py index b291d530169..b9c1fd6134e 100644 --- a/Lib/test/test_zipimport.py +++ b/Lib/test/test_zipimport.py @@ -724,6 +724,8 @@ def doTraceback(self, module): else: raise AssertionError("This ought to be impossible") + # TODO: RUSTPYTHON; empty caret lines from equal col/end_col + @unittest.expectedFailure def testTraceback(self): files = {TESTMOD + ".py": (NOW, raise_src)} self.doTest(None, files, TESTMOD, call=self.doTraceback) diff --git 
a/Lib/traceback.py b/Lib/traceback.py index d6a010f4157..572a3177cb0 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -1,9 +1,14 @@ """Extract, format and print information about Python stack traces.""" -import collections +import collections.abc import itertools import linecache import sys +import textwrap +import warnings +from contextlib import suppress +import _colorize +from _colorize import ANSIColors __all__ = ['extract_stack', 'extract_tb', 'format_exception', 'format_exception_only', 'format_list', 'format_stack', @@ -16,6 +21,7 @@ # Formatting and printing lists of traceback lines. # + def print_list(extracted_list, file=None): """Print the list of tuples as returned by extract_tb() or extract_stack() as a formatted stack trace to the given file.""" @@ -69,7 +75,8 @@ def extract_tb(tb, limit=None): trace. The line is a string with leading and trailing whitespace stripped; if the source is not available it is None. """ - return StackSummary.extract(walk_tb(tb), limit=limit) + return StackSummary._extract_from_extended_frame_gen( + _walk_tb_with_full_positions(tb), limit=limit) # # Exception formatting and output. @@ -95,14 +102,18 @@ def _parse_value_tb(exc, value, tb): raise ValueError("Both or neither of value and tb must be given") if value is tb is _sentinel: if exc is not None: - return exc, exc.__traceback__ + if isinstance(exc, BaseException): + return exc, exc.__traceback__ + + raise TypeError(f'Exception expected for value, ' + f'{type(exc).__name__} found') else: return None, None return value, tb def print_exception(exc, /, value=_sentinel, tb=_sentinel, limit=None, \ - file=None, chain=True): + file=None, chain=True, **kwargs): """Print exception up to 'limit' stack trace entries from 'tb' to 'file'. 
This differs from print_tb() in the following ways: (1) if @@ -113,16 +124,23 @@ def print_exception(exc, /, value=_sentinel, tb=_sentinel, limit=None, \ occurred with a caret on the next line indicating the approximate position of the error. """ + colorize = kwargs.get("colorize", False) value, tb = _parse_value_tb(exc, value, tb) - if file is None: - file = sys.stderr te = TracebackException(type(value), value, tb, limit=limit, compact=True) - for line in te.format(chain=chain): - print(line, file=file, end="") + te.print(file=file, chain=chain, colorize=colorize) + + +BUILTIN_EXCEPTION_LIMIT = object() + + +def _print_exception_bltin(exc, /): + file = sys.stderr if sys.stderr is not None else sys.__stderr__ + colorize = _colorize.can_colorize(file=file) + return print_exception(exc, limit=BUILTIN_EXCEPTION_LIMIT, file=file, colorize=colorize) def format_exception(exc, /, value=_sentinel, tb=_sentinel, limit=None, \ - chain=True): + chain=True, **kwargs): """Format a stack trace and the exception information. The arguments have the same meaning as the corresponding arguments @@ -131,64 +149,79 @@ def format_exception(exc, /, value=_sentinel, tb=_sentinel, limit=None, \ these lines are concatenated and printed, exactly the same text is printed as does print_exception(). """ + colorize = kwargs.get("colorize", False) value, tb = _parse_value_tb(exc, value, tb) te = TracebackException(type(value), value, tb, limit=limit, compact=True) - return list(te.format(chain=chain)) + return list(te.format(chain=chain, colorize=colorize)) -def format_exception_only(exc, /, value=_sentinel): +def format_exception_only(exc, /, value=_sentinel, *, show_group=False, **kwargs): """Format the exception part of a traceback. The return value is a list of strings, each ending in a newline. 
- Normally, the list contains a single string; however, for - SyntaxError exceptions, it contains several lines that (when - printed) display detailed information about where the syntax - error occurred. - - The message indicating which exception occurred is always the last - string in the list. + The list contains the exception's message, which is + normally a single string; however, for :exc:`SyntaxError` exceptions, it + contains several lines that (when printed) display detailed information + about where the syntax error occurred. Following the message, the list + contains the exception's ``__notes__``. + When *show_group* is ``True``, and the exception is an instance of + :exc:`BaseExceptionGroup`, the nested exceptions are included as + well, recursively, with indentation relative to their nesting depth. """ + colorize = kwargs.get("colorize", False) if value is _sentinel: value = exc te = TracebackException(type(value), value, None, compact=True) - return list(te.format_exception_only()) + return list(te.format_exception_only(show_group=show_group, colorize=colorize)) # -- not official API but folk probably use these two functions. 
-def _format_final_exc_line(etype, value): - valuestr = _some_str(value) - if value is None or not valuestr: - line = "%s\n" % etype +def _format_final_exc_line(etype, value, *, insert_final_newline=True, colorize=False): + valuestr = _safe_string(value, 'exception') + end_char = "\n" if insert_final_newline else "" + if colorize: + if value is None or not valuestr: + line = f"{ANSIColors.BOLD_MAGENTA}{etype}{ANSIColors.RESET}{end_char}" + else: + line = f"{ANSIColors.BOLD_MAGENTA}{etype}{ANSIColors.RESET}: {ANSIColors.MAGENTA}{valuestr}{ANSIColors.RESET}{end_char}" else: - line = "%s: %s\n" % (etype, valuestr) + if value is None or not valuestr: + line = f"{etype}{end_char}" + else: + line = f"{etype}: {valuestr}{end_char}" return line -def _some_str(value): + +def _safe_string(value, what, func=str): try: - return str(value) + return func(value) except: - return '<unprintable %s object>' % type(value).__name__ + return f'<{what} {func.__name__}() failed>' # -- def print_exc(limit=None, file=None, chain=True): - """Shorthand for 'print_exception(*sys.exc_info(), limit, file)'.""" - print_exception(*sys.exc_info(), limit=limit, file=file, chain=chain) + """Shorthand for 'print_exception(sys.exception(), limit=limit, file=file, chain=chain)'.""" + print_exception(sys.exception(), limit=limit, file=file, chain=chain) def format_exc(limit=None, chain=True): """Like print_exc() but return a string.""" - return "".join(format_exception(*sys.exc_info(), limit=limit, chain=chain)) + return "".join(format_exception(sys.exception(), limit=limit, chain=chain)) def print_last(limit=None, file=None, chain=True): - """This is a shorthand for 'print_exception(sys.last_type, - sys.last_value, sys.last_traceback, limit, file)'.""" - if not hasattr(sys, "last_type"): + """This is a shorthand for 'print_exception(sys.last_exc, limit=limit, file=file, chain=chain)'.""" + if not hasattr(sys, "last_exc") and not hasattr(sys, "last_type"): raise ValueError("no last exception") - print_exception(sys.last_type,
sys.last_value, sys.last_traceback, - limit, file, chain) + + if hasattr(sys, "last_exc"): + print_exception(sys.last_exc, limit=limit, file=file, chain=chain) + else: + print_exception(sys.last_type, sys.last_value, sys.last_traceback, + limit=limit, file=file, chain=chain) + # # Printing and Extracting Stacks. @@ -241,7 +274,7 @@ def clear_frames(tb): class FrameSummary: - """A single frame from a traceback. + """Information about a single frame from a traceback. - :attr:`filename` The filename for the frame. - :attr:`lineno` The line within filename for the frame that was @@ -254,10 +287,12 @@ class FrameSummary: mapping the name to the repr() of the variable. """ - __slots__ = ('filename', 'lineno', 'name', '_line', 'locals') + __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno', + 'name', '_lines', '_lines_dedented', 'locals', '_code') def __init__(self, filename, lineno, name, *, lookup_line=True, - locals=None, line=None): + locals=None, line=None, + end_lineno=None, colno=None, end_colno=None, **kwargs): """Construct a FrameSummary. 
:param lookup_line: If True, `linecache` is consulted for the source @@ -269,11 +304,17 @@ def __init__(self, filename, lineno, name, *, lookup_line=True, """ self.filename = filename self.lineno = lineno + self.end_lineno = lineno if end_lineno is None else end_lineno + self.colno = colno + self.end_colno = end_colno self.name = name - self._line = line + self._code = kwargs.get("_code") + self._lines = line + self._lines_dedented = None if lookup_line: self.line - self.locals = {k: repr(v) for k, v in locals.items()} if locals else None + self.locals = {k: _safe_string(v, 'local', func=repr) + for k, v in locals.items()} if locals else None def __eq__(self, other): if isinstance(other, FrameSummary): @@ -298,13 +339,43 @@ def __repr__(self): def __len__(self): return 4 + def _set_lines(self): + if ( + self._lines is None + and self.lineno is not None + and self.end_lineno is not None + ): + lines = [] + for lineno in range(self.lineno, self.end_lineno + 1): + # treat errors (empty string) and empty lines (newline) as the same + line = linecache.getline(self.filename, lineno).rstrip() + if not line and self._code is not None and self.filename.startswith("<"): + line = linecache._getline_from_code(self._code, lineno).rstrip() + lines.append(line) + self._lines = "\n".join(lines) + "\n" + + @property + def _original_lines(self): + # Returns the line as-is from the source, without modifying whitespace. 
+ self._set_lines() + return self._lines + + @property + def _dedented_lines(self): + # Returns _original_lines, but dedented + self._set_lines() + if self._lines_dedented is None and self._lines is not None: + self._lines_dedented = textwrap.dedent(self._lines) + return self._lines_dedented + @property def line(self): - if self._line is None: - if self.lineno is None: - return None - self._line = linecache.getline(self.filename, self.lineno) - return self._line.strip() + self._set_lines() + if self._lines is None: + return None + # return only the first line, stripped + return self._lines.partition("\n")[0].strip() + def walk_stack(f): """Walk a stack yielding the frame and line number for each frame. @@ -313,7 +384,7 @@ def walk_stack(f): current stack is used. Usually used with StackSummary.extract. """ if f is None: - f = sys._getframe().f_back.f_back + f = sys._getframe().f_back.f_back.f_back.f_back while f is not None: yield f, f.f_lineno f = f.f_back @@ -330,18 +401,40 @@ def walk_tb(tb): tb = tb.tb_next +def _walk_tb_with_full_positions(tb): + # Internal version of walk_tb that yields full code positions including + # end line and column information. + while tb is not None: + positions = _get_code_position(tb.tb_frame.f_code, tb.tb_lasti) + # Yield tb_lineno when co_positions does not have a line number to + # maintain behavior with walk_tb. + if positions[0] is None: + yield tb.tb_frame, (tb.tb_lineno, ) + positions[1:] + else: + yield tb.tb_frame, positions + tb = tb.tb_next + + +def _get_code_position(code, instruction_index): + if instruction_index < 0: + return (None, None, None, None) + positions_gen = code.co_positions() + return next(itertools.islice(positions_gen, instruction_index // 2, None)) + + _RECURSIVE_CUTOFF = 3 # Also hardcoded in traceback.c. 
+ class StackSummary(list): - """A stack of frames.""" + """A list of FrameSummary objects, representing a stack of frames.""" @classmethod def extract(klass, frame_gen, *, limit=None, lookup_lines=True, capture_locals=False): """Create a StackSummary from a traceback or stack object. - :param frame_gen: A generator that yields (frame, lineno) tuples to - include in the stack. + :param frame_gen: A generator that yields (frame, lineno) tuples + whose summaries are to be included in the stack. :param limit: None to include all frames or the number of frames to include. :param lookup_lines: If True, lookup lines for each frame immediately, @@ -349,23 +442,41 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True, :param capture_locals: If True, the local variables from each frame will be captured as object representations into the FrameSummary. """ - if limit is None: + def extended_frame_gen(): + for f, lineno in frame_gen: + yield f, (lineno, None, None, None) + + return klass._extract_from_extended_frame_gen( + extended_frame_gen(), limit=limit, lookup_lines=lookup_lines, + capture_locals=capture_locals) + + @classmethod + def _extract_from_extended_frame_gen(klass, frame_gen, *, limit=None, + lookup_lines=True, capture_locals=False): + # Same as extract but operates on a frame generator that yields + # (frame, (lineno, end_lineno, colno, end_colno)) in the stack. + # Only lineno is required, the remaining fields can be None if the + # information is not available. 
+ builtin_limit = limit is BUILTIN_EXCEPTION_LIMIT + if limit is None or builtin_limit: limit = getattr(sys, 'tracebacklimit', None) if limit is not None and limit < 0: limit = 0 if limit is not None: - if limit >= 0: + if builtin_limit: + frame_gen = tuple(frame_gen) + frame_gen = frame_gen[len(frame_gen) - limit:] + elif limit >= 0: frame_gen = itertools.islice(frame_gen, limit) else: frame_gen = collections.deque(frame_gen, maxlen=-limit) result = klass() fnames = set() - for f, lineno in frame_gen: + for f, (lineno, end_lineno, colno, end_colno) in frame_gen: co = f.f_code filename = co.co_filename name = co.co_name - fnames.add(filename) linecache.lazycache(filename, f.f_globals) # Must defer line lookups until we have called checkcache. @@ -373,10 +484,16 @@ def extract(klass, frame_gen, *, limit=None, lookup_lines=True, f_locals = f.f_locals else: f_locals = None - result.append(FrameSummary( - filename, lineno, name, lookup_line=False, locals=f_locals)) + result.append( + FrameSummary(filename, lineno, name, + lookup_line=False, locals=f_locals, + end_lineno=end_lineno, colno=colno, end_colno=end_colno, + _code=f.f_code, + ) + ) for filename in fnames: linecache.checkcache(filename) + # If immediate lookup was desired, trigger lookups now. if lookup_lines: for f in result: @@ -402,7 +519,223 @@ def from_list(klass, a_list): result.append(FrameSummary(filename, lineno, name, line=line)) return result - def format(self): + def format_frame_summary(self, frame_summary, **kwargs): + """Format the lines for a single FrameSummary. + + Returns a string representing one frame involved in the stack. This + gets called for every frame to be printed in the stack summary. 
+ """ + colorize = kwargs.get("colorize", False) + row = [] + filename = frame_summary.filename + if frame_summary.filename.startswith("<stdin-") and frame_summary.filename.endswith('>'): + filename = "<stdin>" + if colorize: + row.append(' File {}"{}"{}, line {}{}{}, in {}{}{}\n'.format( + ANSIColors.MAGENTA, + filename, + ANSIColors.RESET, + ANSIColors.MAGENTA, + frame_summary.lineno, + ANSIColors.RESET, + ANSIColors.MAGENTA, + frame_summary.name, + ANSIColors.RESET, + ) + ) + else: + row.append(' File "{}", line {}, in {}\n'.format( + filename, frame_summary.lineno, frame_summary.name)) + if frame_summary._dedented_lines and frame_summary._dedented_lines.strip(): + if ( + frame_summary.colno is None or + frame_summary.end_colno is None + ): + # only output first line if column information is missing + row.append(textwrap.indent(frame_summary.line, ' ') + "\n") + else: + # get first and last line + all_lines_original = frame_summary._original_lines.splitlines() + first_line = all_lines_original[0] + # assume all_lines_original has enough lines (since we constructed it) + last_line = all_lines_original[frame_summary.end_lineno - frame_summary.lineno] + + # character index of the start/end of the instruction + start_offset = _byte_offset_to_character_offset(first_line, frame_summary.colno) + end_offset = _byte_offset_to_character_offset(last_line, frame_summary.end_colno) + + all_lines = frame_summary._dedented_lines.splitlines()[ + :frame_summary.end_lineno - frame_summary.lineno + 1 + ] + + # adjust start/end offset based on dedent + dedent_characters = len(first_line) - len(all_lines[0]) + start_offset = max(0, start_offset - dedent_characters) + end_offset = max(0, end_offset - dedent_characters) + + # When showing this on a terminal, some of the non-ASCII characters + # might be rendered as double-width characters, so we need to take + # that into account when calculating the length of the line.
+ dp_start_offset = _display_width(all_lines[0], offset=start_offset) + dp_end_offset = _display_width(all_lines[-1], offset=end_offset) + + # get exact code segment corresponding to the instruction + segment = "\n".join(all_lines) + segment = segment[start_offset:len(segment) - (len(all_lines[-1]) - end_offset)] + + # attempt to parse for anchors + anchors = None + show_carets = False + with suppress(Exception): + anchors = _extract_caret_anchors_from_line_segment(segment) + show_carets = self._should_show_carets(start_offset, end_offset, all_lines, anchors) + + result = [] + + # only display first line, last line, and lines around anchor start/end + significant_lines = {0, len(all_lines) - 1} + + anchors_left_end_offset = 0 + anchors_right_start_offset = 0 + primary_char = "^" + secondary_char = "^" + if anchors: + anchors_left_end_offset = anchors.left_end_offset + anchors_right_start_offset = anchors.right_start_offset + # computed anchor positions do not take start_offset into account, + # so account for it here + if anchors.left_end_lineno == 0: + anchors_left_end_offset += start_offset + if anchors.right_start_lineno == 0: + anchors_right_start_offset += start_offset + + # account for display width + anchors_left_end_offset = _display_width( + all_lines[anchors.left_end_lineno], offset=anchors_left_end_offset + ) + anchors_right_start_offset = _display_width( + all_lines[anchors.right_start_lineno], offset=anchors_right_start_offset + ) + + primary_char = anchors.primary_char + secondary_char = anchors.secondary_char + significant_lines.update( + range(anchors.left_end_lineno - 1, anchors.left_end_lineno + 2) + ) + significant_lines.update( + range(anchors.right_start_lineno - 1, anchors.right_start_lineno + 2) + ) + + # remove bad line numbers + significant_lines.discard(-1) + significant_lines.discard(len(all_lines)) + + def output_line(lineno): + """output all_lines[lineno] along with carets""" + result.append(all_lines[lineno] + "\n") + if not 
show_carets: + return + num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip()) + carets = [] + num_carets = dp_end_offset if lineno == len(all_lines) - 1 else _display_width(all_lines[lineno]) + # compute caret character for each position + for col in range(num_carets): + if col < num_spaces or (lineno == 0 and col < dp_start_offset): + # before first non-ws char of the line, or before start of instruction + carets.append(' ') + elif anchors and ( + lineno > anchors.left_end_lineno or + (lineno == anchors.left_end_lineno and col >= anchors_left_end_offset) + ) and ( + lineno < anchors.right_start_lineno or + (lineno == anchors.right_start_lineno and col < anchors_right_start_offset) + ): + # within anchors + carets.append(secondary_char) + else: + carets.append(primary_char) + if colorize: + # Replace the previous line with a red version of it only in the parts covered + # by the carets. + line = result[-1] + colorized_line_parts = [] + colorized_carets_parts = [] + + for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]): + caret_group = list(group) + if color == "^": + colorized_line_parts.append(ANSIColors.BOLD_RED + "".join(char for char, _ in caret_group) + ANSIColors.RESET) + colorized_carets_parts.append(ANSIColors.BOLD_RED + "".join(caret for _, caret in caret_group) + ANSIColors.RESET) + elif color == "~": + colorized_line_parts.append(ANSIColors.RED + "".join(char for char, _ in caret_group) + ANSIColors.RESET) + colorized_carets_parts.append(ANSIColors.RED + "".join(caret for _, caret in caret_group) + ANSIColors.RESET) + else: + colorized_line_parts.append("".join(char for char, _ in caret_group)) + colorized_carets_parts.append("".join(caret for _, caret in caret_group)) + + colorized_line = "".join(colorized_line_parts) + colorized_carets = "".join(colorized_carets_parts) + result[-1] = colorized_line + result.append(colorized_carets + "\n") + else: + result.append("".join(carets) + 
"\n") + + # display significant lines + sig_lines_list = sorted(significant_lines) + for i, lineno in enumerate(sig_lines_list): + if i: + linediff = lineno - sig_lines_list[i - 1] + if linediff == 2: + # 1 line in between - just output it + output_line(lineno - 1) + elif linediff > 2: + # > 1 line in between - abbreviate + result.append(f"...<{linediff - 1} lines>...\n") + output_line(lineno) + + row.append( + textwrap.indent(textwrap.dedent("".join(result)), ' ', lambda line: True) + ) + if frame_summary.locals: + for name, value in sorted(frame_summary.locals.items()): + row.append(' {name} = {value}\n'.format(name=name, value=value)) + + return ''.join(row) + + def _should_show_carets(self, start_offset, end_offset, all_lines, anchors): + with suppress(SyntaxError, ImportError): + import ast + tree = ast.parse('\n'.join(all_lines)) + if not tree.body: + return False + statement = tree.body[0] + value = None + def _spawns_full_line(value): + return ( + value.lineno == 1 + and value.end_lineno == len(all_lines) + and value.col_offset == start_offset + and value.end_col_offset == end_offset + ) + match statement: + case ast.Return(value=ast.Call()): + if isinstance(statement.value.func, ast.Name): + value = statement.value + case ast.Assign(value=ast.Call()): + if ( + len(statement.targets) == 1 and + isinstance(statement.targets[0], ast.Name) + ): + value = statement.value + if value is not None and _spawns_full_line(value): + return False + if anchors: + return True + if all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip(): + return True + return False + + def format(self, **kwargs): """Format the stack ready for printing. Returns a list of strings ready for printing. Each string in the @@ -414,37 +747,34 @@ def format(self): repetitions are shown, followed by a summary line stating the exact number of further repetitions. 
""" + colorize = kwargs.get("colorize", False) result = [] last_file = None last_line = None last_name = None count = 0 - for frame in self: - if (last_file is None or last_file != frame.filename or - last_line is None or last_line != frame.lineno or - last_name is None or last_name != frame.name): + for frame_summary in self: + formatted_frame = self.format_frame_summary(frame_summary, colorize=colorize) + if formatted_frame is None: + continue + if (last_file is None or last_file != frame_summary.filename or + last_line is None or last_line != frame_summary.lineno or + last_name is None or last_name != frame_summary.name): if count > _RECURSIVE_CUTOFF: count -= _RECURSIVE_CUTOFF result.append( f' [Previous line repeated {count} more ' f'time{"s" if count > 1 else ""}]\n' ) - last_file = frame.filename - last_line = frame.lineno - last_name = frame.name + last_file = frame_summary.filename + last_line = frame_summary.lineno + last_name = frame_summary.name count = 0 count += 1 if count > _RECURSIVE_CUTOFF: continue - row = [] - row.append(' File "{}", line {}, in {}\n'.format( - frame.filename, frame.lineno, frame.name)) - if frame.line: - row.append(' {}\n'.format(frame.line.strip())) - if frame.locals: - for name, value in sorted(frame.locals.items()): - row.append(' {name} = {value}\n'.format(name=name, value=value)) - result.append(''.join(row)) + result.append(formatted_frame) + if count > _RECURSIVE_CUTOFF: count -= _RECURSIVE_CUTOFF result.append( @@ -454,6 +784,216 @@ def format(self): return result +def _byte_offset_to_character_offset(str, offset): + as_utf8 = str.encode('utf-8') + return len(as_utf8[:offset].decode("utf-8", errors="replace")) + + +_Anchors = collections.namedtuple( + "_Anchors", + [ + "left_end_lineno", + "left_end_offset", + "right_start_lineno", + "right_start_offset", + "primary_char", + "secondary_char", + ], + defaults=["~", "^"] +) + +def _extract_caret_anchors_from_line_segment(segment): + """ + Given source code `segment` 
corresponding to a FrameSummary, determine: + - for binary ops, the location of the binary op + - for indexing and function calls, the location of the brackets. + `segment` is expected to be a valid Python expression. + """ + import ast + + try: + # Without parentheses, `segment` is parsed as a statement. + # Binary ops, subscripts, and calls are expressions, so + # we can wrap them with parentheses to parse them as + # (possibly multi-line) expressions. + # e.g. if we try to highlight the addition in + # x = ( + # a + + # b + # ) + # then we would ast.parse + # a + + # b + # which is not a valid statement because of the newline. + # Adding brackets makes it a valid expression. + # ( + # a + + # b + # ) + # Line locations will be different than the original, + # which is taken into account later on. + tree = ast.parse(f"(\n{segment}\n)") + except SyntaxError: + return None + + if len(tree.body) != 1: + return None + + lines = segment.splitlines() + + def normalize(lineno, offset): + """Get character index given byte offset""" + return _byte_offset_to_character_offset(lines[lineno], offset) + + def next_valid_char(lineno, col): + """Gets the next valid character index in `lines`, if + the current location is not valid. Handles empty lines. 
+ """ + while lineno < len(lines) and col >= len(lines[lineno]): + col = 0 + lineno += 1 + assert lineno < len(lines) and col < len(lines[lineno]) + return lineno, col + + def increment(lineno, col): + """Get the next valid character index in `lines`.""" + col += 1 + lineno, col = next_valid_char(lineno, col) + return lineno, col + + def nextline(lineno, col): + """Get the next valid character at least on the next line""" + col = 0 + lineno += 1 + lineno, col = next_valid_char(lineno, col) + return lineno, col + + def increment_until(lineno, col, stop): + """Get the next valid non-"\\#" character that satisfies the `stop` predicate""" + while True: + ch = lines[lineno][col] + if ch in "\\#": + lineno, col = nextline(lineno, col) + elif not stop(ch): + lineno, col = increment(lineno, col) + else: + break + return lineno, col + + def setup_positions(expr, force_valid=True): + """Get the lineno/col position of the end of `expr`. If `force_valid` is True, + forces the position to be a valid character (e.g. 
if the position is beyond the + end of the line, move to the next line) + """ + # -2 since end_lineno is 1-indexed and because we added an extra + # bracket + newline to `segment` when calling ast.parse + lineno = expr.end_lineno - 2 + col = normalize(lineno, expr.end_col_offset) + return next_valid_char(lineno, col) if force_valid else (lineno, col) + + statement = tree.body[0] + match statement: + case ast.Expr(expr): + match expr: + case ast.BinOp(): + # ast gives these locations for BinOp subexpressions + # ( left_expr ) + ( right_expr ) + # left^^^^^ right^^^^^ + lineno, col = setup_positions(expr.left) + + # First operator character is the first non-space/')' character + lineno, col = increment_until(lineno, col, lambda x: not x.isspace() and x != ')') + + # binary op is 1 or 2 characters long, on the same line, + # before the right subexpression + right_col = col + 1 + if ( + right_col < len(lines[lineno]) + and ( + # operator char should not be in the right subexpression + expr.right.lineno - 2 > lineno or + right_col < normalize(expr.right.lineno - 2, expr.right.col_offset) + ) + and not (ch := lines[lineno][right_col]).isspace() + and ch not in "\\#" + ): + right_col += 1 + + # right_col can be invalid since it is exclusive + return _Anchors(lineno, col, lineno, right_col) + case ast.Subscript(): + # ast gives these locations for value and slice subexpressions + # ( value_expr ) [ slice_expr ] + # value^^^^^ slice^^^^^ + # subscript^^^^^^^^^^^^^^^^^^^^ + + # find left bracket + left_lineno, left_col = setup_positions(expr.value) + left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '[') + # find right bracket (final character of expression) + right_lineno, right_col = setup_positions(expr, force_valid=False) + return _Anchors(left_lineno, left_col, right_lineno, right_col) + case ast.Call(): + # ast gives these locations for function call expressions + # ( func_expr ) (args, kwargs) + # func^^^^^ + # call^^^^^^^^^^^^^^^^^^^^^^^^ 
+ + # find left bracket + left_lineno, left_col = setup_positions(expr.func) + left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '(') + # find right bracket (final character of expression) + right_lineno, right_col = setup_positions(expr, force_valid=False) + return _Anchors(left_lineno, left_col, right_lineno, right_col) + + return None + +_WIDE_CHAR_SPECIFIERS = "WF" + +def _display_width(line, offset=None): + """Calculate the extra amount of width space the given source + code segment might take if it were to be displayed on a fixed + width output device. Supports wide unicode characters and emojis.""" + + if offset is None: + offset = len(line) + + # Fast track for ASCII-only strings + if line.isascii(): + return offset + + import unicodedata + + return sum( + 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1 + for char in line[:offset] + ) + + + +class _ExceptionPrintContext: + def __init__(self): + self.seen = set() + self.exception_group_depth = 0 + self.need_close = False + + def indent(self): + return ' ' * (2 * self.exception_group_depth) + + def emit(self, text_gen, margin_char=None): + if margin_char is None: + margin_char = '|' + indent_str = self.indent() + if self.exception_group_depth: + indent_str += margin_char + ' ' + + if isinstance(text_gen, str): + yield textwrap.indent(text_gen, indent_str, lambda line: True) + else: + for text in text_gen: + yield textwrap.indent(text, indent_str, lambda line: True) + + class TracebackException: """An exception ready for rendering. @@ -461,16 +1001,24 @@ class TracebackException: to this intermediary form to ensure that no references are held, while still being able to fully print or format it. + max_group_width and max_group_depth control the formatting of exception + groups. The depth refers to the nesting level of the group, and the width + refers to the size of a single exception group's exceptions array. 
The + formatted output is truncated when either limit is exceeded. + Use `from_exception` to create TracebackException instances from exception objects, or the constructor to create TracebackException instances from individual components. - :attr:`__cause__` A TracebackException of the original *__cause__*. - :attr:`__context__` A TracebackException of the original *__context__*. + - :attr:`exceptions` For exception groups - a list of TracebackException + instances for the nested *exceptions*. ``None`` for other exceptions. - :attr:`__suppress_context__` The *__suppress_context__* value from the original exception. - :attr:`stack` A `StackSummary` representing the traceback. - - :attr:`exc_type` The class of the original traceback. + - :attr:`exc_type` (deprecated) The class of the original traceback. + - :attr:`exc_type_str` String display of exc_type - :attr:`filename` For syntax errors - the filename where the error occurred. - :attr:`lineno` For syntax errors - the linenumber where the error @@ -481,14 +1029,14 @@ class TracebackException: occurred. - :attr:`offset` For syntax errors - the offset into the text where the error occurred. - - :attr:`end_offset` For syntax errors - the offset into the text where the - error occurred. Can be `None` if not present. + - :attr:`end_offset` For syntax errors - the end offset into the text where + the error occurred. Can be `None` if not present. - :attr:`msg` For syntax errors - the compiler error message. """ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, lookup_lines=True, capture_locals=False, compact=False, - _seen=None): + max_group_width=15, max_group_depth=10, save_exc_type=True, _seen=None): # NB: we need to accept exc_traceback, exc_value, exc_traceback to # permit backwards compat with the existing API, otherwise we # need stub thunk objects just to glue it together. 
@@ -498,14 +1046,34 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, _seen = set() _seen.add(id(exc_value)) - # TODO: locals. - self.stack = StackSummary.extract( - walk_tb(exc_traceback), limit=limit, lookup_lines=lookup_lines, + self.max_group_width = max_group_width + self.max_group_depth = max_group_depth + + self.stack = StackSummary._extract_from_extended_frame_gen( + _walk_tb_with_full_positions(exc_traceback), + limit=limit, lookup_lines=lookup_lines, capture_locals=capture_locals) - self.exc_type = exc_type + + self._exc_type = exc_type if save_exc_type else None + # Capture now to permit freeing resources: only complication is in the # unofficial API _format_final_exc_line - self._str = _some_str(exc_value) + self._str = _safe_string(exc_value, 'exception') + try: + self.__notes__ = getattr(exc_value, '__notes__', None) + except Exception as e: + self.__notes__ = [ + f'Ignored error getting __notes__: {_safe_string(e, '__notes__', repr)}'] + + self._is_syntax_error = False + self._have_exc_type = exc_type is not None + if exc_type is not None: + self.exc_type_qualname = exc_type.__qualname__ + self.exc_type_module = exc_type.__module__ + else: + self.exc_type_qualname = None + self.exc_type_module = None + if exc_type and issubclass(exc_type, SyntaxError): # Handle SyntaxError's specially self.filename = exc_value.filename @@ -517,6 +1085,26 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, self.offset = exc_value.offset self.end_offset = exc_value.end_offset self.msg = exc_value.msg + self._is_syntax_error = True + elif exc_type and issubclass(exc_type, ImportError) and \ + getattr(exc_value, "name_from", None) is not None: + wrong_name = getattr(exc_value, "name_from", None) + suggestion = _compute_suggestion_error(exc_value, exc_traceback, wrong_name) + if suggestion: + self._str += f". Did you mean: '{suggestion}'?" 
+ elif exc_type and issubclass(exc_type, (NameError, AttributeError)) and \ + getattr(exc_value, "name", None) is not None: + wrong_name = getattr(exc_value, "name", None) + suggestion = _compute_suggestion_error(exc_value, exc_traceback, wrong_name) + if suggestion: + self._str += f". Did you mean: '{suggestion}'?" + if issubclass(exc_type, NameError): + wrong_name = getattr(exc_value, "name", None) + if wrong_name is not None and wrong_name in sys.stdlib_module_names: + if suggestion: + self._str += f" Or did you forget to import '{wrong_name}'?" + else: + self._str += f". Did you forget to import '{wrong_name}'?" if lookup_lines: self._load_lines() self.__suppress_context__ = \ @@ -528,7 +1116,7 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, queue = [(self, exc_value)] while queue: te, e = queue.pop() - if (e and e.__cause__ is not None + if (e is not None and e.__cause__ is not None and id(e.__cause__) not in _seen): cause = TracebackException( type(e.__cause__), @@ -537,6 +1125,8 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, limit=limit, lookup_lines=lookup_lines, capture_locals=capture_locals, + max_group_width=max_group_width, + max_group_depth=max_group_depth, _seen=_seen) else: cause = None @@ -547,7 +1137,7 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, not e.__suppress_context__) else: need_context = True - if (e and e.__context__ is not None + if (e is not None and e.__context__ is not None and need_context and id(e.__context__) not in _seen): context = TracebackException( type(e.__context__), @@ -556,21 +1146,62 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, limit=limit, lookup_lines=lookup_lines, capture_locals=capture_locals, + max_group_width=max_group_width, + max_group_depth=max_group_depth, _seen=_seen) else: context = None + + if e is not None and isinstance(e, BaseExceptionGroup): + exceptions = [] + for exc in e.exceptions: + texc = 
TracebackException( + type(exc), + exc, + exc.__traceback__, + limit=limit, + lookup_lines=lookup_lines, + capture_locals=capture_locals, + max_group_width=max_group_width, + max_group_depth=max_group_depth, + _seen=_seen) + exceptions.append(texc) + else: + exceptions = None + te.__cause__ = cause te.__context__ = context + te.exceptions = exceptions if cause: queue.append((te.__cause__, e.__cause__)) if context: queue.append((te.__context__, e.__context__)) + if exceptions: + queue.extend(zip(te.exceptions, e.exceptions)) @classmethod def from_exception(cls, exc, *args, **kwargs): """Create a TracebackException from an exception.""" return cls(type(exc), exc, exc.__traceback__, *args, **kwargs) + @property + def exc_type(self): + warnings.warn('Deprecated in 3.13. Use exc_type_str instead.', + DeprecationWarning, stacklevel=2) + return self._exc_type + + @property + def exc_type_str(self): + if not self._have_exc_type: + return None + stype = self.exc_type_qualname + smod = self.exc_type_module + if smod not in ("__main__", "builtins"): + if not isinstance(smod, str): + smod = "" + stype = smod + '.' + stype + return stype + def _load_lines(self): """Private API. force all lines in the stack to be loaded.""" for frame in self.stack: @@ -584,72 +1215,152 @@ def __eq__(self, other): def __str__(self): return self._str - def format_exception_only(self): + def format_exception_only(self, *, show_group=False, _depth=0, **kwargs): """Format the exception part of the traceback. The return value is a generator of strings, each ending in a newline. - Normally, the generator emits a single string; however, for - SyntaxError exceptions, it emits several lines that (when - printed) display detailed information about where the syntax - error occurred. - - The message indicating which exception occurred is always the last - string in the output. + Generator yields the exception message. 
+ For :exc:`SyntaxError` exceptions, it + also yields (before the exception message) + several lines that (when printed) + display detailed information about where the syntax error occurred. + Following the message, generator also yields + all the exception's ``__notes__``. + + When *show_group* is ``True``, and the exception is an instance of + :exc:`BaseExceptionGroup`, the nested exceptions are included as + well, recursively, with indentation relative to their nesting depth. """ - if self.exc_type is None: - yield _format_final_exc_line(None, self._str) - return + colorize = kwargs.get("colorize", False) - stype = self.exc_type.__qualname__ - smod = self.exc_type.__module__ - if smod not in ("__main__", "builtins"): - if not isinstance(smod, str): - smod = "" - stype = smod + '.' + stype + indent = 3 * _depth * ' ' + if not self._have_exc_type: + yield indent + _format_final_exc_line(None, self._str, colorize=colorize) + return - if not issubclass(self.exc_type, SyntaxError): - yield _format_final_exc_line(stype, self._str) + stype = self.exc_type_str + if not self._is_syntax_error: + if _depth > 0: + # Nested exceptions needs correct handling of multiline messages. 
+ formatted = _format_final_exc_line( + stype, self._str, insert_final_newline=False, colorize=colorize + ).split('\n') + yield from [ + indent + l + '\n' + for l in formatted + ] + else: + yield _format_final_exc_line(stype, self._str, colorize=colorize) else: - yield from self._format_syntax_error(stype) - - def _format_syntax_error(self, stype): + yield from [indent + l for l in self._format_syntax_error(stype, colorize=colorize)] + + if ( + isinstance(self.__notes__, collections.abc.Sequence) + and not isinstance(self.__notes__, (str, bytes)) + ): + for note in self.__notes__: + note = _safe_string(note, 'note') + yield from [indent + l + '\n' for l in note.split('\n')] + elif self.__notes__ is not None: + yield indent + "{}\n".format(_safe_string(self.__notes__, '__notes__', func=repr)) + + if self.exceptions and show_group: + for ex in self.exceptions: + yield from ex.format_exception_only(show_group=show_group, _depth=_depth+1, colorize=colorize) + + def _format_syntax_error(self, stype, **kwargs): """Format SyntaxError exceptions (internal helper).""" # Show exactly where the problem was found. 
+ colorize = kwargs.get("colorize", False) filename_suffix = '' if self.lineno is not None: - yield ' File "{}", line {}\n'.format( - self.filename or "", self.lineno) + if colorize: + yield ' File {}"{}"{}, line {}{}{}\n'.format( + ANSIColors.MAGENTA, + self.filename or "", + ANSIColors.RESET, + ANSIColors.MAGENTA, + self.lineno, + ANSIColors.RESET, + ) + else: + yield ' File "{}", line {}\n'.format( + self.filename or "", self.lineno) elif self.filename is not None: filename_suffix = ' ({})'.format(self.filename) text = self.text - if text is not None: + if isinstance(text, str): # text = " foo\n" # rtext = " foo" # ltext = "foo" rtext = text.rstrip('\n') ltext = rtext.lstrip(' \n\f') spaces = len(rtext) - len(ltext) - yield ' {}\n'.format(ltext) - - if self.offset is not None: + if self.offset is None: + yield ' {}\n'.format(ltext) + elif isinstance(self.offset, int): offset = self.offset - end_offset = self.end_offset if self.end_offset not in {None, 0} else offset - if offset == end_offset or end_offset == -1: + if self.lineno == self.end_lineno: + end_offset = ( + self.end_offset + if ( + isinstance(self.end_offset, int) + and self.end_offset != 0 + ) + else offset + ) + else: + end_offset = len(rtext) + 1 + + if self.text and offset > len(self.text): + offset = len(rtext) + 1 + if self.text and end_offset > len(self.text): + end_offset = len(rtext) + 1 + if offset >= end_offset or end_offset < 0: end_offset = offset + 1 # Convert 1-based column offset to 0-based index into stripped text colno = offset - 1 - spaces end_colno = end_offset - 1 - spaces + caretspace = ' ' if colno >= 0: # non-space whitespace (likes tabs) must be kept for alignment caretspace = ((c if c.isspace() else ' ') for c in ltext[:colno]) - yield ' {}{}'.format("".join(caretspace), ('^' * (end_colno - colno) + "\n")) + start_color = end_color = "" + if colorize: + # colorize from colno to end_colno + ltext = ( + ltext[:colno] + + ANSIColors.BOLD_RED + ltext[colno:end_colno] + 
ANSIColors.RESET + + ltext[end_colno:] + ) + start_color = ANSIColors.BOLD_RED + end_color = ANSIColors.RESET + yield ' {}\n'.format(ltext) + yield ' {}{}{}{}\n'.format( + "".join(caretspace), + start_color, + ('^' * (end_colno - colno)), + end_color, + ) + else: + yield ' {}\n'.format(ltext) msg = self.msg or "" - yield "{}: {}{}\n".format(stype, msg, filename_suffix) + if colorize: + yield "{}{}{}: {}{}{}{}\n".format( + ANSIColors.BOLD_MAGENTA, + stype, + ANSIColors.RESET, + ANSIColors.MAGENTA, + msg, + ANSIColors.RESET, + filename_suffix) + else: + yield "{}: {}{}\n".format(stype, msg, filename_suffix) - def format(self, *, chain=True): + def format(self, *, chain=True, _ctx=None, **kwargs): """Format the exception. If chain is not *True*, *__cause__* and *__context__* will not be formatted. @@ -661,11 +1372,14 @@ def format(self, *, chain=True): The message indicating which exception occurred is always the last string in the output. """ + colorize = kwargs.get("colorize", False) + if _ctx is None: + _ctx = _ExceptionPrintContext() output = [] exc = self - while exc: - if chain: + if chain: + while exc: if exc.__cause__ is not None: chained_msg = _cause_message chained_exc = exc.__cause__ @@ -679,14 +1393,246 @@ def format(self, *, chain=True): output.append((chained_msg, exc)) exc = chained_exc - else: - output.append((None, exc)) - exc = None + else: + output.append((None, exc)) for msg, exc in reversed(output): if msg is not None: - yield msg - if exc.stack: - yield 'Traceback (most recent call last):\n' - yield from exc.stack.format() - yield from exc.format_exception_only() + yield from _ctx.emit(msg) + if exc.exceptions is None: + if exc.stack: + yield from _ctx.emit('Traceback (most recent call last):\n') + yield from _ctx.emit(exc.stack.format(colorize=colorize)) + yield from _ctx.emit(exc.format_exception_only(colorize=colorize)) + elif _ctx.exception_group_depth > self.max_group_depth: + # exception group, but depth exceeds limit + yield from 
_ctx.emit( + f"... (max_group_depth is {self.max_group_depth})\n") + else: + # format exception group + is_toplevel = (_ctx.exception_group_depth == 0) + if is_toplevel: + _ctx.exception_group_depth += 1 + + if exc.stack: + yield from _ctx.emit( + 'Exception Group Traceback (most recent call last):\n', + margin_char = '+' if is_toplevel else None) + yield from _ctx.emit(exc.stack.format(colorize=colorize)) + + yield from _ctx.emit(exc.format_exception_only(colorize=colorize)) + num_excs = len(exc.exceptions) + if num_excs <= self.max_group_width: + n = num_excs + else: + n = self.max_group_width + 1 + _ctx.need_close = False + for i in range(n): + last_exc = (i == n-1) + if last_exc: + # The closing frame may be added by a recursive call + _ctx.need_close = True + + if self.max_group_width is not None: + truncated = (i >= self.max_group_width) + else: + truncated = False + title = f'{i+1}' if not truncated else '...' + yield (_ctx.indent() + + ('+-' if i==0 else ' ') + + f'+---------------- {title} ----------------\n') + _ctx.exception_group_depth += 1 + if not truncated: + yield from exc.exceptions[i].format(chain=chain, _ctx=_ctx, colorize=colorize) + else: + remaining = num_excs - self.max_group_width + plural = 's' if remaining > 1 else '' + yield from _ctx.emit( + f"and {remaining} more exception{plural}\n") + + if last_exc and _ctx.need_close: + yield (_ctx.indent() + + "+------------------------------------\n") + _ctx.need_close = False + _ctx.exception_group_depth -= 1 + + if is_toplevel: + assert _ctx.exception_group_depth == 1 + _ctx.exception_group_depth = 0 + + + def print(self, *, file=None, chain=True, **kwargs): + """Print the result of self.format(chain=chain) to 'file'.""" + colorize = kwargs.get("colorize", False) + if file is None: + file = sys.stderr + for line in self.format(chain=chain, colorize=colorize): + print(line, file=file, end="") + + +_MAX_CANDIDATE_ITEMS = 750 +_MAX_STRING_SIZE = 40 +_MOVE_COST = 2 +_CASE_COST = 1 + + +def 
_substitution_cost(ch_a, ch_b): + if ch_a == ch_b: + return 0 + if ch_a.lower() == ch_b.lower(): + return _CASE_COST + return _MOVE_COST + + +def _compute_suggestion_error(exc_value, tb, wrong_name): + if wrong_name is None or not isinstance(wrong_name, str): + return None + if isinstance(exc_value, AttributeError): + obj = exc_value.obj + try: + try: + d = dir(obj) + except TypeError: # Attributes are unsortable, e.g. int and str + d = list(obj.__class__.__dict__.keys()) + list(obj.__dict__.keys()) + d = sorted([x for x in d if isinstance(x, str)]) + hide_underscored = (wrong_name[:1] != '_') + if hide_underscored and tb is not None: + while tb.tb_next is not None: + tb = tb.tb_next + frame = tb.tb_frame + if 'self' in frame.f_locals and frame.f_locals['self'] is obj: + hide_underscored = False + if hide_underscored: + d = [x for x in d if x[:1] != '_'] + except Exception: + return None + elif isinstance(exc_value, ImportError): + try: + mod = __import__(exc_value.name) + try: + d = dir(mod) + except TypeError: # Attributes are unsortable, e.g. 
int and str + d = list(mod.__dict__.keys()) + d = sorted([x for x in d if isinstance(x, str)]) + if wrong_name[:1] != '_': + d = [x for x in d if x[:1] != '_'] + except Exception: + return None + else: + assert isinstance(exc_value, NameError) + # find most recent frame + if tb is None: + return None + while tb.tb_next is not None: + tb = tb.tb_next + frame = tb.tb_frame + d = ( + list(frame.f_locals) + + list(frame.f_globals) + + list(frame.f_builtins) + ) + d = [x for x in d if isinstance(x, str)] + + # Check first if we are in a method and the instance + # has the wrong name as attribute + if 'self' in frame.f_locals: + self = frame.f_locals['self'] + try: + has_wrong_name = hasattr(self, wrong_name) + except Exception: + has_wrong_name = False + if has_wrong_name: + return f"self.{wrong_name}" + + try: + import _suggestions + except ImportError: + pass + else: + return _suggestions._generate_suggestions(d, wrong_name) + + # Compute closest match + + if len(d) > _MAX_CANDIDATE_ITEMS: + return None + wrong_name_len = len(wrong_name) + if wrong_name_len > _MAX_STRING_SIZE: + return None + best_distance = wrong_name_len + suggestion = None + for possible_name in d: + if possible_name == wrong_name: + # A missing attribute is "found". Don't suggest it (see GH-88821). + continue + # No more than 1/3 of the involved characters should need changed. + max_distance = (len(possible_name) + wrong_name_len + 3) * _MOVE_COST // 6 + # Don't take matches we've already beaten. + max_distance = min(max_distance, best_distance - 1) + current_distance = _levenshtein_distance(wrong_name, possible_name, max_distance) + if current_distance > max_distance: + continue + if not suggestion or current_distance < best_distance: + suggestion = possible_name + best_distance = current_distance + return suggestion + + +def _levenshtein_distance(a, b, max_cost): + # A Python implementation of Python/suggestions.c:levenshtein_distance. 
+ + # Both strings are the same + if a == b: + return 0 + + # Trim away common affixes + pre = 0 + while a[pre:] and b[pre:] and a[pre] == b[pre]: + pre += 1 + a = a[pre:] + b = b[pre:] + post = 0 + while a[:post or None] and b[:post or None] and a[post-1] == b[post-1]: + post -= 1 + a = a[:post or None] + b = b[:post or None] + if not a or not b: + return _MOVE_COST * (len(a) + len(b)) + if len(a) > _MAX_STRING_SIZE or len(b) > _MAX_STRING_SIZE: + return max_cost + 1 + + # Prefer shorter buffer + if len(b) < len(a): + a, b = b, a + + # Quick fail when a match is impossible + if (len(b) - len(a)) * _MOVE_COST > max_cost: + return max_cost + 1 + + # Instead of producing the whole traditional len(a)-by-len(b) + # matrix, we can update just one row in place. + # Initialize the buffer row + row = list(range(_MOVE_COST, _MOVE_COST * (len(a) + 1), _MOVE_COST)) + + result = 0 + for bindex in range(len(b)): + bchar = b[bindex] + distance = result = bindex * _MOVE_COST + minimum = sys.maxsize + for index in range(len(a)): + # 1) Previous distance in this row is cost(b[:b_index], a[:index]) + substitute = distance + _substitution_cost(bchar, a[index]) + # 2) cost(b[:b_index], a[:index+1]) from previous row + distance = row[index] + # 3) existing result is cost(b[:b_index+1], a[index]) + + insert_delete = min(result, distance) + _MOVE_COST + result = min(insert_delete, substitute) + + # cost(b[:b_index+1], a[:index+1]) + row[index] = result + if result < minimum: + minimum = result + if minimum > max_cost: + # Everything in this row is too big, so bail early. 
+ return max_cost + 1 + return result diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index c44b2b00684..5cc7d0d2212 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -115,11 +115,22 @@ enum DoneWithFuture { Yes, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct CompileOpts { /// How optimized the bytecode output should be; any optimize > 0 does /// not emit assert statements pub optimize: u8, + /// Include column info in bytecode (-X no_debug_ranges disables) + pub debug_ranges: bool, +} + +impl Default for CompileOpts { + fn default() -> Self { + Self { + optimize: 0, + debug_ranges: true, + } + } } #[derive(Debug, Clone, Copy)] @@ -859,7 +870,7 @@ impl Compiler { let pop = self.code_stack.pop(); let stack_top = compiler_unwrap_option(self, pop); // No parent scope stack to maintain - unwrap_internal(self, stack_top.finalize_code(self.opts.optimize)) + unwrap_internal(self, stack_top.finalize_code(&self.opts)) } /// Push a new fblock @@ -1486,7 +1497,9 @@ impl Compiler { .. }) => self.compile_for(target, iter, body, orelse, *is_async)?, Stmt::Match(StmtMatch { subject, cases, .. }) => self.compile_match(subject, cases)?, - Stmt::Raise(StmtRaise { exc, cause, .. }) => { + Stmt::Raise(StmtRaise { + exc, cause, range, .. 
+ }) => { let kind = match exc { Some(value) => { self.compile_expression(value)?; @@ -1500,6 +1513,7 @@ impl Compiler { } None => bytecode::RaiseKind::Reraise, }; + self.set_source_range(*range); emit!(self, Instruction::Raise { kind }); } Stmt::Try(StmtTry { @@ -5639,17 +5653,15 @@ impl Compiler { // Low level helper functions: fn _emit(&mut self, instr: Instruction, arg: OpArg, target: BlockIdx) { let range = self.current_source_range; - let location = self - .source_file - .to_source_code() - .source_location(range.start(), PositionEncoding::Utf8); - // TODO: insert source filename + let source = self.source_file.to_source_code(); + let location = source.source_location(range.start(), PositionEncoding::Utf8); + let end_location = source.source_location(range.end(), PositionEncoding::Utf8); self.current_block().instructions.push(ir::InstructionInfo { instr, arg, target, location, - // range, + end_location, }); } diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 670635fbd37..31ee8d8b230 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -86,9 +86,8 @@ pub struct InstructionInfo { pub instr: Instruction, pub arg: OpArg, pub target: BlockIdx, - // pub range: TextRange, pub location: SourceLocation, - // TODO: end_location for debug ranges + pub end_location: SourceLocation, } // spell-checker:ignore petgraph @@ -133,8 +132,11 @@ pub struct CodeInfo { } impl CodeInfo { - pub fn finalize_code(mut self, optimize: u8) -> crate::InternalResult { - if optimize > 0 { + pub fn finalize_code( + mut self, + opts: &crate::compile::CompileOpts, + ) -> crate::InternalResult { + if opts.optimize > 0 { self.dce(); } @@ -198,7 +200,10 @@ impl CodeInfo { *arg = new_arg; } let (extras, lo_arg) = arg.split(); - locations.extend(core::iter::repeat_n(info.location, arg.instr_size())); + locations.extend(core::iter::repeat_n( + (info.location, info.end_location), + arg.instr_size(), + )); instructions.extend( extras .map(|byte| 
CodeUnit::new(Instruction::ExtendedArg, byte)) @@ -217,7 +222,11 @@ impl CodeInfo { } // Generate linetable from locations - let linetable = generate_linetable(&locations, first_line_number.get() as i32); + let linetable = generate_linetable( + &locations, + first_line_number.get() as i32, + opts.debug_ranges, + ); Ok(CodeObject { flags, @@ -412,7 +421,11 @@ fn iter_blocks(blocks: &[Block]) -> impl Iterator + ' } /// Generate CPython 3.11+ format linetable from source locations -fn generate_linetable(locations: &[SourceLocation], first_line: i32) -> Box<[u8]> { +fn generate_linetable( + locations: &[(SourceLocation, SourceLocation)], + first_line: i32, + debug_ranges: bool, +) -> Box<[u8]> { if locations.is_empty() { return Box::new([]); } @@ -424,7 +437,7 @@ fn generate_linetable(locations: &[SourceLocation], first_line: i32) -> Box<[u8] let mut i = 0; while i < locations.len() { - let loc = &locations[i]; + let (loc, end_loc) = &locations[i]; // Count consecutive instructions with the same location let mut length = 1; @@ -436,18 +449,33 @@ fn generate_linetable(locations: &[SourceLocation], first_line: i32) -> Box<[u8] while length > 0 { let entry_length = length.min(8); - // Get line and column information - // SourceLocation always has row and column (both are OneIndexed) + // Get line information let line = loc.line.get() as i32; - let col = loc.character_offset.to_zero_indexed() as i32; - + let end_line = end_loc.line.get() as i32; let line_delta = line - prev_line; + let end_line_delta = end_line - line; - // Choose the appropriate encoding based on line delta and column info - // Note: SourceLocation always has valid column, so we never get NO_COLUMNS case - if line_delta == 0 { - let end_col = col; // Use same column for end (no range info available) + // When debug_ranges is disabled, only emit line info (NoColumns format) + if !debug_ranges { + // NoColumns format (code 13): line info only, no column data + linetable.push( + 0x80 | 
((PyCodeLocationInfoKind::NoColumns as u8) << 3) + | ((entry_length - 1) as u8), + ); + write_signed_varint(&mut linetable, line_delta); + prev_line = line; + length -= entry_length; + i += entry_length; + continue; + } + + // Get column information (only when debug_ranges is enabled) + let col = loc.character_offset.to_zero_indexed() as i32; + let end_col = end_loc.character_offset.to_zero_indexed() as i32; + + // Choose the appropriate encoding based on line delta and column info + if line_delta == 0 && end_line_delta == 0 { if col < 80 && end_col - col < 16 && end_col >= col { // Short form (codes 0-9) for common cases let code = (col / 8).min(9) as u8; // Short0 to Short9 @@ -470,42 +498,37 @@ fn generate_linetable(locations: &[SourceLocation], first_line: i32) -> Box<[u8] ); write_signed_varint(&mut linetable, 0); // line_delta = 0 write_varint(&mut linetable, 0); // end_line delta = 0 - write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding - write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1 + write_varint(&mut linetable, (col as u32) + 1); + write_varint(&mut linetable, (end_col as u32) + 1); } - } else if line_delta > 0 && line_delta < 3 - /* && column.is_some() */ - { + } else if line_delta > 0 && line_delta < 3 && end_line_delta == 0 { // One-line form (codes 11-12) for line deltas 1-2 - let end_col = col; // Use same column for end - if col < 128 && end_col < 128 { - let code = (PyCodeLocationInfoKind::OneLine0 as u8) + (line_delta as u8); // 11 for delta=1, 12 for delta=2 + let code = (PyCodeLocationInfoKind::OneLine0 as u8) + (line_delta as u8); linetable.push(0x80 | (code << 3) | ((entry_length - 1) as u8)); linetable.push(col as u8); linetable.push(end_col as u8); } else { - // Long form for columns >= 128 or negative line delta + // Long form for columns >= 128 linetable.push( 0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3) | ((entry_length - 1) as u8), ); write_signed_varint(&mut linetable, line_delta); 
write_varint(&mut linetable, 0); // end_line delta = 0 - write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding - write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1 + write_varint(&mut linetable, (col as u32) + 1); + write_varint(&mut linetable, (end_col as u32) + 1); } } else { // Long form (code 14) for all other cases - // This handles: line_delta < 0, line_delta >= 3, or columns >= 128 - let end_col = col; // Use same column for end + // Handles: line_delta < 0, line_delta >= 3, multi-line spans, or columns >= 128 linetable.push( 0x80 | ((PyCodeLocationInfoKind::Long as u8) << 3) | ((entry_length - 1) as u8), ); write_signed_varint(&mut linetable, line_delta); - write_varint(&mut linetable, 0); // end_line delta = 0 - write_varint(&mut linetable, (col as u32) + 1); // column + 1 for encoding - write_varint(&mut linetable, (end_col as u32) + 1); // end_col + 1 + write_varint(&mut linetable, end_line_delta as u32); + write_varint(&mut linetable, (col as u32) + 1); + write_varint(&mut linetable, (end_col as u32) + 1); } prev_line = line; diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 5569fa2012b..7675ef34863 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -258,7 +258,7 @@ impl ConstantBag for BasicBag { #[derive(Clone)] pub struct CodeObject { pub instructions: CodeUnits, - pub locations: Box<[SourceLocation]>, + pub locations: Box<[(SourceLocation, SourceLocation)]>, pub flags: CodeFlags, /// Number of positional-only arguments pub posonlyarg_count: u32, @@ -1483,14 +1483,14 @@ impl CodeObject { level: usize, ) -> fmt::Result { let label_targets = self.label_targets(); - let line_digits = (3).max(self.locations.last().unwrap().line.digits().get()); + let line_digits = (3).max(self.locations.last().unwrap().0.line.digits().get()); let offset_digits = (4).max(1 + self.instructions.len().ilog10() as usize); let mut last_line = 
OneIndexed::MAX; let mut arg_state = OpArgState::default(); for (offset, &instruction) in self.instructions.iter().enumerate() { let (instruction, arg) = arg_state.get(instruction); // optional line number - let line = self.locations[offset].line; + let line = self.locations[offset].0.line; if line != last_line { if last_line != OneIndexed::MAX { writeln!(f)?; diff --git a/crates/compiler-core/src/marshal.rs b/crates/compiler-core/src/marshal.rs index b30894ea065..5b528fe7e50 100644 --- a/crates/compiler-core/src/marshal.rs +++ b/crates/compiler-core/src/marshal.rs @@ -190,12 +190,17 @@ pub fn deserialize_code( let len = rdr.read_u32()?; let locations = (0..len) .map(|_| { - Ok(SourceLocation { + let start = SourceLocation { line: OneIndexed::new(rdr.read_u32()? as _).ok_or(MarshalError::InvalidLocation)?, character_offset: OneIndexed::from_zero_indexed(rdr.read_u32()? as _), - }) + }; + let end = SourceLocation { + line: OneIndexed::new(rdr.read_u32()? as _).ok_or(MarshalError::InvalidLocation)?, + character_offset: OneIndexed::from_zero_indexed(rdr.read_u32()? 
as _), + }; + Ok((start, end)) }) - .collect::>>()?; + .collect::>>()?; let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?); @@ -648,9 +653,11 @@ pub fn serialize_code(buf: &mut W, code: &CodeObject) buf.write_slice(instructions_bytes); write_len(buf, code.locations.len()); - for loc in &*code.locations { - buf.write_u32(loc.line.get() as _); - buf.write_u32(loc.character_offset.to_zero_indexed() as _); + for (start, end) in &*code.locations { + buf.write_u32(start.line.get() as _); + buf.write_u32(start.character_offset.to_zero_indexed() as _); + buf.write_u32(end.line.get() as _); + buf.write_u32(end.character_offset.to_zero_indexed() as _); } buf.write_u16(code.flags.bits()); diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index b897ef9d311..32e53a5d376 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -467,20 +467,22 @@ impl Constructor for PyCode { .collect::>() .into_boxed_slice(); - // Create locations + // Create locations (start and end pairs) let row = if args.firstlineno > 0 { OneIndexed::new(args.firstlineno as usize).unwrap_or(OneIndexed::MIN) } else { OneIndexed::MIN }; - let locations: Box<[rustpython_compiler_core::SourceLocation]> = vec![ - rustpython_compiler_core::SourceLocation { - line: row, - character_offset: OneIndexed::from_zero_indexed(0), - }; - instructions.len() - ] - .into_boxed_slice(); + let loc = rustpython_compiler_core::SourceLocation { + line: row, + character_offset: OneIndexed::from_zero_indexed(0), + }; + let locations: Box< + [( + rustpython_compiler_core::SourceLocation, + rustpython_compiler_core::SourceLocation, + )], + > = vec![(loc, loc); instructions.len()].into_boxed_slice(); // Build the CodeObject let code = CodeObject { @@ -809,7 +811,6 @@ impl PyCode { Some(line + end_line_delta) }; - // Convert Option to PyObject (None or int) let line_obj = final_line.to_pyobject(vm); let end_line_obj = final_endline.to_pyobject(vm); let column_obj = 
column.to_pyobject(vm); diff --git a/crates/vm/src/builtins/frame.rs b/crates/vm/src/builtins/frame.rs index 3712b04e875..28c4e751476 100644 --- a/crates/vm/src/builtins/frame.rs +++ b/crates/vm/src/builtins/frame.rs @@ -58,12 +58,19 @@ impl Frame { #[pygetset] fn f_lasti(&self) -> u32 { - self.lasti() + // Return byte offset (each instruction is 2 bytes) for compatibility + self.lasti() * 2 } #[pygetset] pub fn f_lineno(&self) -> usize { - self.current_location().line.get() + // If lasti is 0, execution hasn't started yet - use first line number + // Similar to PyCode_Addr2Line which returns co_firstlineno for addr_q < 0 + if self.lasti() == 0 { + self.code.first_line_number.map(|n| n.get()).unwrap_or(1) + } else { + self.current_location().line.get() + } } #[pygetset] diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 6acf0e84795..6500f578dca 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -172,7 +172,7 @@ impl Frame { } pub fn current_location(&self) -> SourceLocation { - self.code.locations[self.lasti() as usize - 1] + self.code.locations[self.lasti() as usize - 1].0 } pub fn lasti(&self) -> u32 { @@ -385,12 +385,12 @@ impl ExecutingFrame<'_> { // 2. Add new entry with current execution position (filename, lineno, code_object) to traceback. // 3. Unwind block stack till appropriate handler is found. 
- let loc = frame.code.locations[idx]; + let (loc, _end_loc) = frame.code.locations[idx]; let next = exception.__traceback__(); let new_traceback = PyTraceback::new( next, frame.object.to_owned(), - frame.lasti(), + frame.lasti() * 2, loc.line, ); vm_trace!("Adding to traceback: {:?} {:?}", new_traceback, loc.line); diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 7c527b3e0da..ddbf7660f7d 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -508,6 +508,7 @@ impl VirtualMachine { pub fn compile_opts(&self) -> crate::compiler::CompileOpts { crate::compiler::CompileOpts { optimize: self.state.config.settings.optimize, + debug_ranges: self.state.config.settings.code_debug_ranges, } } diff --git a/crates/vm/src/vm/setting.rs b/crates/vm/src/vm/setting.rs index 53e2cef1160..8a307d1852b 100644 --- a/crates/vm/src/vm/setting.rs +++ b/crates/vm/src/vm/setting.rs @@ -57,7 +57,8 @@ pub struct Settings { // int tracemalloc; // int perf_profiling; // int import_time; - // int code_debug_ranges; + /// -X no_debug_ranges: disable column info in bytecode + pub code_debug_ranges: bool, // int show_ref_count; // int dump_refs; // wchar_t *dump_refs_file; @@ -192,6 +193,7 @@ impl Default for Settings { argv: vec![], hash_seed: None, faulthandler: false, + code_debug_ranges: true, buffered_stdio: true, check_hash_pycs_mode: CheckHashPycsMode::Default, allow_external_library: cfg!(feature = "importlib"), diff --git a/examples/dis.rs b/examples/dis.rs index 942643cd54b..1ca350603f9 100644 --- a/examples/dis.rs +++ b/examples/dis.rs @@ -53,7 +53,10 @@ fn main() -> Result<(), lexopt::Error> { return Err("expected at least one argument".into()); } - let opts = compiler::CompileOpts { optimize }; + let opts = compiler::CompileOpts { + optimize, + debug_ranges: true, + }; for script in &scripts { if script.exists() && script.is_file() { diff --git a/src/settings.rs b/src/settings.rs index a63f1a07ccc..54e66086932 100644 --- a/src/settings.rs +++ 
b/src/settings.rs @@ -270,6 +270,7 @@ pub fn parse_opts() -> Result<(Settings, RunMode), lexopt::Error> { "faulthandler" => settings.faulthandler = true, "warn_default_encoding" => settings.warn_default_encoding = true, "no_sig_int" => settings.install_signal_handlers = false, + "no_debug_ranges" => settings.code_debug_ranges = false, "int_max_str_digits" => { settings.int_max_str_digits = match value.unwrap().parse() { Ok(digits) if digits == 0 || digits >= 640 => digits, @@ -293,6 +294,9 @@ pub fn parse_opts() -> Result<(Settings, RunMode), lexopt::Error> { settings.warn_default_encoding = settings.warn_default_encoding || env_bool("PYTHONWARNDEFAULTENCODING"); settings.faulthandler = settings.faulthandler || env_bool("PYTHONFAULTHANDLER"); + if env_bool("PYTHONNODEBUGRANGES") { + settings.code_debug_ranges = false; + } if settings.dev_mode { settings.warnoptions.push("default".to_owned());