Add column information to exception tracebacks. Fixes #1099

microsoft · Nov 11, 2022 · 04403dd · 04403dd
1 parent 6c1c3d6
commit 04403dd
Show file tree

Hide file tree

Showing 4 changed files with 340 additions and 5 deletions.
diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py
@@ -87,7 +87,6 @@
 from _pydevd_bundle.pydevd_thread_lifecycle import pydevd_find_thread_by_id, resume_threads
 from _pydevd_bundle.pydevd_dont_trace_files import PYDEV_FILE
 import dis
-from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_exception_cause
 import pydevd_file_utils
 import itertools
 from urllib.parse import quote_plus, unquote_plus

diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py
@@ -2,7 +2,6 @@
     IS_PY311_OR_GREATER
 from _pydev_bundle import pydev_log
 import itertools
-from collections import namedtuple
 from typing import Any, Dict
 
 
@@ -79,7 +78,111 @@ def cached_call(obj, func, *args):
     return getattr(obj, cached_name)
 
 
-_LineColInfo = namedtuple('_LineColInfo', 'lineno, end_lineno, colno, end_colno')
+class _LineColInfo:
+    '''
+    Line/column range associated with a frame.
+
+    `colno` and `end_colno` are offsets in utf-8 bytes (not in characters); use
+    `map_columns_to_line` to convert them to character-based columns.
+    '''
+
+    def __init__(self, lineno, end_lineno, colno, end_colno):
+        self.lineno = lineno
+        self.end_lineno = end_lineno
+        self.colno = colno
+        self.end_colno = end_colno
+
+    def __repr__(self):
+        # Same layout a namedtuple with these fields would print (helps when debugging).
+        return '_LineColInfo(lineno=%r, end_lineno=%r, colno=%r, end_colno=%r)' % (
+            self.lineno, self.end_lineno, self.colno, self.end_colno)
+
+    def map_columns_to_line(self, original_line: str):
+        '''
+        The columns internally are actually based on bytes.
+
+        Also, the position isn't always the ideal one as the start may not be
+        what we want (if the user has many subscripts in the line the start
+        will always be the same and only the end would change).
+        For more details see:
+        https://github.com/microsoft/debugpy/issues/1099#issuecomment-1303403995
+
+        So, this function maps the start/end columns to the position to be shown in the editor.
+
+        :param original_line:
+            The text of the source line the columns refer to.
+
+        :return:
+            A tuple(colno, end_colno) with 0-based offsets in characters of `original_line`.
+        '''
+        # NOTE(review): assumes a column may be None when position information is
+        # unavailable (as `code.co_positions()` may report) -- confirm against the place
+        # where _LineColInfo is created. None is handled as column 0 instead of failing
+        # with a TypeError.
+        byte_colno = self.colno or 0
+        byte_end_colno = self.end_colno or 0
+
+        colno = _utf8_byte_offset_to_character_offset(original_line, byte_colno)
+        end_colno = _utf8_byte_offset_to_character_offset(original_line, byte_end_colno)
+
+        if self.lineno == self.end_lineno:
+            try:
+                ret = _extract_caret_anchors_in_bytes_from_line_segment(
+                    original_line[colno:end_colno]
+                )
+                if ret is not None:
+                    # The anchors are byte offsets relative to the start of the segment,
+                    # so, make them relative to the line before converting to characters.
+                    return (
+                        _utf8_byte_offset_to_character_offset(original_line, ret[0] + byte_colno),
+                        _utf8_byte_offset_to_character_offset(original_line, ret[1] + byte_colno)
+                    )
+            except Exception:
+                # Refining the range is best-effort: on any failure fall back to the
+                # unrefined columns computed above.
+                pass
+
+        return colno, end_colno
+
+
+# Smallest code points whose utf-8 encoding needs 2, 3 and 4 bytes.
+_utf8_with_2_bytes = 0x80
+_utf8_with_3_bytes = 0x800
+_utf8_with_4_bytes = 0x10000
+
+
+def _utf8_byte_offset_to_character_offset(s: str, offset: int):
+    '''
+    Converts an offset in the utf-8 encoding of `s` to an offset in characters of `s`.
+
+    :param s:
+        The string the offset refers to.
+
+    :param offset:
+        The 0-based offset in utf-8 bytes. None or a negative value is handled as 0.
+
+    :return:
+        The index of the character which contains the byte at `offset` or `len(s)` if
+        the offset is at or past the end of the string.
+    '''
+    if offset is None or offset <= 0:
+        return 0
+
+    byte_offset = 0
+    for char_offset, character in enumerate(s):
+        codepoint = ord(character)
+
+        # Advance by the number of bytes this character takes in utf-8.
+        if codepoint >= _utf8_with_4_bytes:
+            byte_offset += 4
+
+        elif codepoint >= _utf8_with_3_bytes:
+            byte_offset += 3
+
+        elif codepoint >= _utf8_with_2_bytes:
+            byte_offset += 2
+
+        else:
+            byte_offset += 1
+
+        if byte_offset > offset:
+            # The requested byte is inside of the current character.
+            return char_offset
+
+    # The offset is at or after the end (this also covers the empty string).
+    return len(s)
+
+
+# Based on traceback._extract_caret_anchors_in_bytes_from_line_segment (Python 3.11.0)
+def _extract_caret_anchors_in_bytes_from_line_segment(segment: str):
+    '''
+    Finds a narrower range to highlight inside of a segment of code.
+
+    Only segments which parse as a single expression statement are handled:
+
+    - binary operation: the range covers the operator;
+    - subscript: the range covers the brackets of the last subscript.
+
+    :param segment:
+        The code to analyze.
+
+    :return:
+        A tuple(start, end) with offsets in utf-8 bytes relative to the start of
+        `segment` or None if the segment could not be parsed or is not one of the
+        supported expressions.
+    '''
+    import ast
+
+    try:
+        segment = segment.encode('utf-8')
+    except UnicodeEncodeError:
+        return None
+    try:
+        tree = ast.parse(segment)
+    except (SyntaxError, ValueError):
+        # ValueError may be raised instead of SyntaxError for some invalid sources
+        # (i.e.: source with null bytes).
+        return None
+
+    if len(tree.body) != 1:
+        return None
+
+    statement = tree.body[0]
+    if not isinstance(statement, ast.Expr):
+        return None
+
+    expr = statement.value
+    if isinstance(expr, ast.BinOp):
+        # Contents between the end of the left operand and the start of the right one.
+        operator_str = segment[expr.left.end_col_offset:expr.right.col_offset]
+        operator_offset = len(operator_str) - len(operator_str.lstrip())
+
+        left_anchor = expr.left.end_col_offset + operator_offset
+        right_anchor = left_anchor + 1
+
+        # Operators with 2 chars (such as `**` or `//`) need one more column. Slicing
+        # (instead of indexing) gives bytes, so, any whitespace (not just ' ') after
+        # the first char is properly identified as not being a part of the operator.
+        next_char = operator_str[operator_offset + 1:operator_offset + 2]
+        if next_char and not next_char.isspace():
+            right_anchor += 1
+        return left_anchor, right_anchor
+
+    if isinstance(expr, ast.Subscript):
+        return expr.value.end_col_offset, expr.slice.end_col_offset + 1
+
+    return None
 
 
 class FramesList(object):

diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command_factory_json.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command_factory_json.py
@@ -19,7 +19,7 @@
     CMD_THREAD_RESUME_SINGLE_NOTIFICATION, CMD_THREAD_KILL, CMD_STOP_ON_START, CMD_INPUT_REQUESTED, \
     CMD_EXIT, CMD_STEP_INTO_COROUTINE, CMD_STEP_RETURN_MY_CODE, CMD_SMART_STEP_INTO, \
     CMD_SET_FUNCTION_BREAK
-from _pydevd_bundle.pydevd_constants import get_thread_id, ForkSafeLock
+from _pydevd_bundle.pydevd_constants import get_thread_id, ForkSafeLock, DebugInfoHolder
 from _pydevd_bundle.pydevd_net_command import NetCommand, NULL_NET_COMMAND
 from _pydevd_bundle.pydevd_net_command_factory_xml import NetCommandFactory
 from _pydevd_bundle.pydevd_utils import get_non_pydevd_threads
@@ -30,6 +30,7 @@
 import linecache
 from _pydevd_bundle.pydevd_thread_lifecycle import pydevd_find_thread_by_id
 from io import StringIO
+from _pydev_bundle import pydev_log
 
 
 class ModulesManager(object):
@@ -265,8 +266,23 @@ def make_get_thread_stack_message(self, py_db, seq, thread_id, topmost_frame, fm
                             source_reference = pydevd_file_utils.create_source_reference_for_linecache(
                                 original_filename)
 
+                column = 1
+                endcol = None
+                if line_col_info is not None:
+                    try:
+                        line_text = linecache.getline(original_filename, lineno)
+                    except:
+                        if DebugInfoHolder.DEBUG_TRACE_LEVEL >= 2:
+                            pydev_log.exception('Unable to get line from linecache for file: %s', original_filename)
+                    else:
+                        if line_text:
+                            colno, endcolno = line_col_info.map_columns_to_line(line_text)
+                            column = colno + 1
+                            if line_col_info.lineno == line_col_info.end_lineno:
+                                endcol = endcolno + 1
+
                 frames.append(pydevd_schema.StackFrame(
-                    frame_id, formatted_name, lineno, column=1, source={
+                    frame_id, formatted_name, lineno, column=column, endColumn=endcol, source={
                         'path': filename_in_utf8,
                         'sourceReference': source_reference,
                     },

diff --git a/src/debugpy/_vendored/pydevd/tests_python/test_frame_utils.py b/src/debugpy/_vendored/pydevd/tests_python/test_frame_utils.py
@@ -1,5 +1,8 @@
+# coding: utf-8
 import sys
 from _pydevd_bundle.pydevd_constants import EXCEPTION_TYPE_USER_UNHANDLED
+import pytest
+from tests_python.debug_constants import IS_PY311_OR_GREATER
 
 
 def test_create_frames_list_from_traceback():
@@ -32,3 +35,217 @@ def method2():
         assert str(frames_list.chained_frames_list.chained_frames_list.exc_desc) == 'first'
         assert frames_list.chained_frames_list.chained_frames_list.chained_frames_list is None
 
+
+if IS_PY311_OR_GREATER:
+    # Wraps traceback._byte_offset_to_character_offset (when available) so that a
+    # failure in the version from the stdlib falls back to a tolerant replacement.
+    import traceback
+    _byte_offset_to_character_offset = getattr(traceback, '_byte_offset_to_character_offset', None)
+    if _byte_offset_to_character_offset is not None:
+        _original = traceback._byte_offset_to_character_offset
+
+        def _replacement(text, offset):
+            # Replacement to deal with the buggy version released on Python 3.11.0.
+            # Decoding with 'replace' doesn't fail if the slice ends in the middle of
+            # a multi-byte character.
+            as_utf8 = text.encode('utf-8')
+            if offset > len(as_utf8):
+                offset = len(as_utf8)
+
+            return len(as_utf8[:offset + 1].decode("utf-8", 'replace'))
+
+        def _byte_offset_to_character_offset(*args, **kwargs):
+            try:
+                return _original(*args, **kwargs)
+            except Exception:
+                # Only regular errors trigger the fallback (KeyboardInterrupt and
+                # SystemExit must not be swallowed).
+                return _replacement(*args, **kwargs)
+
+        traceback._byte_offset_to_character_offset = _byte_offset_to_character_offset
+
+_USE_UNICODE = [False, True]
+
+
+@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
+@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
+def test_collect_anchors_subscript(use_unicode):
+    '''
+    Checks that for a failure in a chain of subscripts the columns are mapped to the
+    last subscript (with and without non-ascii chars in the line).
+    '''
+    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback
+
+    if use_unicode:
+
+        def method():
+            d = {
+                "x": {
+                    "á": {
+                        "í": {
+                            "theta": 1
+                        }
+                    }
+                }
+            }
+
+            result = d["x"]["á"]["í"]["beta"]
+
+    else:
+
+        def method():
+            d = {
+                "x": {
+                    "y": {
+                        "i": {
+                            "theta": 1
+                        }
+                    }
+                }
+            }
+
+            result = d["x"]["y"]["i"]["beta"]
+
+    try:
+        method()
+    except Exception:
+        exc_type, exc_desc, trace_obj = sys.exc_info()
+        memo = {}
+        frame = None
+        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
+        iter_in = iter(frames_list)
+        f = next(iter_in)
+        assert f.f_code.co_name == 'method'
+        line_col_info = frames_list.frame_id_to_line_col_info[id(f)]
+
+        # Must match the text (and indentation) of the failing line in `method`.
+        if use_unicode:
+            line = '            result = d["x"]["á"]["í"]["beta"]'
+        else:
+            line = '            result = d["x"]["y"]["i"]["beta"]'
+
+        # Ok, so, the range that we have covers >>d["x"]["á"]["í"]["beta"]<<
+        # the problem here is that ideally we'd like to present to the user that
+        # the current key is "beta", so, we need to do some additional computation
+        # to find out the proper column to show to the user.
+        # (see https://github.com/microsoft/debugpy/issues/1099
+        # for more information).
+        assert line_col_info.colno == line.index('d["x"]')
+
+        # It's +2 here due to the á and í unicode chars (each one takes 2 bytes, so, we
+        # need to convert from the bytes index to the actual character in the string to
+        # get the actual col).
+        if use_unicode:
+            assert line_col_info.end_colno == len(line) + 2
+        else:
+            assert line_col_info.end_colno == len(line)
+        original_line = line
+
+        col, endcol = line_col_info.map_columns_to_line(original_line)
+        assert col == line.index('["beta"]')
+        assert endcol == len(line)
+    else:
+        # Without this the test would pass without checking anything.
+        raise AssertionError('Expected method() to raise an exception.')
+
+
+@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
+@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
+def test_collect_anchors_binop_1(use_unicode):
+    '''
+    Checks that for a failure in the last operation of a chain of binary operations
+    the columns are mapped to the operator which failed (with and without non-ascii
+    chars in the line).
+    '''
+    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback
+
+    if use_unicode:
+
+        def method():
+            á = 1
+            í = 2
+            c = tuple
+
+            result = á + í + c
+
+    else:
+
+        def method():
+            a = 1
+            b = 2
+            c = tuple
+
+            result = a + b + c
+
+    try:
+        method()
+    except Exception:
+        exc_type, exc_desc, trace_obj = sys.exc_info()
+        memo = {}
+        frame = None
+        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
+        iter_in = iter(frames_list)
+        f = next(iter_in)
+        assert f.f_code.co_name == 'method'
+        line_col_info = frames_list.frame_id_to_line_col_info[id(f)]
+
+        # Must match the text (and indentation) of the failing line in `method`.
+        if use_unicode:
+            line = '            result = á + í + c'
+            expected_index = line.index('á + í')
+        else:
+            line = '            result = a + b + c'
+            expected_index = line.index('a + b')
+
+        assert line_col_info.colno == expected_index
+
+        # It's +2 here due to the á and í unicode chars (we need to convert from the bytes
+        # index to the actual character in the string to get the actual col).
+        if use_unicode:
+            assert line_col_info.end_colno == len(line) + 2
+        else:
+            assert line_col_info.end_colno == len(line)
+        original_line = line
+
+        col, endcol = line_col_info.map_columns_to_line(original_line)
+        assert col == line.index('+ c')
+        assert endcol == col + 1
+    else:
+        # Without this the test would pass without checking anything.
+        raise AssertionError('Expected method() to raise an exception.')
+
+
+@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
+@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
+def test_collect_anchors_binop_2(use_unicode):
+    '''
+    Checks that for a failure in the first operation of a chain of binary operations
+    the columns are mapped to the operator which failed (with and without non-ascii
+    chars in the line).
+    '''
+    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback
+
+    if use_unicode:
+
+        def method():
+            á = 1
+            í = 2
+            c = tuple
+
+            result = á + c + í
+
+    else:
+
+        def method():
+            a = 1
+            b = 2
+            c = tuple
+
+            result = a + c + b
+
+    try:
+        method()
+    except Exception:
+        exc_type, exc_desc, trace_obj = sys.exc_info()
+        memo = {}
+        frame = None
+        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
+        iter_in = iter(frames_list)
+        f = next(iter_in)
+        assert f.f_code.co_name == 'method'
+        line_col_info = frames_list.frame_id_to_line_col_info[id(f)]
+
+        # Must match the text (and indentation) of the failing line in `method`.
+        if use_unicode:
+            line = '            result = á + c + í'
+            expected_index = line.index('á + c')
+        else:
+            line = '            result = a + c + b'
+            expected_index = line.index('a + c')
+
+        assert line_col_info.colno == expected_index
+
+        # The range ends right after `c` (+1 from the index of `c`). With unicode there's
+        # an additional +1 as the columns are in bytes and the á which comes before
+        # takes 2 bytes (the í is after the range, so, it doesn't affect it).
+        if use_unicode:
+            assert line_col_info.end_colno == line.index('c + í') + 2
+        else:
+            assert line_col_info.end_colno == line.index('c + b') + 1
+        original_line = line
+
+        col, endcol = line_col_info.map_columns_to_line(original_line)
+        # Hard-coded sanity check for the values computed from the line below.
+        assert col == 23
+        assert endcol == 24
+        assert col == line.index('+ c')
+        assert endcol == col + 1
+    else:
+        # Without this the test would pass without checking anything.
+        raise AssertionError('Expected method() to raise an exception.')