Skip to content

Commit

Permalink
Add column information to exception tracebacks. Fixes #1099
Browse files Browse the repository at this point in the history
  • Loading branch information
fabioz committed Nov 11, 2022
1 parent 6c1c3d6 commit 04403dd
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 5 deletions.
1 change: 0 additions & 1 deletion src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@
from _pydevd_bundle.pydevd_thread_lifecycle import pydevd_find_thread_by_id, resume_threads
from _pydevd_bundle.pydevd_dont_trace_files import PYDEV_FILE
import dis
from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_exception_cause
import pydevd_file_utils
import itertools
from urllib.parse import quote_plus, unquote_plus
Expand Down
107 changes: 105 additions & 2 deletions src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
IS_PY311_OR_GREATER
from _pydev_bundle import pydev_log
import itertools
from collections import namedtuple
from typing import Any, Dict


Expand Down Expand Up @@ -79,7 +78,111 @@ def cached_call(obj, func, *args):
return getattr(obj, cached_name)


_LineColInfo = namedtuple('_LineColInfo', 'lineno, end_lineno, colno, end_colno')
class _LineColInfo:
    '''
    Line/column range associated with a frame.

    The column values are stored exactly as received; `map_columns_to_line`
    treats them as UTF-8 byte offsets into the source line.
    '''

    def __init__(self, lineno, end_lineno, colno, end_colno):
        self.lineno, self.end_lineno = lineno, end_lineno
        self.colno, self.end_colno = colno, end_colno

    def map_columns_to_line(self, original_line: str):
        '''
        Map the stored start/end columns to the character positions which
        should be shown in the editor for `original_line`.

        The stored columns are byte-based, so they're first converted to
        character offsets. Besides that, the raw start isn't always the best
        position to show (with many subscripts in the same line the start is
        always the same and only the end changes), so, when the range is in a
        single line, an attempt is made to narrow it down to the operator or
        subscript which is actually relevant.

        For more details see:
        https://github.com/microsoft/debugpy/issues/1099#issuecomment-1303403995

        :return: a `(start_character_offset, end_character_offset)` tuple.
        '''
        start_char = _utf8_byte_offset_to_character_offset(original_line, self.colno)
        end_char = _utf8_byte_offset_to_character_offset(original_line, self.end_colno)
        whole_range = (start_char, end_char)

        # Narrowing down is only attempted for ranges contained in one line.
        if self.lineno != self.end_lineno:
            return whole_range

        try:
            anchors = _extract_caret_anchors_in_bytes_from_line_segment(
                original_line[start_char:end_char]
            )
            if anchors is None:
                return whole_range

            # The anchors are byte offsets relative to the segment, so, they're
            # rebased on the stored (byte) start column before converting.
            left_in_bytes = anchors[0] + self.colno
            right_in_bytes = anchors[1] + self.colno
            return (
                _utf8_byte_offset_to_character_offset(original_line, left_in_bytes),
                _utf8_byte_offset_to_character_offset(original_line, right_in_bytes),
            )
        except Exception:
            # Best effort: any failure just falls back to the whole range.
            return whole_range


# Smallest code points whose UTF-8 encoding needs 2, 3 and 4 bytes respectively.
_utf8_with_2_bytes = 0x80
_utf8_with_3_bytes = 0x800
_utf8_with_4_bytes = 0x10000


def _utf8_byte_offset_to_character_offset(s: str, offset: int):
    '''
    Convert an offset measured in UTF-8 bytes into an offset measured in
    characters of `s`.

    :param s: the text the offset refers to.
    :param offset: the offset in bytes of the UTF-8 encoded form of `s`.
    :return: the index of the character which contains the byte at `offset`
        or `len(s)` if `offset` is at or past the end of the encoded text
        (so, `0` is returned for an empty string).
    '''
    byte_offset = 0

    for char_offset, character in enumerate(s):
        codepoint = ord(character)

        # After this, `byte_offset` is the byte position right after `character`.
        if codepoint >= _utf8_with_4_bytes:
            byte_offset += 4
        elif codepoint >= _utf8_with_3_bytes:
            byte_offset += 3
        elif codepoint >= _utf8_with_2_bytes:
            byte_offset += 2
        else:
            byte_offset += 1

        if byte_offset > offset:
            return char_offset

    # The offset is at or past the end of the text: clamp to the end.
    # (Using len(s) instead of `last index + 1` also covers the empty string,
    # for which there's no last index).
    return len(s)


# Based on traceback._extract_caret_anchors_in_bytes_from_line_segment (Python 3.11.0)
def _extract_caret_anchors_in_bytes_from_line_segment(segment: str):
    '''
    Find the part of `segment` which is most relevant to highlight.

    The segment is parsed as Python code and, if it's a single expression
    statement which is a binary operation or a subscript, the range of the
    operator (binary operation) or of the last subscript is computed.

    :param segment: the piece of source code to analyze.
    :return: a `(left_anchor, right_anchor)` tuple with offsets in bytes of
        the UTF-8 encoded segment or `None` if the segment can't be encoded or
        parsed or isn't one of the supported constructs.
    '''
    import ast

    try:
        segment = segment.encode('utf-8')
    except UnicodeEncodeError:
        return None
    try:
        tree = ast.parse(segment)
    except (SyntaxError, ValueError):
        # ValueError: some Python versions report source with null bytes
        # this way instead of raising a SyntaxError.
        return None

    if len(tree.body) != 1:
        return None

    statement = tree.body[0]
    if not isinstance(statement, ast.Expr):
        return None

    expr = statement.value
    if isinstance(expr, ast.BinOp):
        # Bytes between the operands (whitespace + operator + whitespace).
        operator_str = segment[expr.left.end_col_offset:expr.right.col_offset]
        operator_offset = len(operator_str) - len(operator_str.lstrip())

        left_anchor = expr.left.end_col_offset + operator_offset
        right_anchor = left_anchor + 1

        # Operators with 2 characters (`**`, `//`, `<<`, `>>`): the byte after
        # the first operator character is also part of the operator unless
        # it's whitespace (any whitespace, not only a plain space).
        following = operator_str[operator_offset + 1:operator_offset + 2]
        if following and not following.isspace():
            right_anchor += 1
        return left_anchor, right_anchor

    if isinstance(expr, ast.Subscript):
        # From the end of the subscripted value up to (and including) the
        # byte right after the slice (the closing `]` when there's nothing
        # between the slice and the bracket).
        return expr.value.end_col_offset, expr.slice.end_col_offset + 1

    return None


class FramesList(object):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
CMD_THREAD_RESUME_SINGLE_NOTIFICATION, CMD_THREAD_KILL, CMD_STOP_ON_START, CMD_INPUT_REQUESTED, \
CMD_EXIT, CMD_STEP_INTO_COROUTINE, CMD_STEP_RETURN_MY_CODE, CMD_SMART_STEP_INTO, \
CMD_SET_FUNCTION_BREAK
from _pydevd_bundle.pydevd_constants import get_thread_id, ForkSafeLock
from _pydevd_bundle.pydevd_constants import get_thread_id, ForkSafeLock, DebugInfoHolder
from _pydevd_bundle.pydevd_net_command import NetCommand, NULL_NET_COMMAND
from _pydevd_bundle.pydevd_net_command_factory_xml import NetCommandFactory
from _pydevd_bundle.pydevd_utils import get_non_pydevd_threads
Expand All @@ -30,6 +30,7 @@
import linecache
from _pydevd_bundle.pydevd_thread_lifecycle import pydevd_find_thread_by_id
from io import StringIO
from _pydev_bundle import pydev_log


class ModulesManager(object):
Expand Down Expand Up @@ -265,8 +266,23 @@ def make_get_thread_stack_message(self, py_db, seq, thread_id, topmost_frame, fm
source_reference = pydevd_file_utils.create_source_reference_for_linecache(
original_filename)

column = 1
endcol = None
if line_col_info is not None:
try:
line_text = linecache.getline(original_filename, lineno)
except:
if DebugInfoHolder.DEBUG_TRACE_LEVEL >= 2:
pydev_log.exception('Unable to get line from linecache for file: %s', original_filename)
else:
if line_text:
colno, endcolno = line_col_info.map_columns_to_line(line_text)
column = colno + 1
if line_col_info.lineno == line_col_info.end_lineno:
endcol = endcolno + 1

frames.append(pydevd_schema.StackFrame(
frame_id, formatted_name, lineno, column=1, source={
frame_id, formatted_name, lineno, column=column, endColumn=endcol, source={
'path': filename_in_utf8,
'sourceReference': source_reference,
},
Expand Down
217 changes: 217 additions & 0 deletions src/debugpy/_vendored/pydevd/tests_python/test_frame_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# coding: utf-8
import sys
from _pydevd_bundle.pydevd_constants import EXCEPTION_TYPE_USER_UNHANDLED
import pytest
from tests_python.debug_constants import IS_PY311_OR_GREATER


def test_create_frames_list_from_traceback():
Expand Down Expand Up @@ -32,3 +35,217 @@ def method2():
assert str(frames_list.chained_frames_list.chained_frames_list.exc_desc) == 'first'
assert frames_list.chained_frames_list.chained_frames_list.chained_frames_list is None


if IS_PY311_OR_GREATER:
    import traceback

    # Private helper of the `traceback` module (it may not be there in all versions).
    _byte_offset_to_character_offset = getattr(traceback, '_byte_offset_to_character_offset', None)
    if _byte_offset_to_character_offset is not None:
        _original = traceback._byte_offset_to_character_offset

        # Replacement to deal with the buggy version released on Python 3.11.0.
        # Note: the parameter names are kept as they are (even though `str`
        # shadows the builtin) because the arguments are forwarded as received
        # and could be passed by keyword.
        def _replacement_byte_offset_to_character_offset(str, offset):
            as_utf8 = str.encode('utf-8')
            if offset > len(as_utf8):
                offset = len(as_utf8)

            return len(as_utf8[:offset + 1].decode("utf-8", 'replace'))

        def _byte_offset_to_character_offset(*args, **kwargs):
            # Use the original implementation and only fall back to the
            # replacement if it fails (`Exception` is used instead of a bare
            # except so that KeyboardInterrupt/SystemExit aren't swallowed).
            try:
                return _original(*args, **kwargs)
            except Exception:
                return _replacement_byte_offset_to_character_offset(*args, **kwargs)

        traceback._byte_offset_to_character_offset = _byte_offset_to_character_offset

# Values used to parametrize the tests (ascii-only and non-ascii source lines).
_USE_UNICODE = [False, True]


@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
def test_collect_anchors_subscript(use_unicode):
    '''
    Checks that, for a failing chained subscript, the columns are mapped to
    the last subscript (the one with the missing key).
    '''
    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback

    # Note: the `line` strings checked below must mirror the
    # `result = ...` lines in the `method` functions exactly (including the
    # 12 columns of indentation) as the columns are relative to the source line.
    if use_unicode:

        def method():
            d = {
                "x": {
                    "á": {
                        "í": {
                            "theta": 1
                        }
                    }
                }
            }

            result = d["x"]["á"]["í"]["beta"]

    else:

        def method():
            d = {
                "x": {
                    "y": {
                        "i": {
                            "theta": 1
                        }
                    }
                }
            }

            result = d["x"]["y"]["i"]["beta"]

    try:
        method()
    except KeyError:
        exc_type, exc_desc, trace_obj = sys.exc_info()
        memo = {}
        frame = None
        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
        iter_in = iter(frames_list)
        f = next(iter_in)
        assert f.f_code.co_name == 'method'
        line_col_info = frames_list.frame_id_to_line_col_info[id(f)]

        if use_unicode:
            line = '            result = d["x"]["á"]["í"]["beta"]'
        else:
            line = '            result = d["x"]["y"]["i"]["beta"]'

        # Ok, so, the range that we have covers >>d["x"]["á"]["í"]["beta"]<<
        # the problem here is that ideally we'd like to present to the user that
        # the current key is "beta", so, we need to do some additional computation
        # to find out the proper column to show to the user.
        # (see https://github.com/microsoft/debugpy/issues/1099
        # for more information).
        assert line_col_info.colno == line.index('d["x"]')

        # It's +2 here due to the á and í unicode chars (each one takes one
        # additional byte and the raw columns are based on bytes, so, we need
        # to convert from the bytes index to the actual character in the
        # string to get the actual col).
        if use_unicode:
            assert line_col_info.end_colno == len(line) + 2
        else:
            assert line_col_info.end_colno == len(line)

        col, endcol = line_col_info.map_columns_to_line(line)
        assert col == line.index('["beta"]')
        assert endcol == len(line)
    else:
        # Without this the test would pass without checking anything.
        raise AssertionError('Expected method() to raise a KeyError.')


@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
def test_collect_anchors_binop_1(use_unicode):
    '''
    Checks that, when the last operation of a chained binary operation fails,
    the columns are mapped to the operator of that last operation.
    '''
    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback

    # Note: the `line` strings checked below must mirror the
    # `result = ...` lines in the `method` functions exactly (including the
    # 12 columns of indentation) as the columns are relative to the source line.
    if use_unicode:

        def method():
            á = 1
            í = 2
            c = tuple

            result = á + í + c

    else:

        def method():
            a = 1
            b = 2
            c = tuple

            result = a + b + c

    try:
        method()
    except TypeError:
        exc_type, exc_desc, trace_obj = sys.exc_info()
        memo = {}
        frame = None
        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
        iter_in = iter(frames_list)
        f = next(iter_in)
        assert f.f_code.co_name == 'method'
        line_col_info = frames_list.frame_id_to_line_col_info[id(f)]

        if use_unicode:
            line = '            result = á + í + c'
            expected_index = line.index('á + í')
        else:
            line = '            result = a + b + c'
            expected_index = line.index('a + b')

        assert line_col_info.colno == expected_index

        # It's +2 here due to the á and í unicode chars (we need to convert from the bytes
        # index to the actual character in the string to get the actual col).
        if use_unicode:
            assert line_col_info.end_colno == len(line) + 2
        else:
            assert line_col_info.end_colno == len(line)

        col, endcol = line_col_info.map_columns_to_line(line)
        assert col == line.index('+ c')
        assert endcol == col + 1
    else:
        # Without this the test would pass without checking anything.
        raise AssertionError('Expected method() to raise a TypeError.')


@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
def test_collect_anchors_binop_2(use_unicode):
    '''
    Checks that, when the first operation of a chained binary operation fails,
    the columns are mapped to the operator of that first operation.
    '''
    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback

    # Note: the `line` strings checked below must mirror the
    # `result = ...` lines in the `method` functions exactly (including the
    # 12 columns of indentation) as the columns are relative to the source line
    # (the hardcoded 23/24 columns checked at the end depend on that too).
    if use_unicode:

        def method():
            á = 1
            í = 2
            c = tuple

            result = á + c + í

    else:

        def method():
            a = 1
            b = 2
            c = tuple

            result = a + c + b

    try:
        method()
    except TypeError:
        exc_type, exc_desc, trace_obj = sys.exc_info()
        memo = {}
        frame = None
        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
        iter_in = iter(frames_list)
        f = next(iter_in)
        assert f.f_code.co_name == 'method'
        line_col_info = frames_list.frame_id_to_line_col_info[id(f)]

        if use_unicode:
            line = '            result = á + c + í'
            expected_index = line.index('á + c')
        else:
            line = '            result = a + c + b'
            expected_index = line.index('a + c')

        assert line_col_info.colno == expected_index

        # The range ends right after the `c` (so, it's +1 as the end is
        # exclusive) and, in the unicode case, there's an additional +1 due
        # to the extra byte of the á which comes before it (the raw columns
        # are based on bytes).
        if use_unicode:
            assert line_col_info.end_colno == line.index('c + í') + 2
        else:
            assert line_col_info.end_colno == line.index('c + b') + 1

        col, endcol = line_col_info.map_columns_to_line(line)
        assert col == 23
        assert endcol == 24
        assert col == line.index('+ c')
        assert endcol == col + 1
    else:
        # Without this the test would pass without checking anything.
        raise AssertionError('Expected method() to raise a TypeError.')

0 comments on commit 04403dd

Please sign in to comment.