diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py index e7ccff112..a6b8a0ee9 100644 --- a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py +++ b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py @@ -87,7 +87,6 @@ from _pydevd_bundle.pydevd_thread_lifecycle import pydevd_find_thread_by_id, resume_threads from _pydevd_bundle.pydevd_dont_trace_files import PYDEV_FILE import dis -from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_exception_cause import pydevd_file_utils import itertools from urllib.parse import quote_plus, unquote_plus diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py index 0a8c0a1c2..f079757a6 100644 --- a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py +++ b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_frame_utils.py @@ -2,7 +2,6 @@ IS_PY311_OR_GREATER from _pydev_bundle import pydev_log import itertools -from collections import namedtuple from typing import Any, Dict @@ -79,7 +78,111 @@ def cached_call(obj, func, *args): return getattr(obj, cached_name) -_LineColInfo = namedtuple('_LineColInfo', 'lineno, end_lineno, colno, end_colno') +class _LineColInfo: + + def __init__(self, lineno, end_lineno, colno, end_colno): + self.lineno = lineno + self.end_lineno = end_lineno + self.colno = colno + self.end_colno = end_colno + + def map_columns_to_line(self, original_line: str): + ''' + The columns internally are actually based on bytes. + + Also, the position isn't always the ideal one as the start may not be + what we want (if the user has many subscripts in the line the start + will always be the same and only the end would change). 
+ For more details see: + https://github.com/microsoft/debugpy/issues/1099#issuecomment-1303403995 + + So, this function maps the start/end columns to the position to be shown in the editor. + ''' + colno = _utf8_byte_offset_to_character_offset(original_line, self.colno) + end_colno = _utf8_byte_offset_to_character_offset(original_line, self.end_colno) + + if self.lineno == self.end_lineno: + try: + ret = _extract_caret_anchors_in_bytes_from_line_segment( + original_line[colno:end_colno] + ) + if ret is not None: + return ( + _utf8_byte_offset_to_character_offset(original_line, ret[0] + self.colno), + _utf8_byte_offset_to_character_offset(original_line, ret[1] + self.colno) + ) + except Exception: + pass # Suppress exception + + return colno, end_colno + + +_utf8_with_2_bytes = 0x80 +_utf8_with_3_bytes = 0x800 +_utf8_with_4_bytes = 0x10000 + + +def _utf8_byte_offset_to_character_offset(s: str, offset: int): + byte_offset = 0 + char_offset = 0 + + for char_offset, character in enumerate(s): + byte_offset += 1 + + codepoint = ord(character) + + if codepoint >= _utf8_with_4_bytes: + byte_offset += 3 + + elif codepoint >= _utf8_with_3_bytes: + byte_offset += 2 + + elif codepoint >= _utf8_with_2_bytes: + byte_offset += 1 + + if byte_offset > offset: + break + else: + char_offset += 1 + + return char_offset + + +# Based on traceback._extract_caret_anchors_in_bytes_from_line_segment (Python 3.11.0) +def _extract_caret_anchors_in_bytes_from_line_segment(segment: str): + import ast + + try: + segment = segment.encode('utf-8') + except UnicodeEncodeError: + return None + try: + tree = ast.parse(segment) + except SyntaxError: + return None + + if len(tree.body) != 1: + return None + + statement = tree.body[0] + if isinstance(statement, ast.Expr): + expr = statement.value + if isinstance(expr, ast.BinOp): + operator_str = segment[expr.left.end_col_offset:expr.right.col_offset] + operator_offset = len(operator_str) - len(operator_str.lstrip()) + + left_anchor = 
expr.left.end_col_offset + operator_offset + right_anchor = left_anchor + 1 + if ( + operator_offset + 1 < len(operator_str) + and not operator_str[operator_offset + 1] == ord(b' ') + ): + right_anchor += 1 + return left_anchor, right_anchor + if isinstance(expr, ast.Subscript): + return expr.value.end_col_offset, expr.slice.end_col_offset + 1 + + return None class FramesList(object): diff --git a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command_factory_json.py b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command_factory_json.py index 2eed6e803..6f1dfc34f 100644 --- a/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command_factory_json.py +++ b/src/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command_factory_json.py @@ -19,7 +19,7 @@ CMD_THREAD_RESUME_SINGLE_NOTIFICATION, CMD_THREAD_KILL, CMD_STOP_ON_START, CMD_INPUT_REQUESTED, \ CMD_EXIT, CMD_STEP_INTO_COROUTINE, CMD_STEP_RETURN_MY_CODE, CMD_SMART_STEP_INTO, \ CMD_SET_FUNCTION_BREAK -from _pydevd_bundle.pydevd_constants import get_thread_id, ForkSafeLock +from _pydevd_bundle.pydevd_constants import get_thread_id, ForkSafeLock, DebugInfoHolder from _pydevd_bundle.pydevd_net_command import NetCommand, NULL_NET_COMMAND from _pydevd_bundle.pydevd_net_command_factory_xml import NetCommandFactory from _pydevd_bundle.pydevd_utils import get_non_pydevd_threads @@ -30,6 +30,7 @@ import linecache from _pydevd_bundle.pydevd_thread_lifecycle import pydevd_find_thread_by_id from io import StringIO +from _pydev_bundle import pydev_log class ModulesManager(object): @@ -265,8 +266,23 @@ def make_get_thread_stack_message(self, py_db, seq, thread_id, topmost_frame, fm source_reference = pydevd_file_utils.create_source_reference_for_linecache( original_filename) + column = 1 + endcol = None + if line_col_info is not None: + try: + line_text = linecache.getline(original_filename, lineno) + except: + if DebugInfoHolder.DEBUG_TRACE_LEVEL >= 2: + pydev_log.exception('Unable to get line from 
# coding: utf-8
import sys
from _pydevd_bundle.pydevd_constants import EXCEPTION_TYPE_USER_UNHANDLED
import pytest
from tests_python.debug_constants import IS_PY311_OR_GREATER

if IS_PY311_OR_GREATER:
    import traceback
    _byte_offset_to_character_offset = getattr(traceback, '_byte_offset_to_character_offset', None)
    if _byte_offset_to_character_offset is not None:
        _original = traceback._byte_offset_to_character_offset

        def _byte_offset_to_character_offset(*args, **kwargs):
            try:
                return _original(*args, **kwargs)
            except Exception:

                # Replacement to deal with the buggy version released on Python 3.11.0.
                # Note: the parameter is named `str` as in the original code so that
                # the arguments received are forwarded as they were passed.
                def replacement(str, offset):
                    as_utf8 = str.encode('utf-8')
                    if offset > len(as_utf8):
                        offset = len(as_utf8)

                    return len(as_utf8[:offset + 1].decode("utf-8", 'replace'))

                return replacement(*args, **kwargs)

        traceback._byte_offset_to_character_offset = _byte_offset_to_character_offset

_USE_UNICODE = [False, True]


def _get_line_col_info_from_failing_method(method):
    '''
    Calls `method` (which must be a function named `method` which raises an
    exception) and provides the line/col info collected for its frame.

    Fails if `method` does not raise (so that the callers can't pass without
    actually checking anything).
    '''
    from _pydevd_bundle.pydevd_frame_utils import create_frames_list_from_traceback

    try:
        method()
    except Exception:
        exc_type, exc_desc, trace_obj = sys.exc_info()
        memo = {}
        frame = None
        frames_list = create_frames_list_from_traceback(trace_obj, frame, exc_type, exc_desc, memo)
        f = next(iter(frames_list))
        assert f.f_code.co_name == 'method'
        return frames_list.frame_id_to_line_col_info[id(f)]

    raise AssertionError('Expected method() to raise an exception (but it did not).')


@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
def test_collect_anchors_subscript(use_unicode):
    # Note: the `line` strings below must match the text (including the
    # indentation) of the statement which fails inside of `method`.
    if use_unicode:

        def method():
            d = {
                "x": {
                    "á": {
                        "í": {
                            "theta": 1
                        }
                    }
                }
            }

            result = d["x"]["á"]["í"]["beta"]

    else:

        def method():
            d = {
                "x": {
                    "y": {
                        "i": {
                            "theta": 1
                        }
                    }
                }
            }

            result = d["x"]["y"]["i"]["beta"]

    line_col_info = _get_line_col_info_from_failing_method(method)

    if use_unicode:
        line = '            result = d["x"]["á"]["í"]["beta"]'
    else:
        line = '            result = d["x"]["y"]["i"]["beta"]'

    # Ok, so, the range that we have covers >>d["x"]["á"]["í"]["beta"]<<
    # the problem here is that ideally we'd like to present to the user that
    # the current key is "beta", so, we need to do some additional computation
    # to find out the proper column to show to the user.
    # (see https://github.com/microsoft/debugpy/issues/1099
    # for more information).
    assert line_col_info.colno == line.index('d["x"]')

    # It's +2 here due to the á and í unicode chars (the columns are based on
    # bytes and each one of those chars takes 2 bytes, so, we need to convert
    # from the bytes index to the actual character in the string to get the
    # actual col).
    if use_unicode:
        assert line_col_info.end_colno == len(line) + 2
    else:
        assert line_col_info.end_colno == len(line)
    original_line = line

    col, endcol = line_col_info.map_columns_to_line(original_line)
    assert col == line.index('["beta"]')
    assert endcol == len(line)


@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
def test_collect_anchors_binop_1(use_unicode):
    # Note: the `line` strings below must match the text (including the
    # indentation) of the statement which fails inside of `method`.
    if use_unicode:

        def method():
            á = 1
            í = 2
            c = tuple

            result = á + í + c

    else:

        def method():
            a = 1
            b = 2
            c = tuple

            result = a + b + c

    line_col_info = _get_line_col_info_from_failing_method(method)

    if use_unicode:
        line = '            result = á + í + c'
        expected_index = line.index('á + í')
    else:
        line = '            result = a + b + c'
        expected_index = line.index('a + b')

    assert line_col_info.colno == expected_index

    # It's +2 here due to the á and í unicode chars (we need to convert from the bytes
    # index to the actual character in the string to get the actual col).
    if use_unicode:
        assert line_col_info.end_colno == len(line) + 2
    else:
        assert line_col_info.end_colno == len(line)
    original_line = line

    col, endcol = line_col_info.map_columns_to_line(original_line)
    assert col == line.index('+ c')
    assert endcol == col + 1


@pytest.mark.parametrize('use_unicode', _USE_UNICODE)
@pytest.mark.skipif(not IS_PY311_OR_GREATER, reason='Python 3.11 required.')
def test_collect_anchors_binop_2(use_unicode):
    # Note: the `line` strings below must match the text (including the
    # indentation) of the statement which fails inside of `method`.
    if use_unicode:

        def method():
            á = 1
            í = 2
            c = tuple

            result = á + c + í

    else:

        def method():
            a = 1
            b = 2
            c = tuple

            result = a + c + b

    line_col_info = _get_line_col_info_from_failing_method(method)

    if use_unicode:
        line = '            result = á + c + í'
        expected_index = line.index('á + c')
    else:
        line = '            result = a + c + b'
        expected_index = line.index('a + c')

    assert line_col_info.colno == expected_index

    # The range ends right after the `c` (thus the +1). With unicode there's an
    # additional +1 as the columns are based on bytes and the á which appears
    # before the `c` takes 2 bytes.
    if use_unicode:
        assert line_col_info.end_colno == line.index('c + í') + 2
    else:
        assert line_col_info.end_colno == line.index('c + b') + 1
    original_line = line

    col, endcol = line_col_info.map_columns_to_line(original_line)
    assert col == line.index('+ c')
    assert endcol == col + 1