Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XTerm parsing improvements #570

Merged
merged 12 commits into from
Jun 17, 2022
Prev Previous commit
Next Next commit
Translate "escape" to "^" when XTermParser has to backtrack
  • Loading branch information
darrenburns committed Jun 11, 2022

Unverified

This commit is not signed, but one or more authors requires that any commit attributed to them is signed.
commit 1510739227d14ce3a36c2c0cb8f0ec7073279f55
43 changes: 22 additions & 21 deletions src/textual/_xterm_parser.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from __future__ import annotations


import re
from typing import Any, Callable, Generator, Iterable

from . import messages
from . import events
from ._types import MessageTarget
from ._parser import Awaitable, Parser, TokenCallback
from . import messages
from ._ansi_sequences import ANSI_SEQUENCES_KEYS
from ._parser import Awaitable, Parser, TokenCallback
from ._types import MessageTarget

# When trying to determine whether the current sequence is a supported/valid
# escape sequence, at which length should we give up and consider our search
# to be unsuccessful?
from .keys import Keys

_MAX_SEQUENCE_SEARCH_THRESHOLD = 20
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was 20 chosen?

Copy link
Member Author

@darrenburns darrenburns Jun 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The largest that I think we could expect to receive (that I'm aware of) is length 14 (mouse events in large terminal windows). The existence of the re_sgr_mouse regex suggested to me that longer sequences could exist, so I added on some leeway for defensiveness.


_re_mouse_event = re.compile("^" + re.escape("\x1b[") + r"(<?[\d;]+[mM]|M...)\Z")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two regexes for mouse events that run.

This one suggests a mouse event can have any number of parameters, but when we parse the mouse event it assumes there are a fixed number.

Is this one definitely required? Could we possibly get away with running just the 1 regex for mouse events rather than 2?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quite possibly. IIRC there are at least two mouse codes that can be generated. At one point I thought I would have to support both, but everything supports the newer method. That might explain the dual regexes.

@@ -24,7 +25,6 @@


class XTermParser(Parser[events.Event]):

_re_sgr_mouse = re.compile(r"\x1b\[<(\d+);(\d+);(\d+)([Mm])")

def __init__(
@@ -144,12 +144,11 @@ def parse(self, on_token: TokenCallback) -> Generator[Awaitable, str, None]:
or len(sequence) > _MAX_SEQUENCE_SEARCH_THRESHOLD
):
for character in sequence:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's some very similar code further down, I wonder if it could be hoisted in to a closure?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered this, but will do it now that I know I'm not the only one with that thought :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, you said closure...

I've factored it out into a separate method

keys = get_key_ansi_sequence(character, None)
if keys is not None:
for key in keys:
on_token(events.Key(self.sender, key=key))
else:
on_token(events.Key(self.sender, key=character))
key_events = self._sequence_to_key_events(character)
for event in key_events:
if event.key == "escape":
event = events.Key(event.sender, key="^")
on_token(event)
break

sequence_character = yield read1()
@@ -171,10 +170,10 @@ def parse(self, on_token: TokenCallback) -> Generator[Awaitable, str, None]:

if not bracketed_paste:
# Was it a pressed key event that we received?
keys = get_key_ansi_sequence(sequence, None)
if keys is not None:
for key in keys:
on_token(events.Key(self.sender, key=key))
key_events = list(self._sequence_to_key_events(sequence))
for event in key_events:
on_token(event)
if key_events:
break
# Or a mouse event?
mouse_match = _re_mouse_event.match(sequence)
@@ -201,9 +200,11 @@ def parse(self, on_token: TokenCallback) -> Generator[Awaitable, str, None]:
break
else:
if not bracketed_paste:
keys = get_key_ansi_sequence(character, None)
if keys is not None:
for key in keys:
on_token(events.Key(self.sender, key=key))
else:
on_token(events.Key(self.sender, key=character))
for event in self._sequence_to_key_events(character):
on_token(event)

def _sequence_to_key_events(self, sequence: str) -> Iterable[events.Key]:
default = (sequence,) if len(sequence) == 1 else ()
keys = ANSI_SEQUENCES_KEYS.get(sequence, default)
for key in keys:
yield events.Key(self.sender, key)
6 changes: 3 additions & 3 deletions tests/test_xterm_parser.py
Original file line number Diff line number Diff line change
@@ -67,8 +67,8 @@ def test_cant_match_escape_sequence_too_long(parser):
assert len(events) == len(sequence)
assert all(isinstance(event, Key) for event in events)

# '\x1b' is translated to 'escape'
assert events[0].key == "escape"
# When we backtrack '\x1b' is translated to '^'
assert events[0].key == "^"

# The rest of the characters correspond to the expected key presses
events = events[1:]
@@ -87,7 +87,7 @@ def test_unknown_sequence_followed_by_known_sequence(parser):
sequence = unknown_sequence + known_sequence
events = parser.feed(sequence)

assert next(events).key == "escape"
assert next(events).key == "^"
assert next(events).key == "["
assert next(events).key == "?"
assert next(events).key == "end"