Skip to content

Commit

Permalink
Remove untrusted progress parsing (stderr instead)
Browse files Browse the repository at this point in the history
Now that only the second container can send JSON-encoded progress
information, we can the untrusted JSON parsing. The parse_progress was
also renamed to `parse_progress_trusted` to ensure future developers
don't mistake this as a safe method.

The old methods for sending untrusted JSON were repurposed to send the
progress instead to stderr for troubleshooting in development mode.

Fixes #456
  • Loading branch information
deeplow committed Jan 10, 2024
1 parent 51bc600 commit abd3bab
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 160 deletions.
3 changes: 2 additions & 1 deletion dangerzone/conversion/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,5 +192,6 @@ def calculate_timeout(
async def convert(self) -> None:
pass

def update_progress(self, text: str, *, error: bool = False) -> None:
@abstractmethod
def update_progress(self, text: str) -> None:
pass
3 changes: 3 additions & 0 deletions dangerzone/conversion/doc_to_pixels.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ async def write_page_height(self, height: int) -> None:
async def write_page_data(self, data: bytes) -> None:
return await self.write_bytes(data)

def update_progress(self, text: str, *, error: bool = False) -> None:
print(text, file=sys.stderr)

async def convert(self) -> None:
conversions: Dict[str, Dict[str, Optional[str]]] = {
# .pdf
Expand Down
4 changes: 2 additions & 2 deletions dangerzone/conversion/pixels_to_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@ async def convert(
def update_progress(self, text: str, *, error: bool = False) -> None:
if running_on_qubes():
if self.progress_callback:
self.progress_callback(error, text, int(self.percentage))
self.progress_callback(error, text, self.percentage)
else:
print(
json.dumps(
{"error": error, "text": text, "percentage": int(self.percentage)}
{"error": error, "text": text, "percentage": self.percentage}
)
)
sys.stdout.flush()
Expand Down
25 changes: 6 additions & 19 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,13 @@ def convert(
exception = errors.exception_from_error_code(error_code)
document.mark_as_failed()
except errors.ConversionException as e:
self.print_progress_trusted(document, True, str(e), 0)
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
except Exception as e:
log.exception(
f"An exception occurred while converting document '{document.id}'"
)
self.print_progress_trusted(document, True, str(e), 0)
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()

def doc_to_pixels(self, document: Document, tempdir: str) -> None:
Expand Down Expand Up @@ -120,7 +120,7 @@ def doc_to_pixels(self, document: Document, tempdir: str) -> None:
sw.start()
for page in range(1, n_pages + 1):
text = f"Converting page {page}/{n_pages} to pixels"
self.print_progress_trusted(document, False, text, self.percentage)
self.print_progress(document, False, text, self.percentage)

width = read_int(self.proc.stdout, timeout=sw.remaining)
height = read_int(self.proc.stdout, timeout=sw.remaining)
Expand Down Expand Up @@ -151,7 +151,7 @@ def doc_to_pixels(self, document: Document, tempdir: str) -> None:

# TODO handle leftover code input
text = "Converted document to pixels"
self.print_progress_trusted(document, False, text, self.percentage)
self.print_progress(document, False, text, self.percentage)

if getattr(sys, "dangerzone_dev", False):
assert self.proc.stderr is not None
Expand All @@ -168,11 +168,11 @@ def pixels_to_pdf(
) -> None:
pass

def _print_progress(
def print_progress(
self, document: Document, error: bool, text: str, percentage: float
) -> None:
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{percentage}% " + Style.RESET_ALL
s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL
if error:
s += Fore.RED + text + Style.RESET_ALL
log.error(s)
Expand All @@ -183,19 +183,6 @@ def _print_progress(
if self.progress_callback:
self.progress_callback(error, text, percentage)

def print_progress_trusted(
self, document: Document, error: bool, text: str, percentage: float
) -> None:
return self._print_progress(document, error, text, int(percentage))

def print_progress(
self, document: Document, error: bool, untrusted_text: str, percentage: float
) -> None:
text = replace_control_chars(untrusted_text)
return self.print_progress_trusted(
document, error, "UNTRUSTED> " + text, percentage
)

@abstractmethod
def get_max_parallel_conversions(self) -> int:
pass
Expand Down
35 changes: 12 additions & 23 deletions dangerzone/isolation_provider/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,7 @@

from ..conversion import errors
from ..document import Document
from ..util import (
get_resource_path,
get_subprocess_startupinfo,
get_tmp_dir,
replace_control_chars,
)
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
from .base import IsolationProvider

# Define startupinfo for subprocesses
Expand Down Expand Up @@ -147,29 +142,22 @@ def assert_field_type(self, val: Any, _type: object) -> None:
if not type(val) == _type:
raise ValueError("Status field has incorrect type")

def parse_progress(self, document: Document, untrusted_line: str) -> None:
def parse_progress_trusted(self, document: Document, line: str) -> None:
"""
Parses a line returned by the container.
"""
try:
untrusted_status = json.loads(untrusted_line)

text = untrusted_status["text"]
status = json.loads(line)
text = status["text"]
self.assert_field_type(text, str)

error = untrusted_status["error"]
error = status["error"]
self.assert_field_type(error, bool)

percentage = untrusted_status["percentage"]
self.assert_field_type(percentage, int)

percentage = status["percentage"]
self.assert_field_type(percentage, float)
self.print_progress(document, error, text, percentage)
except Exception:
line = replace_control_chars(untrusted_line)
error_message = (
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {line}"
)
self.print_progress_trusted(document, True, error_message, -1)
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
self.print_progress(document, True, error_message, -1)

def exec(
self,
Expand Down Expand Up @@ -249,8 +237,9 @@ def pixels_to_pdf(
]

pixels_to_pdf_proc = self.exec_container(command, extra_args)
for line in pixels_to_pdf_proc.stdout:
self.parse_progress(document, line)
if pixels_to_pdf_proc.stdout:
for line in pixels_to_pdf_proc.stdout:
self.parse_progress_trusted(document, line.decode())
error_code = pixels_to_pdf_proc.wait()
if error_code != 0:
log.error("pixels-to-pdf failed")
Expand Down
2 changes: 1 addition & 1 deletion dangerzone/isolation_provider/qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def pixels_to_pdf(
self, document: Document, tempdir: str, ocr_lang: Optional[str]
) -> None:
def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
self.print_progress_trusted(document, error, text, percentage)
self.print_progress(document, error, text, percentage)

converter = PixelsToPDF(progress_callback=print_progress_wrapper)
try:
Expand Down
40 changes: 0 additions & 40 deletions tests/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,6 @@
)
@pytest.mark.skipif(not running_on_qubes(), reason="Not on a Qubes system")
class IsolationProviderTest:
def test_print_progress(
self,
provider: base.IsolationProvider,
uncommon_text: str,
sanitized_text: str,
mocker: MockerFixture,
) -> None:
"""Test that the print_progress() method of our isolation providers sanitizes text.
Iterate our isolation providers and make sure that their print_progress() methods
sanitizes the provided text, before passing it to the logging functions and other
callbacks.
"""
d = Document()
provider.progress_callback = mocker.MagicMock()
log_info_spy = mocker.spy(base.log, "info")
log_error_spy = mocker.spy(base.log, "error")
_print_progress_spy = mocker.spy(provider, "_print_progress")

for error, untrusted_text, sanitized_text in [
(True, "normal text", "UNTRUSTED> normal text"),
(False, "normal text", "UNTRUSTED> normal text"),
(True, uncommon_text, "UNTRUSTED> " + sanitized_text),
(False, uncommon_text, "UNTRUSTED> " + sanitized_text),
]:
log_info_spy.reset_mock()
log_error_spy.reset_mock()

provider.print_progress(d, error, untrusted_text, 0)
provider.progress_callback.assert_called_with(error, sanitized_text, 0) # type: ignore [union-attr]
_print_progress_spy.assert_called_with(d, error, sanitized_text, 0)
if error:
assert log_error_spy.call_args[0][0].endswith(
sanitized_text + Style.RESET_ALL
)
log_info_spy.assert_not_called()
else:
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
log_error_spy.assert_not_called()

def test_max_pages_received(
self,
pdf_11k_pages: str,
Expand Down
75 changes: 1 addition & 74 deletions tests/isolation_provider/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,77 +19,4 @@ def provider() -> Container:


class TestContainer(IsolationProviderTest):
def test_parse_progress(
self,
provider: Container,
uncommon_text: str,
sanitized_text: str,
mocker: MockerFixture,
) -> None:
"""Test that the `parse_progress()` function handles escape sequences properly."""
container = Container(enable_timeouts=False)
container.progress_callback = mocker.MagicMock()
print_progress_mock = mocker.patch.object(container, "_print_progress")
d = Document()

# Test 1 - Check that normal JSON values are printed as is.
simple_json = json.dumps({"text": "text", "error": False, "percentage": 0})
container.parse_progress(d, simple_json)
print_progress_mock.assert_called_with(d, False, "UNTRUSTED> text", 0)

# Test 2 - Check that a totally invalid string is reported as a failure. If this
# string contains escape characters, they should be sanitized as well.
def assert_invalid_json(text: str) -> None:
print_progress_mock.assert_called_with(
d,
True,
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {text}",
-1,
)

# Try first with a trivially invalid string.
invalid_json = "()"
container.parse_progress(d, invalid_json)
assert_invalid_json(invalid_json)

# Try next with an invalid string that contains uncommon text.
container.parse_progress(d, uncommon_text)
assert_invalid_json(sanitized_text)

# Test 3 - Check that a structurally valid JSON value with escape characters in it
# is sanitized.
valid_json = json.dumps(
{"text": uncommon_text, "error": False, "percentage": 0}
)
sanitized_json = json.dumps(
{"text": sanitized_text, "error": False, "percentage": 0}
)
container.parse_progress(d, valid_json)
print_progress_mock.assert_called_with(
d, False, "UNTRUSTED> " + sanitized_text, 0
)

# Test 4 - Check that a structurally valid JSON, that otherwise does not have the
# expected keys, or the expected value types, is reported as an error, and any
# escape sequences are sanitized.

keys = ["text", "error", "percentage", uncommon_text]
values = [uncommon_text, False, 0, None]
possible_kvs = list(itertools.product(keys, values, repeat=1))

# Based on the above keys and values, create any combination possible, from 0 to 3
# elements. Take extra care to:
#
# * Remove combinations with non-unique keys.
# * Ignore the sole valid combination (see `valid_json`), since we have already
# tested it above.
for i in range(len(keys)):
for combo in itertools.combinations(possible_kvs, i):
dict_combo: Dict[str, Any] = dict(combo) # type: ignore [arg-type]
if len(combo) == len(dict_combo.keys()):
bad_json = json.dumps(dict_combo)
sanitized_json = bad_json.replace(uncommon_text, sanitized_text)
if bad_json == valid_json:
continue
container.parse_progress(d, bad_json)
assert_invalid_json(sanitized_json)
pass

0 comments on commit abd3bab

Please sign in to comment.