Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TVMScript] Text underlining in DocPrinter based on Doc's source_paths #12344

Merged
merged 4 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions include/tvm/script/printer/doc_printer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@ namespace printer {
* This function unpacks the DocPrinterOptions into function arguments
* to be FFI friendly.
*
* \param doc the doc to be converted
* \param indent_spaces the number of spaces used for indention
* \param doc Doc to be converted
* \param indent_spaces Number of spaces used for indentation
* \param print_line_numbers Whether to print line numbers
* \param num_context_lines Number of context lines to print around the underlined text
* \param path_to_underline Object path to be underlined
*/
String DocToPythonScript(Doc doc, int indent_spaces = 4);
String DocToPythonScript(Doc doc, int indent_spaces = 4, bool print_line_numbers = false,
int num_context_lines = -1,
Optional<ObjectPath> path_to_underline = NullOpt);

} // namespace printer
} // namespace script
Expand Down
22 changes: 20 additions & 2 deletions python/tvm/script/printer/doc_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,19 @@
# under the License.
"""Functions to print doc into text format"""

from typing import Optional
from tvm.runtime.object_path import ObjectPath
from . import _ffi_api
from .doc import Doc


def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
def to_python_script(
doc: Doc,
indent_spaces: int = 4,
print_line_numbers: bool = False,
num_context_lines: Optional[int] = None,
path_to_underline: Optional[ObjectPath] = None,
) -> str:
"""Convert Doc into Python script.

Parameters
Expand All @@ -29,10 +37,20 @@ def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
The doc to convert into Python script
indent_spaces : int
The number of indent spaces to use in the output
print_line_numbers: bool
Whether to print line numbers
num_context_lines : Optional[int]
Number of context lines to print around the underlined text
path_to_underline : Optional[ObjectPath]
Object path to be underlined

Returns
-------
script : str
The text representation of Doc in Python syntax
"""
return _ffi_api.DocToPythonScript(doc, indent_spaces) # type: ignore # pylint: disable=no-member
if num_context_lines is None:
num_context_lines = -1
return _ffi_api.DocToPythonScript(
doc, indent_spaces, print_line_numbers, num_context_lines, path_to_underline
) # type: ignore # pylint: disable=no-member
261 changes: 258 additions & 3 deletions src/script/printer/base_doc_printer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,256 @@ namespace tvm {
namespace script {
namespace printer {

DocPrinter::DocPrinter(int indent_spaces) : indent_spaces_(indent_spaces) {}
namespace {

void DocPrinter::Append(const Doc& doc) { PrintDoc(doc); }
void SortAndMergeSpans(std::vector<ByteSpan>* spans) {
if (spans->empty()) {
return;
}
std::sort(spans->begin(), spans->end());
auto last = spans->begin();
for (auto cur = spans->begin() + 1; cur != spans->end(); ++cur) {
if (cur->first > last->second) {
*++last = *cur;
} else if (cur->second > last->second) {
last->second = cur->second;
}
}
spans->erase(++last, spans->end());
}

/*!
 * \brief Count the printable characters of `text` inside a byte span.
 *
 * The result is used as the on-screen width of that piece of text.
 *
 * \param text The text being measured.
 * \param span Half-open byte range [first, second) within `text`.
 * \return Number of bytes in the range for which isprint() is true.
 */
size_t GetTextWidth(const std::string& text, const ByteSpan& span) {
  // FIXME: this only works for ASCII characters.
  // To do this "correctly", we need to parse UTF-8 into codepoints
  // and call wcwidth() or equivalent for every codepoint.
  size_t ret = 0;
  for (size_t i = span.first; i != span.second; ++i) {
    // The <cctype> classifiers have undefined behavior when given a value that
    // is neither EOF nor representable as unsigned char. Plain `char` may be
    // signed, so bytes >= 0x80 (e.g. UTF-8 sequences) must be converted first.
    if (isprint(static_cast<unsigned char>(text[i]))) {
      ret += 1;
    }
  }
  return ret;
}

/*!
 * \brief Step `distance` positions back from `pos`, stopping at zero.
 * \return `pos - distance`, or 0 when `distance` exceeds `pos`.
 */
size_t MoveBack(size_t pos, size_t distance) {
  if (pos < distance) {
    return 0;
  }
  return pos - distance;
}

/*!
 * \brief Step `distance` positions forward from `pos`, stopping at `max`.
 * \return `pos + distance`, or `max` when `distance` exceeds the room left before `max`.
 */
size_t MoveForward(size_t pos, size_t distance, size_t max) {
  // Comparing against the remaining room avoids overflowing `pos + distance`.
  const size_t room = max - pos;
  if (distance > room) {
    return max;
  }
  return pos + distance;
}

/*!
 * \brief Find the zero-based index of the line that contains a byte position.
 *
 * \param byte_pos Byte offset to look up.
 * \param line_starts Ascending byte offsets at which each line begins.
 * \return Index of the last entry of `line_starts` that is <= `byte_pos`.
 */
size_t GetLineIndex(size_t byte_pos, const std::vector<size_t>& line_starts) {
  // upper_bound yields the first line that starts strictly after byte_pos;
  // the line just before it is the one holding the position.
  const auto first_later_line =
      std::upper_bound(line_starts.begin(), line_starts.end(), byte_pos);
  const size_t num_lines_started = static_cast<size_t>(first_later_line - line_starts.begin());
  return num_lines_started - 1;
}

using UnderlineIter = typename std::vector<ByteSpan>::const_iterator;

/*!
 * \brief Fetch the span under the cursor and advance the cursor past it.
 *
 * \param next_underline Cursor into the underline list; advanced when a span is returned.
 * \param end_underline End of the range the cursor may walk.
 * \return The next span, or a sentinel span whose both ends are the maximum
 *         size_t value once the cursor has reached `end_underline`.
 */
ByteSpan PopNextUnderline(UnderlineIter* next_underline, UnderlineIter end_underline) {
  if (*next_underline != end_underline) {
    const ByteSpan& popped = **next_underline;
    ++*next_underline;
    return popped;
  }
  // Exhausted: hand back a span that starts beyond any real byte offset.
  constexpr size_t kNoMoreUnderlines = std::numeric_limits<size_t>::max();
  return {kNoMoreUnderlines, kNoMoreUnderlines};
}

/*!
 * \brief Append a range of text lines to `out`, adding a row of '^' carets
 *        beneath each line that is touched by one of the given underlines.
 *
 * \param lines Half-open range [first, second) of line indices to print.
 * \param underlines Iterator range of the byte spans to underline in this chunk.
 * \param text The full text the lines are taken from.
 * \param line_starts Byte offset at which each line of `text` begins.
 * \param options Printer options; `print_line_numbers` is read here.
 * \param line_number_width Width of the line-number column (also used to indent the caret row).
 * \param out String that receives the output.
 */
void PrintChunk(const std::pair<size_t, size_t>& lines,
gbonik marked this conversation as resolved.
Show resolved Hide resolved
const std::pair<UnderlineIter, UnderlineIter>& underlines, const std::string& text,
const std::vector<size_t>& line_starts, const DocPrinterOptions& options,
size_t line_number_width, std::string* out) {
// `current_underline` is the span still waiting to be drawn; it becomes a
// max-valued sentinel once the iterator range is exhausted.
UnderlineIter next_underline = underlines.first;
ByteSpan current_underline = PopNextUnderline(&next_underline, underlines.second);

for (size_t line_idx = lines.first; line_idx < lines.second; ++line_idx) {
if (options.print_line_numbers) {
// One-based line number followed by a space, left-padded with spaces up to
// `line_number_width` characters.
std::string line_num_str = std::to_string(line_idx + 1);
line_num_str.push_back(' ');
for (size_t i = line_num_str.size(); i < line_number_width; ++i) {
out->push_back(' ');
}
*out += line_num_str;
}

// The line spans [line_start, line_end); the last entry of `line_starts`
// runs to the end of the text.
size_t line_start = line_starts.at(line_idx);
size_t line_end =
line_idx + 1 == line_starts.size() ? text.size() : line_starts.at(line_idx + 1);
out->append(text.begin() + line_start, text.begin() + line_end);

// State for the caret row of this line:
//  - printed_underline: whether the caret row has been started,
//  - line_pos: byte offset up to which the caret row has been filled in,
//  - printed_extra_caret: a caret was emitted for a zero-width span and the
//    next run of spaces or carets must be shortened by one to compensate.
bool printed_underline = false;
size_t line_pos = line_start;
bool printed_extra_caret = 0;
while (current_underline.first < line_end) {
if (!printed_underline) {
// Indent the caret row by the width of the line-number column.
*out += std::string(line_number_width, ' ');
printed_underline = true;
}

// Only the part of the span that lies on this line is drawn now; any
// remainder is handled when the following line is printed.
size_t underline_end_for_line = std::min(line_end, current_underline.second);
// Pad with spaces from the current position up to where the span begins.
size_t num_spaces = GetTextWidth(text, {line_pos, current_underline.first});
if (num_spaces > 0 && printed_extra_caret) {
num_spaces -= 1;
printed_extra_caret = false;
}
*out += std::string(num_spaces, ' ');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it skip printing carets under the indentation spaces for a multi-line underline?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, I could see arguments either way.

for i in T.serial(10):
^^^^^^^^^^^^^^^^^^^^^^
    a[i] = 5
^^^^^^^^^^^^
for i in T.serial(10):
^^^^^^^^^^^^^^^^^^^^^^
    a[i] = 5
    ^^^^^^^^

I'd say that the first option provides a better sense of "continuity": it makes it clear that this is one chunk of text being highlighted, as opposed to two different chunks. It also seems to simplify the implementation :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sense. What I was thinking about is the deeply nested code, like

                  for i in T.serial(10):
                  ^^^^^^^^^^^^^^^^^^^^^^
                     a[i] = 5
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     b[i] = 6
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

But it does add more complexity to the implementation. We can keep it as is for now and make improvements in the future if needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point about the nested code. But let's give the simpler implementation a shot and then add more complexity if people hate this behavior.


// Draw carets for the part of the current span that lies on this line.
size_t num_carets = GetTextWidth(text, {current_underline.first, underline_end_for_line});
if (num_carets == 0 && !printed_extra_caret) {
// Special case: when underlining an empty or unprintable string, make sure to print
// at least one caret still.
num_carets = 1;
printed_extra_caret = true;
} else if (num_carets > 0 && printed_extra_caret) {
// A caret was already emitted for a preceding zero-width span; drop one
// here so the row stays aligned with the text above.
num_carets -= 1;
printed_extra_caret = false;
}
*out += std::string(num_carets, '^');

// Consume the drawn part of the span. Once the span is fully drawn, move on
// to the next one; otherwise its start now equals `line_end`, which ends the
// loop and leaves the rest for the next line.
line_pos = current_underline.first = underline_end_for_line;
if (current_underline.first == current_underline.second) {
current_underline = PopNextUnderline(&next_underline, underlines.second);
}
}

// Terminate the caret row, if one was started for this line.
if (printed_underline) {
out->push_back('\n');
}
}
}

/*!
 * \brief Append a marker line stating how many lines were left out.
 *
 * Nothing is appended when no lines were skipped.
 *
 * \param num_lines_skipped Number of lines omitted at this point.
 * \param out String that receives the marker.
 */
void PrintCut(size_t num_lines_skipped, std::string* out) {
  if (num_lines_skipped == 0) {
    return;
  }
  std::ostringstream marker;
  marker << "(... " << num_lines_skipped << " lines skipped ...)\n";
  out->append(marker.str());
}

std::pair<size_t, size_t> GetLinesForUnderline(const ByteSpan& underline,
const std::vector<size_t>& line_starts,
size_t num_lines, const DocPrinterOptions& options) {
size_t first_line_of_underline = GetLineIndex(underline.first, line_starts);
size_t first_line_of_chunk = MoveBack(first_line_of_underline, options.num_context_lines);
size_t end_line_of_underline = GetLineIndex(underline.second - 1, line_starts) + 1;
size_t end_line_of_chunk =
MoveForward(end_line_of_underline, options.num_context_lines, num_lines);

return {first_line_of_chunk, end_line_of_chunk};
}

// A gap of a single line between two chunks is printed verbatim instead of
// being replaced by a "(... 1 line skipped ...)" marker, so a gap must be at
// least this many lines long before it is cut out.
constexpr const size_t kMinLinesToCutOut = 2;

/*!
 * \brief Extend `cur_chunk` to absorb `new_chunk` when the gap between them is too small to cut.
 *
 * \param cur_chunk Chunk being accumulated; its end is updated on a merge.
 * \param new_chunk Chunk that follows `cur_chunk`.
 * \return true if the chunks were merged, false if `cur_chunk` was left untouched.
 */
bool TryMergeChunks(std::pair<size_t, size_t>* cur_chunk,
                    const std::pair<size_t, size_t>& new_chunk) {
  const bool gap_too_small = new_chunk.first < cur_chunk->second + kMinLinesToCutOut;
  if (gap_too_small) {
    cur_chunk->second = new_chunk.second;
  }
  return gap_too_small;
}

/*!
 * \brief Count the lines of `text`, ignoring an empty line after the final newline.
 *
 * \param text The text whose lines are counted.
 * \param line_starts Byte offset at which each line begins; must not be empty.
 * \return Number of lines.
 */
size_t GetNumLines(const std::string& text, const std::vector<size_t>& line_starts) {
  // A last "line" that starts exactly at the end of the text has no content
  // and does not count as a line.
  const bool last_line_is_empty = (text.size() == line_starts.back());
  return last_line_is_empty ? line_starts.size() - 1 : line_starts.size();
}

/*!
 * \brief Width of the line-number column.
 *
 * \param num_lines Total number of lines, i.e. the largest line number printed.
 * \param options Printer options; `print_line_numbers` is read here.
 * \return Digits in `num_lines` plus one for the separating space, or 0 when
 *         line numbers are disabled.
 */
size_t GetLineNumberWidth(size_t num_lines, const DocPrinterOptions& options) {
  if (!options.print_line_numbers) {
    return 0;
  }
  const std::string widest_label = std::to_string(num_lines);
  return widest_label.size() + 1;
}

/*!
 * \brief Produce the final output: the text with optional line numbers,
 *        caret rows under the underlined spans, and "lines skipped" markers
 *        in place of lines that are not part of any printed chunk.
 *
 * \param text The text to decorate.
 * \param line_starts Byte offset at which each line of `text` begins.
 * \param options Printer options, forwarded to the helpers.
 * \param underlines Byte spans to underline. NOTE(review): the only visible
 *        caller runs SortAndMergeSpans on them first; the chunk merging below
 *        appears to rely on that ordering.
 * \return The decorated text.
 */
std::string DecorateText(const std::string& text, const std::vector<size_t>& line_starts,
const DocPrinterOptions& options,
const std::vector<ByteSpan>& underlines) {
size_t num_lines = GetNumLines(text, line_starts);
size_t line_number_width = GetLineNumberWidth(num_lines, options);

std::string ret;
if (underlines.empty()) {
// Nothing to underline: print every line as one chunk with an empty underline range.
PrintChunk({0, num_lines}, {underlines.begin(), underlines.begin()}, text, line_starts, options,
line_number_width, &ret);
return ret;
}

// `last_end_line` is the end of the last chunk already printed; `cur_chunk`
// is the chunk being accumulated from consecutive underlines.
size_t last_end_line = 0;
std::pair<size_t, size_t> cur_chunk =
GetLinesForUnderline(underlines[0], line_starts, num_lines, options);
// Do not cut fewer than kMinLinesToCutOut lines at the very top; print them instead.
if (cur_chunk.first < kMinLinesToCutOut) {
cur_chunk.first = 0;
}

auto first_underline_in_cur_chunk = underlines.begin();
for (auto underline_it = underlines.begin() + 1; underline_it != underlines.end();
++underline_it) {
std::pair<size_t, size_t> new_chunk =
GetLinesForUnderline(*underline_it, line_starts, num_lines, options);

// If the new chunk cannot be merged into the current one, flush the current
// chunk (preceded by a marker for the lines skipped before it) and start a
// new chunk at this underline.
if (!TryMergeChunks(&cur_chunk, new_chunk)) {
PrintCut(cur_chunk.first - last_end_line, &ret);
PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underline_it}, text, line_starts,
options, line_number_width, &ret);
last_end_line = cur_chunk.second;
cur_chunk = new_chunk;
first_underline_in_cur_chunk = underline_it;
}
}

// Flush the final chunk. As at the top, a tail shorter than
// kMinLinesToCutOut lines is printed rather than cut.
PrintCut(cur_chunk.first - last_end_line, &ret);
if (num_lines - cur_chunk.second < kMinLinesToCutOut) {
cur_chunk.second = num_lines;
}
PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underlines.end()}, text, line_starts,
options, line_number_width, &ret);
PrintCut(num_lines - cur_chunk.second, &ret);
return ret;
}

} // anonymous namespace

/*!
 * \brief Construct a printer with the given options.
 * \param options Options stored for use when the output is produced.
 */
DocPrinter::DocPrinter(const DocPrinterOptions& options) : options_(options) {
  // The first line always begins at byte offset 0.
  line_starts_.emplace_back(0);
}

void DocPrinter::Append(const Doc& doc) { Append(doc, NullOpt); }

/*!
 * \brief Print a doc and record the spans that should be underlined for it.
 *
 * \param doc The doc to append to the output.
 * \param path_to_underline Object path whose text should be underlined, if any.
 */
void DocPrinter::Append(const Doc& doc, Optional<ObjectPath> path_to_underline) {
  // Start from a clean slate: candidates from an earlier Append() must not
  // leak into this one.
  current_underline_candidates_.clear();
  current_max_path_length_ = 0;
  path_to_underline_ = path_to_underline;

  PrintDoc(doc);

  // Keep whichever candidates were collected while printing.
  underlines_.insert(underlines_.end(), current_underline_candidates_.begin(),
                     current_underline_candidates_.end());
}

/*!
 * \brief Return everything printed so far as decorated text.
 *
 * Trailing spaces are stripped and a final newline is added when missing.
 * The recorded underlines are then sorted and merged, and the text is passed
 * through DecorateText together with the line starts and printer options.
 *
 * \return The decorated text.
 */
String DocPrinter::GetString() const {
  std::string text = output_.str();

  // Remove any trailing indentation
  while (!text.empty() && text.back() == ' ') {
    text.pop_back();
  }

  if (!text.empty() && text.back() != '\n') {
    text.push_back('\n');
  }

  // The stale `return text;` that used to sit here made the decoration step
  // below unreachable, so underlines were never rendered.

  // Work on a copy: this method is const and must not reorder the stored underlines.
  std::vector<ByteSpan> underlines = underlines_;
  SortAndMergeSpans(&underlines);
  return DecorateText(text, line_starts_, options_, underlines);
}

void DocPrinter::PrintDoc(const Doc& doc) {
size_t start_pos = output_.tellp();

if (const auto* doc_node = doc.as<LiteralDocNode>()) {
PrintTypedDoc(GetRef<LiteralDoc>(doc_node));
} else if (const auto* doc_node = doc.as<IdDocNode>()) {
Expand Down Expand Up @@ -84,6 +321,24 @@ void DocPrinter::PrintDoc(const Doc& doc) {
LOG(FATAL) << "Do not know how to print " << doc->GetTypeKey();
throw;
}

size_t end_pos = output_.tellp();
for (const ObjectPath& path : doc->source_paths) {
MarkSpan({start_pos, end_pos}, path);
}
}

/*!
 * \brief Consider a printed span as a candidate for underlining.
 *
 * A span qualifies when its path is a prefix of the path to underline. Only
 * the candidates with the longest such path seen so far are kept.
 *
 * \param span Byte range of the output produced for the doc.
 * \param path Source path associated with that doc.
 */
void DocPrinter::MarkSpan(const ByteSpan& span, const ObjectPath& path) {
  if (!path_to_underline_.defined()) {
    return;
  }
  // Shorter than the best match so far: cannot be a better candidate.
  if (path->Length() < current_max_path_length_) {
    return;
  }
  if (!path->IsPrefixOf(path_to_underline_.value())) {
    return;
  }

  if (path->Length() > current_max_path_length_) {
    // Strictly longer match: it supersedes every candidate collected earlier.
    current_max_path_length_ = path->Length();
    current_underline_candidates_.clear();
  }
  current_underline_candidates_.push_back(span);
}

} // namespace printer
Expand Down
Loading