apache · junrushao · Aug 11, 2022 · Aug 8, 2022 · Aug 10, 2022 · Aug 10, 2022
diff --git a/include/tvm/script/printer/doc_printer.h b/include/tvm/script/printer/doc_printer.h
@@ -31,10 +31,15 @@ namespace printer {
  * This function unpacks the DocPrinterOptions into function arguments
  * to be FFI friendly.
  *
- * \param doc the doc to be converted
- * \param indent_spaces the number of spaces used for indention
+ * \param doc Doc to be converted
+ * \param indent_spaces Number of spaces used for indentation
+ * \param print_line_numbers Whether to print line numbers
+ * \param num_context_lines Number of context lines to print around the underlined text
+ * \param path_to_underline Object path to be underlined
  */
-String DocToPythonScript(Doc doc, int indent_spaces = 4);
+String DocToPythonScript(Doc doc, int indent_spaces = 4, bool print_line_numbers = false,
+                         int num_context_lines = -1,
+                         Optional<ObjectPath> path_to_underline = NullOpt);
 
 }  // namespace printer
 }  // namespace script

diff --git a/python/tvm/script/printer/doc_printer.py b/python/tvm/script/printer/doc_printer.py
@@ -16,11 +16,19 @@
 # under the License.
 """Functions to print doc into text format"""
 
+from typing import Optional
+from tvm.runtime.object_path import ObjectPath
 from . import _ffi_api
 from .doc import Doc
 
 
-def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
+def to_python_script(
+    doc: Doc,
+    indent_spaces: int = 4,
+    print_line_numbers: bool = False,
+    num_context_lines: Optional[int] = None,
+    path_to_underline: Optional[ObjectPath] = None,
+) -> str:
     """Convert Doc into Python script.
 
     Parameters
@@ -29,10 +37,20 @@ def to_python_script(doc: Doc, indent_spaces: int = 4) -> str:
         The doc to convert into Python script
     indent_spaces : int
         The number of indent spaces to use in the output
+    print_line_numbers : bool
+        Whether to print line numbers
+    num_context_lines : Optional[int]
+        Number of context lines to print around the underlined text
+    path_to_underline : Optional[ObjectPath]
+        Object path to be underlined
 
     Returns
     -------
     script : str
         The text representation of Doc in Python syntax
     """
-    return _ffi_api.DocToPythonScript(doc, indent_spaces)  # type: ignore # pylint: disable=no-member
+    if num_context_lines is None:
+        num_context_lines = -1
+    return _ffi_api.DocToPythonScript(  # type: ignore
+        doc, indent_spaces, print_line_numbers, num_context_lines, path_to_underline
+    )
diff --git a/src/script/printer/base_doc_printer.cc b/src/script/printer/base_doc_printer.cc
@@ -23,19 +23,256 @@ namespace tvm {
 namespace script {
 namespace printer {
 
-DocPrinter::DocPrinter(int indent_spaces) : indent_spaces_(indent_spaces) {}
+namespace {
 
-void DocPrinter::Append(const Doc& doc) { PrintDoc(doc); }
+/*!
+ * \brief Sort the spans and coalesce the ones that overlap or touch each other.
+ * \param spans Spans to normalize; the vector is modified in place.
+ */
+void SortAndMergeSpans(std::vector<ByteSpan>* spans) {
+  if (spans->empty()) {
+    return;
+  }
+  std::sort(spans->begin(), spans->end());
+
+  // The merged result is compacted into the prefix [0, tail] of the vector.
+  size_t tail = 0;
+  for (size_t i = 1; i < spans->size(); ++i) {
+    const ByteSpan& candidate = (*spans)[i];
+    ByteSpan& merged = (*spans)[tail];
+    if (merged.second < candidate.first) {
+      // Starts strictly after the current merged span: open a new one.
+      ++tail;
+      (*spans)[tail] = candidate;
+    } else if (merged.second < candidate.second) {
+      // Overlaps or touches the current merged span: extend it.
+      merged.second = candidate.second;
+    }
+  }
+  spans->erase(spans->begin() + tail + 1, spans->end());
+}
+
+/*!
+ * \brief Count the printable bytes of `text` that fall inside `span`.
+ * \param text Text that the span indexes into.
+ * \param span Half-open byte range [first, second); expected to lie within `text`.
+ * \return Number of bytes in the range for which isprint() is true.
+ */
+size_t GetTextWidth(const std::string& text, const ByteSpan& span) {
+  // FIXME: this only works for ASCII characters.
+  // To do this "correctly", we need to parse UTF-8 into codepoints
+  // and call wcwidth() or equivalent for every codepoint.
+  size_t ret = 0;
+  // Use `<` rather than `!=` so that an inverted span yields 0 instead of running off the text.
+  for (size_t i = span.first; i < span.second; ++i) {
+    // isprint() is undefined for negative arguments other than EOF, which is what a non-ASCII
+    // byte turns into when `char` is signed, so go through unsigned char first.
+    if (isprint(static_cast<unsigned char>(text[i]))) {
+      ret += 1;
+    }
+  }
+  return ret;
+}
+
+/*!
+ * \brief Move a position backwards, saturating at zero.
+ * \param pos Starting position.
+ * \param distance How far to move back.
+ * \return `pos - distance`, or 0 when `distance` exceeds `pos`.
+ */
+size_t MoveBack(size_t pos, size_t distance) {
+  if (pos < distance) {
+    return 0;
+  }
+  return pos - distance;
+}
+
+/*!
+ * \brief Move a position forwards, saturating at `max`.
+ * \param pos Starting position.
+ * \param distance How far to move forward.
+ * \param max Upper bound for the result.
+ * \return `pos + distance`, or `max` when `distance` exceeds the room left before `max`.
+ */
+size_t MoveForward(size_t pos, size_t distance, size_t max) {
+  // Comparing against the remaining room avoids overflowing `pos + distance`.
+  const size_t room = max - pos;
+  if (distance <= room) {
+    return pos + distance;
+  }
+  return max;
+}
+
+/*!
+ * \brief Find the index of the last entry of `line_starts` that is not greater than `byte_pos`.
+ * \param byte_pos Byte offset to look up.
+ * \param line_starts Offsets to search; std::upper_bound requires them to be sorted.
+ * \return Zero-based index of that entry.
+ */
+size_t GetLineIndex(size_t byte_pos, const std::vector<size_t>& line_starts) {
+  const auto first_start_after_pos =
+      std::upper_bound(line_starts.begin(), line_starts.end(), byte_pos);
+  const auto num_starts_up_to_pos = first_start_after_pos - line_starts.begin();
+  return num_starts_up_to_pos - 1;
+}
+
+using UnderlineIter = std::vector<ByteSpan>::const_iterator;
+
+/*!
+ * \brief Return the underline at `*next_underline` and advance the iterator past it.
+ * \param next_underline Iterator to read from and advance.
+ * \param end_underline End of the underline sequence.
+ * \return The underline read, or a span whose both ends are the maximum size_t value once
+ *         the sequence is exhausted.
+ */
+ByteSpan PopNextUnderline(UnderlineIter* next_underline, UnderlineIter end_underline) {
+  if (*next_underline != end_underline) {
+    const ByteSpan& popped = **next_underline;
+    ++*next_underline;
+    return popped;
+  }
+  constexpr size_t kNoPosition = std::numeric_limits<size_t>::max();
+  return {kNoPosition, kNoPosition};
+}
+
+/*!
+ * \brief Append a range of lines of `text` to `out`, with optional line numbers and with a
+ *        line of carets below every line that contains underlined bytes.
+ * \param lines_range Half-open range of zero-based line indices to print.
+ * \param underlines Iterator range of the underlines to draw. NOTE(review): they are consumed
+ *        strictly in order, so they are presumably sorted and non-overlapping - confirm.
+ * \param text Full text being decorated.
+ * \param line_starts Byte offset in `text` at which each line begins.
+ * \param options Printer options; only `print_line_numbers` is read here.
+ * \param line_number_width Width of the line number column, used to pad line numbers and to
+ *        indent the caret lines.
+ * \param out String that the result is appended to.
+ */
+void PrintChunk(const std::pair<size_t, size_t>& lines_range,
+                const std::pair<UnderlineIter, UnderlineIter>& underlines, const std::string& text,
+                const std::vector<size_t>& line_starts, const DocPrinterOptions& options,
+                size_t line_number_width, std::string* out) {
+  UnderlineIter next_underline = underlines.first;
+  ByteSpan current_underline = PopNextUnderline(&next_underline, underlines.second);
+
+  for (size_t line_idx = lines_range.first; line_idx < lines_range.second; ++line_idx) {
+    if (options.print_line_numbers) {
+      // Line numbers are one-based and right-aligned within the column.
+      std::string line_num_str = std::to_string(line_idx + 1);
+      line_num_str.push_back(' ');
+      for (size_t i = line_num_str.size(); i < line_number_width; ++i) {
+        out->push_back(' ');
+      }
+      *out += line_num_str;
+    }
+
+    size_t line_start = line_starts.at(line_idx);
+    size_t line_end =
+        line_idx + 1 == line_starts.size() ? text.size() : line_starts.at(line_idx + 1);
+    out->append(text.begin() + line_start, text.begin() + line_end);
+
+    bool printed_underline = false;
+    size_t line_pos = line_start;
+    // Set when a caret was printed for a zero-width underline; the extra column is taken
+    // back from the next run of spaces or carets on this line.
+    bool printed_extra_caret = false;
+    while (current_underline.first < line_end) {
+      if (!printed_underline) {
+        // Indent the caret line so that it lines up with the text after the line number.
+        *out += std::string(line_number_width, ' ');
+        printed_underline = true;
+      }
+
+      size_t underline_end_for_line = std::min(line_end, current_underline.second);
+      size_t num_spaces = GetTextWidth(text, {line_pos, current_underline.first});
+      if (num_spaces > 0 && printed_extra_caret) {
+        num_spaces -= 1;
+        printed_extra_caret = false;
+      }
+      *out += std::string(num_spaces, ' ');
+
+      size_t num_carets = GetTextWidth(text, {current_underline.first, underline_end_for_line});
+      if (num_carets == 0 && !printed_extra_caret) {
+        // Special case: when underlining an empty or unprintable string, make sure to print
+        // at least one caret still.
+        num_carets = 1;
+        printed_extra_caret = true;
+      } else if (num_carets > 0 && printed_extra_caret) {
+        num_carets -= 1;
+        printed_extra_caret = false;
+      }
+      *out += std::string(num_carets, '^');
+
+      // Whatever remains of the underline continues on the next line.
+      line_pos = current_underline.first = underline_end_for_line;
+      if (current_underline.first == current_underline.second) {
+        current_underline = PopNextUnderline(&next_underline, underlines.second);
+      }
+    }
+
+    if (printed_underline) {
+      out->push_back('\n');
+    }
+  }
+}
+
+/*!
+ * \brief Append a notice saying how many lines were left out; does nothing for zero lines.
+ * \param num_lines_skipped Number of lines that were skipped.
+ * \param out String that the notice is appended to.
+ */
+void PrintCut(size_t num_lines_skipped, std::string* out) {
+  if (num_lines_skipped == 0) {
+    return;
+  }
+  std::ostringstream notice;
+  notice << "(... " << num_lines_skipped << " lines skipped ...)\n";
+  out->append(notice.str());
+}
+
+/*!
+ * \brief Compute the range of lines to show for one underline, including context lines.
+ * \param underline Byte span being underlined.
+ * \param line_starts Byte offset at which each line begins.
+ * \param num_lines Number of printable lines; upper bound for the returned range.
+ * \param options Printer options; `num_context_lines` is the amount of context on each side.
+ * \return Half-open range of zero-based line indices.
+ */
+std::pair<size_t, size_t> GetLinesForUnderline(const ByteSpan& underline,
+                                               const std::vector<size_t>& line_starts,
+                                               size_t num_lines, const DocPrinterOptions& options) {
+  size_t first_line_of_underline = GetLineIndex(underline.first, line_starts);
+  size_t first_line_of_chunk = MoveBack(first_line_of_underline, options.num_context_lines);
+  // An empty underline has no last byte: `underline.second - 1` would point into the previous
+  // line, or wrap around when the underline sits at byte 0. Use its own position instead so
+  // that the line holding the caret is always part of the chunk.
+  size_t last_byte_of_underline =
+      underline.second > underline.first ? underline.second - 1 : underline.first;
+  // Clamp to `num_lines` so that MoveForward() never starts past its upper bound.
+  size_t end_line_of_underline =
+      std::min(GetLineIndex(last_byte_of_underline, line_starts) + 1, num_lines);
+  size_t end_line_of_chunk =
+      MoveForward(end_line_of_underline, options.num_context_lines, num_lines);
+
+  return {first_line_of_chunk, end_line_of_chunk};
+}
+
+// Smallest gap, in lines, that is worth replacing with a "(... N lines skipped ...)" notice.
+// A single line between two chunks is simply printed as is.
+constexpr size_t kMinLinesToCutOut = 2;
+
+/*!
+ * \brief Extend `cur_chunk` to cover `new_chunk` when the gap between them is too small to cut.
+ * \param cur_chunk Chunk to extend; its end is overwritten with the end of `new_chunk` on merge.
+ * \param new_chunk Chunk that follows `cur_chunk`.
+ * \return true if the chunks were merged, false if `cur_chunk` was left untouched.
+ */
+bool TryMergeChunks(std::pair<size_t, size_t>* cur_chunk,
+                    const std::pair<size_t, size_t>& new_chunk) {
+  const bool gap_too_small = new_chunk.first < cur_chunk->second + kMinLinesToCutOut;
+  if (gap_too_small) {
+    cur_chunk->second = new_chunk.second;
+  }
+  return gap_too_small;
+}
+
+/*!
+ * \brief Count the lines of `text`, ignoring a final line that has no content.
+ * \param text Text whose lines are counted.
+ * \param line_starts Byte offset at which each line begins; must not be empty.
+ * \return Number of entries in `line_starts`, minus one if the last line starts at the very
+ *         end of the text.
+ */
+size_t GetNumLines(const std::string& text, const std::vector<size_t>& line_starts) {
+  const bool last_line_is_empty = line_starts.back() == text.size();
+  return last_line_is_empty ? line_starts.size() - 1 : line_starts.size();
+}
+
+/*!
+ * \brief Compute the width of the line number column.
+ * \param num_lines Largest line number that can be printed.
+ * \param options Printer options; `print_line_numbers` enables the column.
+ * \return Number of decimal digits of `num_lines` plus one, or 0 when line numbers are off.
+ */
+size_t GetLineNumberWidth(size_t num_lines, const DocPrinterOptions& options) {
+  if (!options.print_line_numbers) {
+    return 0;
+  }
+  const size_t num_digits = std::to_string(num_lines).size();
+  return num_digits + 1;
+}
+
+/*!
+ * \brief Produce the final output: the text with optional line numbers, caret lines below the
+ *        underlined spans, and "(... N lines skipped ...)" notices in place of the lines that
+ *        are outside the context of every underline.
+ * \param text Text to decorate.
+ * \param line_starts Byte offset at which each line of `text` begins.
+ * \param options Printer options, forwarded to the helpers.
+ * \param underlines Spans to underline. GetString() passes the result of SortAndMergeSpans();
+ *        NOTE(review): the chunk merging below appears to rely on that ordering - confirm.
+ * \return The decorated text.
+ */
+std::string DecorateText(const std::string& text, const std::vector<size_t>& line_starts,
+                         const DocPrinterOptions& options,
+                         const std::vector<ByteSpan>& underlines) {
+  size_t num_lines = GetNumLines(text, line_starts);
+  size_t line_number_width = GetLineNumberWidth(num_lines, options);
+
+  std::string ret;
+  if (underlines.empty()) {
+    // Nothing to underline: print every line as a single chunk, with no cuts.
+    PrintChunk({0, num_lines}, {underlines.begin(), underlines.begin()}, text, line_starts, options,
+               line_number_width, &ret);
+    return ret;
+  }
+
+  // Index of the line right after the last chunk printed so far.
+  size_t last_end_line = 0;
+  std::pair<size_t, size_t> cur_chunk =
+      GetLinesForUnderline(underlines[0], line_starts, num_lines, options);
+  if (cur_chunk.first < kMinLinesToCutOut) {
+    // Too few leading lines to be worth a "skipped" notice: start at the first line.
+    cur_chunk.first = 0;
+  }
+
+  auto first_underline_in_cur_chunk = underlines.begin();
+  for (auto underline_it = underlines.begin() + 1; underline_it != underlines.end();
+       ++underline_it) {
+    std::pair<size_t, size_t> new_chunk =
+        GetLinesForUnderline(*underline_it, line_starts, num_lines, options);
+
+    if (!TryMergeChunks(&cur_chunk, new_chunk)) {
+      // The next underline is far enough away: flush the current chunk and start a new one.
+      PrintCut(cur_chunk.first - last_end_line, &ret);
+      PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underline_it}, text, line_starts,
+                 options, line_number_width, &ret);
+      last_end_line = cur_chunk.second;
+      cur_chunk = new_chunk;
+      first_underline_in_cur_chunk = underline_it;
+    }
+  }
+
+  // Flush the last chunk, followed by a notice for whatever is left after it.
+  PrintCut(cur_chunk.first - last_end_line, &ret);
+  if (num_lines - cur_chunk.second < kMinLinesToCutOut) {
+    // Too few trailing lines to be worth a "skipped" notice: print through the last line.
+    cur_chunk.second = num_lines;
+  }
+  PrintChunk(cur_chunk, {first_underline_in_cur_chunk, underlines.end()}, text, line_starts,
+             options, line_number_width, &ret);
+  PrintCut(num_lines - cur_chunk.second, &ret);
+  return ret;
+}
+
+}  // anonymous namespace
+
+DocPrinter::DocPrinter(const DocPrinterOptions& options) : options_(options) {
+  // Record byte offset 0 as the start of the first output line.
+  line_starts_.emplace_back(0);
+}
+
+void DocPrinter::Append(const Doc& doc) {
+  // Same as the two-argument overload, with no path to underline.
+  Append(doc, NullOpt);
+}
+
+void DocPrinter::Append(const Doc& doc, Optional<ObjectPath> path_to_underline) {
+  // Reset the per-call underline search state before printing.
+  current_underline_candidates_.clear();
+  current_max_path_length_ = 0;
+  path_to_underline_ = path_to_underline;
+
+  PrintDoc(doc);
+
+  // Keep the spans that were collected while printing this doc.
+  for (const ByteSpan& span : current_underline_candidates_) {
+    underlines_.push_back(span);
+  }
+}
 
+// Returns the accumulated output with trailing spaces removed, a final newline ensured, and
+// line numbers / underlines / skipped-line notices applied by DecorateText().
 String DocPrinter::GetString() const {
   std::string text = output_.str();
+
+  // Remove any trailing indentation
+  while (!text.empty() && text.back() == ' ') {
+    text.pop_back();
+  }
+
   if (!text.empty() && text.back() != '\n') {
     text.push_back('\n');
   }
-  return text;
+
+  // Work on a copy: this method is const, and the spans must be sorted and merged first.
+  std::vector<ByteSpan> underlines = underlines_;
+  SortAndMergeSpans(&underlines);
+  return DecorateText(text, line_starts_, options_, underlines);
 }
 
 void DocPrinter::PrintDoc(const Doc& doc) {
+  size_t start_pos = output_.tellp();
+
   if (const auto* doc_node = doc.as<LiteralDocNode>()) {
     PrintTypedDoc(GetRef<LiteralDoc>(doc_node));
   } else if (const auto* doc_node = doc.as<IdDocNode>()) {
@@ -84,6 +321,24 @@ void DocPrinter::PrintDoc(const Doc& doc) {
     LOG(FATAL) << "Do not know how to print " << doc->GetTypeKey();
     throw;
   }
+
+  size_t end_pos = output_.tellp();
+  for (const ObjectPath& path : doc->source_paths) {
+    MarkSpan({start_pos, end_pos}, path);
+  }
+}
+
+// Records `span` as an underline candidate when `path` is a prefix of the path to underline
+// and is at least as long as every candidate path seen so far. A strictly longer path
+// discards the candidates collected for shorter ones.
+void DocPrinter::MarkSpan(const ByteSpan& span, const ObjectPath& path) {
+  if (!path_to_underline_.defined()) {
+    return;
+  }
+  const auto path_length = path->Length();
+  if (path_length < current_max_path_length_) {
+    return;
+  }
+  if (!path->IsPrefixOf(path_to_underline_.value())) {
+    return;
+  }
+  if (path_length > current_max_path_length_) {
+    current_max_path_length_ = path_length;
+    current_underline_candidates_.clear();
+  }
+  current_underline_candidates_.push_back(span);
+}
 
 }  // namespace printer