From 12c3378ab9a46fae416a7bb5549e3f123a11d22a Mon Sep 17 00:00:00 2001 From: Liang-Bo Wang Date: Thu, 19 May 2022 02:08:55 +0800 Subject: [PATCH] Pass language to Pygments formatter in CodeHilite * Add an extra option `lang_str` to pass the language of the code block to the specified Pygments formatter. * Include an example custom Pygments formatter in the documentation that includes the language of the code in the output using the new option. Resolves #1255. --- docs/change_log/release-3.4.md | 8 +- docs/extensions/code_hilite.md | 52 ++++++++++- markdown/extensions/codehilite.py | 16 +++- .../extensions/test_code_hilite.py | 86 +++++++++++++++++++ .../extensions/test_fenced_code.py | 46 ++++++++++ 5 files changed, 199 insertions(+), 9 deletions(-) diff --git a/docs/change_log/release-3.4.md b/docs/change_log/release-3.4.md index 6dc840182..7a30de538 100644 --- a/docs/change_log/release-3.4.md +++ b/docs/change_log/release-3.4.md @@ -10,7 +10,7 @@ PyPy3. ### The `table` extension now uses a `style` attribute instead of `align` attribute for alignment. The [HTML4 spec][spec4] specifically -deprecates the use of the `align` attribute and it does not appear at all in the +deprecates the use of the `align` attribute and it does not appear at all in the [HTML5 spec][spec5]. Therefore, by default, the [table] extension will now use the `style` attribute (setting just the `text-align` property) in `td` and `th` blocks. @@ -55,8 +55,10 @@ The following new features have been included in the 3.4 release: parameter which can be used to set the CSS class(es) on the `
` that contains the Table of Contents (#1224). -* The Codehilite extension now supports a `pygments_formatter` option that can be set to - use a custom formatter class with Pygments. +* The CodeHilite extension now supports a `pygments_formatter` option that can be set to + use a custom formatter class with Pygments (#1187). Additionally, the specified + Pygments formatter is passed an extra option `lang_str` to denote the language of + the code block (#1258). - If set to a string like `'html'`, we get the default formatter by that name. - If set to a class (or any callable), it is called with all the options to get a formatter instance. diff --git a/docs/extensions/code_hilite.md b/docs/extensions/code_hilite.md index 6fa6190c7..5179d0a6f 100644 --- a/docs/extensions/code_hilite.md +++ b/docs/extensions/code_hilite.md @@ -231,9 +231,6 @@ The following options are provided to configure the output: * **`lang_prefix`**{ #lang_prefix }: The prefix prepended to the language class assigned to the HTML `` tag. Default: `language-`. - This option only applies when `use_pygments` is `False` as Pygments does not provide an option to include a - language prefix. - * **`pygments_formatter`**{ #pygments_formatter }: This option can be used to change the Pygments formatter used for highlighting the code blocks. By default, this is set to the string `'html'`, which means it'll use the default `HtmlFormatter` provided by Pygments. @@ -241,6 +238,11 @@ The following options are provided to configure the output: This can be set to a string representing any of the other default formatters, or set to a formatter class (or any callable). + The code's language is always passed to the formatter as an extra option `lang_str`, with the value formatted as + `{lang_prefix}{lang}`. If the language is unspecified, the language guessed by Pygments will be used. 
While + this option has no effect on Pygments's built-in formatters, a user can make use of the language in their custom + formatter. See an example below. + To see what formatters are available and how to subclass an existing formatter, please visit [Pygments documentation on this topic][pygments formatters]. @@ -256,6 +258,50 @@ A trivial example: markdown.markdown(some_text, extensions=['codehilite']) ``` +To keep the code block's language in the Pygments generated HTML output, one can provide a custom Pygments formatter +that takes the `lang_str` option. For example, + +```python +from pygments.formatters import HtmlFormatter +from markdown.extensions.codehilite import CodeHiliteExtension + + +class CustomHtmlFormatter(HtmlFormatter): + def __init__(self, lang_str='', **options): + super().__init__(**options) + # lang_str has the value {lang_prefix}{lang} + # specified by the CodeHilite's options + self.lang_str = lang_str + + def _wrap_code(self, source): + yield 0, f'' + yield from source + yield 0, '' + + +some_text = '''\ + :::python + print('hellow world') +''' + +markdown.markdown( + some_text, + extensions=[CodeHiliteExtension(pygments_formatter=CustomHtmlFormatter)], +) +``` + +The formatter above will output the following HTML structure for the code block: + +```html +
+
+        
+        ...
+        
+    
+
+``` + [html formatter]: https://pygments.org/docs/formatters/#HtmlFormatter [lexer]: https://pygments.org/docs/lexers/ [spec]: https://www.w3.org/TR/html5/text-level-semantics.html#the-code-element diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index b92ebdce7..4721c5994 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -64,12 +64,14 @@ class CodeHilite: * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is instead wrapped for highlighting by a JavaScript library. Default: `True`. + * pygments_formatter: The name of a Pygments formatter or a formatter class used for + highlighting the code blocks. Default: `html`. + * linenums: An alias to Pygments `linenos` formatter option. Default: `None`. * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'. - * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`. - Default: "language-". + * lang_prefix: Prefix prepended to the language. Default: "language-". Other Options: Any other options are accepted and passed on to the lexer and formatter. Therefore, @@ -81,6 +83,10 @@ class CodeHilite: Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter Lexer Options: https://pygments.org/docs/lexers/ + Additionally, when Pygments is enabled, the code's language is passed to the + formatter as an extra option `lang_str`, whose value being `{lang_prefix}{lang}`. + This option has no effect to the Pygments's builtin formatters. 
+ + Advanced Usage: code = CodeHilite( src = some_code, @@ -141,13 +147,17 @@ def hilite(self, shebang=True): lexer = get_lexer_by_name('text', **self.options) except ValueError: # pragma: no cover lexer = get_lexer_by_name('text', **self.options) + if not self.lang: + # Use the guessed lexer's language instead + self.lang = lexer.aliases[0] + lang_str = f'{self.lang_prefix}{self.lang}' if isinstance(self.pygments_formatter, str): try: formatter = get_formatter_by_name(self.pygments_formatter, **self.options) except ClassNotFound: formatter = get_formatter_by_name('html', **self.options) else: - formatter = self.pygments_formatter(**self.options) + formatter = self.pygments_formatter(lang_str=lang_str, **self.options) return highlight(self.src, lexer, formatter) else: # just escape and build markup usable by JS highlighting libs diff --git a/tests/test_syntax/extensions/test_code_hilite.py b/tests/test_syntax/extensions/test_code_hilite.py index 41502d9f7..09dd523c1 100644 --- a/tests/test_syntax/extensions/test_code_hilite.py +++ b/tests/test_syntax/extensions/test_code_hilite.py @@ -354,6 +354,22 @@ def setUp(self): if has_pygments and pygments.__version__ != required_pygments_version: self.skipTest(f'Pygments=={required_pygments_version} is required') + # Define a custom Pygments formatter (same example in the documentation) + if has_pygments: + class CustomAddLangHtmlFormatter(pygments.formatters.HtmlFormatter): + def __init__(self, lang_str='', **options): + super().__init__(**options) + self.lang_str = lang_str + + def _wrap_code(self, source): + yield 0, f'' + yield from source + yield 0, '' + else: + CustomAddLangHtmlFormatter = None + + self.custom_pygments_formatter = CustomAddLangHtmlFormatter + maxDiff = None def testBasicCodeHilite(self): @@ -676,3 +692,73 @@ def testMultipleBlocksSameStyle(self): expected, extensions=[CodeHiliteExtension(pygments_style="native", noclasses=True)] ) + + def testFormatterLangStr(self): + if has_pygments: + expected = ( + '
'
+                '# A Code Comment\n'
+                '
' + ) + else: + expected = ( + '
# A Code Comment\n'
+                '
' + ) + + self.assertMarkdownRenders( + '\t:::Python\n' + '\t# A Code Comment', + expected, + extensions=[ + CodeHiliteExtension( + guess_lang=False, + pygments_formatter=self.custom_pygments_formatter + ) + ] + ) + + def testFormatterLangStrGuessLang(self): + if has_pygments: + expected = ( + '
'
+                '<?php '
+                'print('
+                '"Hello World"'
+                '); ?>\n'
+                '
' + ) + else: + expected = ( + '
<?php print("Hello World"); ?>\n'
+                '
' + ) + # Use PHP as the starting `', + expected, + extensions=[CodeHiliteExtension(pygments_formatter=self.custom_pygments_formatter)] + ) + + def testFormatterLangStrEmptyLang(self): + if has_pygments: + expected = ( + '
'
+                '# A Code Comment\n'
+                '
' + ) + else: + expected = ( + '
# A Code Comment\n'
+                '
' + ) + self.assertMarkdownRenders( + '\t# A Code Comment', + expected, + extensions=[ + CodeHiliteExtension( + guess_lang=False, + pygments_formatter=self.custom_pygments_formatter, + ) + ] + ) diff --git a/tests/test_syntax/extensions/test_fenced_code.py b/tests/test_syntax/extensions/test_fenced_code.py index f8c3e91d3..be3c2151d 100644 --- a/tests/test_syntax/extensions/test_fenced_code.py +++ b/tests/test_syntax/extensions/test_fenced_code.py @@ -896,6 +896,52 @@ def _wrap_code(self, source): ] ) + def testPygmentsAddLangClassFormatter(self): + if has_pygments: + class CustomAddLangHtmlFormatter(pygments.formatters.HtmlFormatter): + def __init__(self, lang_str='', **options): + super().__init__(**options) + self.lang_str = lang_str + + def _wrap_code(self, source): + yield 0, f'' + yield from source + yield 0, '' + + expected = ''' +
hello world
+                hello another world
+                
+ ''' + else: + CustomAddLangHtmlFormatter = None + expected = ''' +
hello world
+                hello another world
+                
+ ''' + + self.assertMarkdownRenders( + self.dedent( + ''' + ```text + hello world + hello another world + ``` + ''' + ), + self.dedent( + expected + ), + extensions=[ + markdown.extensions.codehilite.CodeHiliteExtension( + guess_lang=False, + pygments_formatter=CustomAddLangHtmlFormatter, + ), + 'fenced_code' + ] + ) + def testSvgCustomPygmentsFormatter(self): if has_pygments: expected = '''