Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embed the Pygments lexers in Catala #428

Merged
merged 3 commits into from
Mar 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,13 @@ Next, install all the packages that Catala depends on with

This should ensure everything is set up for developing on the Catala compiler!
The Python dependencies are installed inside a local virtual environment
(`venv`), so for things like syntax coloration in Catala's literate output to
work, you will have to run
(`venv`). The Makefile rules will use it automatically when building the syntax
cheat-sheet, for example, but if you need to otherwise colorise Catala code, or
use generated Python code, you should run the following command once in every
new shell session:

. _python_venv/bin/activate

from the catala directory to enable it. This needs to be done in every new shell
session, unless you use the predefined `make` rules which already account for
that.

**Warning**: the `make dependencies` command does not include the `z3`
dependency required to enable the proof platform feature of Catala. If you wish
to enable support for the `Proof` command of the Catala compiler, you should
Expand Down
1 change: 1 addition & 0 deletions catala.opam
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ depends: [
"visitors" {>= "20200210"}
"zarith" {>= "1.12"}
"zarith_stubs_js" {>= "v0.14.1"}
"crunch" {>= "3.0.0"}
"alcotest" {with-test & >= "1.5.0"}
"odoc" {with-doc}
"ocamlformat" {cataladevmode & = "0.21.0"}
Expand Down
7 changes: 7 additions & 0 deletions compiler/catala_utils/cli.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ type backend_option_builtin =

type 'a backend_option = [ backend_option_builtin | `Plugin of 'a ]

(** Associates a {!type: Cli.backend_lang} with its string represtation. *)
let languages = ["en", En; "fr", Fr; "pl", Pl]

let language_code =
let rl = List.map (fun (a, b) -> b, a) languages in
fun l -> List.assoc l rl

let backend_option_to_string = function
| `Interpret -> "Interpret"
| `Makefile -> "Makefile"
Expand Down
5 changes: 5 additions & 0 deletions compiler/catala_utils/cli.mli
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ type backend_option_builtin =

type 'a backend_option = [ backend_option_builtin | `Plugin of 'a ]

val languages : (string * backend_lang) list

val language_code : backend_lang -> string
(** Returns the lowercase two-letter language code *)

val backend_option_to_string : string backend_option -> string
(** [backend_option_to_string backend] returns the string representation of the
given [backend].*)
Expand Down
48 changes: 48 additions & 0 deletions compiler/catala_utils/file.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ let finally f k =
f ();
r

let temp_file pfx sfx =
let f = Filename.temp_file pfx sfx in
if not !Cli.debug_flag then
at_exit (fun () -> try Sys.remove f with _ -> ());
f

let with_out_channel filename f =
let oc = open_out filename in
finally (fun () -> close_out oc) (fun () -> f oc)
Expand Down Expand Up @@ -60,3 +66,45 @@ let get_out_channel ~source_file ~output_file ?ext () =
let get_formatter_of_out_channel ~source_file ~output_file ?ext () =
let f, with_ = get_out_channel ~source_file ~output_file ?ext () in
f, fun fmt -> with_ (fun oc -> with_formatter_of_out_channel oc fmt)

let with_temp_file pfx sfx ?contents f =
let filename = temp_file pfx sfx in
finally (fun () -> Sys.remove filename)
@@ fun () ->
Option.iter
(fun contents ->
with_out_channel filename (fun oc -> output_string oc contents))
contents;
f filename

let contents filename =
with_in_channel filename (fun ic ->
really_input_string ic (in_channel_length ic))

let process_out ?check_exit cmd args =
let check_exit =
let default n =
if n <> 0 then
Printf.ksprintf failwith "Sub-process %s returned with status %d" cmd n
in
Option.value check_exit ~default
in
let aargs = Array.of_list (cmd :: args) in
let ic =
try Unix.open_process_args_in cmd aargs
with Unix.Unix_error (Unix.ENOENT, _, _) ->
Printf.ksprintf failwith "ERROR: program %s not found" cmd
in
let buf = Buffer.create 4096 in
finally (fun () ->
match Unix.close_process_in ic with
| Unix.WEXITED n -> check_exit n
| Unix.WSIGNALED n | Unix.WSTOPPED n ->
Printf.ksprintf failwith "Sub-process %s was killed (%d)" cmd n)
@@ fun () ->
try
while true do
Buffer.add_channel buf ic 4096
done;
assert false
with End_of_file -> Buffer.contents buf
19 changes: 19 additions & 0 deletions compiler/catala_utils/file.mli
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,22 @@ val get_formatter_of_out_channel :
(** [get_output_format ~source_file ~output_file ?ext ()] returns the infered
filename and its corresponding [with_formatter_of_out_channel] function. If
the [output_file] is equal to [Some "-"] returns a wrapper around [stdout]. *)

val temp_file : string -> string -> string
(** Like [Filename.temp_file], but registers the file for deletion at program
exit unless Cli.debug_flag is set. *)

val with_temp_file :
string -> string -> ?contents:string -> (string -> 'a) -> 'a
(** Creates a temp file (with prefix and suffix like [temp_file], optionally
with the given contents, for the lifetime of the supplied function, then
remove it unconditionally *)

val contents : string -> string
(** Reads the contents of a file as a string *)

val process_out : ?check_exit:(int -> unit) -> string -> string list -> string
(** [process_out cmd args] executes the given command with the specified
arguments, and returns the stdout of the process as a string. [check_exit]
is called on the return code of the sub-process, the default is to fail on
anything but 0. *)
5 changes: 1 addition & 4 deletions compiler/driver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

open Catala_utils

(** Associates a {!type: Cli.backend_lang} with its string represtation. *)
let languages = ["en", Cli.En; "fr", Cli.Fr; "pl", Cli.Pl]

(** Associates a file extension with its corresponding {!type: Cli.backend_lang}
string representation. *)
let extensions = [".catala_fr", "fr"; ".catala_en", "en"; ".catala_pl", "pl"]
Expand Down Expand Up @@ -59,7 +56,7 @@ let driver source_file (options : Cli.options) : int =
try List.assoc ext extensions with Not_found -> ext)
in
let language =
try List.assoc l languages
try List.assoc l Cli.languages
with Not_found ->
Errors.raise_error
"The selected language (%s) is not supported by Catala" l
Expand Down
15 changes: 15 additions & 0 deletions compiler/literate/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
(public_name catala.literate)
(libraries re catala_utils surface ubase uutf))

(rule
(target pygment_lexers.ml)
(action
(progn
(copy
../../syntax_highlighting/en/pygments/catala_en_lexer/lexer.py
lexer_en.py)
(copy
../../syntax_highlighting/fr/pygments/catala_fr_lexer/lexer.py
lexer_fr.py)
(copy
../../syntax_highlighting/pl/pygments/catala_pl_lexer/lexer.py
lexer_pl.py)
(run ocaml-crunch -e py -m plain -o %{target} .))))

(documentation
(package catala)
(mld_files literate))
79 changes: 23 additions & 56 deletions compiler/literate/html.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@ module C = Cli
(** Converts double lines into HTML newlines. *)
let pre_html (s : string) = String.trim (run_pandoc s `Html)

(** Raise an error if pygments cannot be found *)
let raise_failed_pygments (command : string) (error_code : int) : 'a =
Errors.raise_error
"Weaving to HTML failed: pygmentize command \"%s\" returned with error \
code %d"
command error_code

(** Partial application allowing to remove first code lines of
[<td class="code">] and [<td class="linenos">] generated HTML. Basically,
remove all code block first lines. *)
Expand All @@ -57,28 +50,19 @@ let wrap_html
(language : Cli.backend_lang)
(fmt : Format.formatter)
(wrapped : Format.formatter -> unit) : unit =
let pygments = "pygmentize" in
let css_file = Filename.temp_file "catala_css_pygments" "" in
let pygments_args =
[| "-f"; "html"; "-S"; "colorful"; "-a"; ".catala-code" |]
in
let cmd =
Format.sprintf "%s %s > %s" pygments
(String.concat " " (Array.to_list pygments_args))
css_file
let css_as_string =
call_pygmentize ["-f"; "html"; "-S"; "default"; "-a"; ".catala-code"]
in
let return_code = Sys.command cmd in
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in css_file in
let css_as_string = really_input_string oc (in_channel_length oc) in
close_in oc;
Format.fprintf fmt
"<head>\n\
"<!DOCTYPE html>\n\
<html>\n\
<head>\n\
<style>\n\
%s\n\
</style>\n\
<meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>\n\
</head>\n\
<body>\n\
denismerigoux marked this conversation as resolved.
Show resolved Hide resolved
<h1>%s<br />\n\
<small>%s Catala version %s</small>\n\
</h1>\n\
Expand Down Expand Up @@ -110,46 +94,29 @@ let wrap_html
(literal_last_modification language)
ftime)
source_files));
wrapped fmt
wrapped fmt;
Format.fprintf fmt "</body>\n</html>\n"

(** Performs syntax highlighting on a piece of code by using Pygments and the
special Catala lexer. *)
let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
=
let pygmentize_code (c : string Marked.pos) (lang : C.backend_lang) : string =
C.debug_print "Pygmenting the code chunk %s"
(Pos.to_string (Marked.get_mark c));
let temp_file_in = Filename.temp_file "catala_html_pygments" "in" in
let temp_file_out = Filename.temp_file "catala_html_pygments" "out" in
let oc = open_out temp_file_in in
Printf.fprintf oc "%s" (Marked.unmark c);
close_out oc;
let pygments = "pygmentize" in
let pygments_lexer = get_language_extension language in
let pygments_args =
[|
"-l";
pygments_lexer;
"-f";
"html";
"-O";
"style=colorful,anchorlinenos=True,lineanchors=\""
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ "\",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
"-o";
temp_file_out;
temp_file_in;
|]
in
let cmd =
Format.asprintf "%s %s" pygments
(String.concat " " (Array.to_list pygments_args))
let output =
File.with_temp_file "catala_html_pygments" "in" ~contents:(Marked.unmark c)
@@ fun temp_file_in ->
call_pygmentize ~lang
[
"-f";
"html";
"-O";
"anchorlinenos=True,lineanchors="
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ ",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
temp_file_in;
]
in
let return_code = Sys.command cmd in
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in temp_file_out in
let output = really_input_string oc (in_channel_length oc) in
close_in oc;
(* Remove code blocks delimiters needed by [Pygments]. *)
let trimmed_output =
output |> remove_cb_first_lines |> remove_cb_last_lines
Expand Down
50 changes: 30 additions & 20 deletions compiler/literate/latex.ml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ let wrap_latex
\usepackage{fontspec}
\usepackage[hidelinks]{hyperref}
%s
\usepackage{minted}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage{longtable}
\usepackage{booktabs,tabularx}
\usepackage{newunicodechar}
Expand Down Expand Up @@ -122,8 +123,10 @@ let wrap_latex
\newunicodechar{→}{$\rightarrow$}
\newunicodechar{≠}{$\neq$}

\newcommand*\FancyVerbStartString{\PYG{l+s}{```catala}}
\newcommand*\FancyVerbStopString{\PYG{l+s}{```}}
%s

\newcommand*\FancyVerbStartString{\PY{l+s}{```catala}}
\newcommand*\FancyVerbStopString{\PY{l+s}{```}}

\fvset{
numbers=left,
Expand Down Expand Up @@ -157,6 +160,7 @@ codes={\catcode`\$=3\catcode`\^=7}
(match language with Fr -> "\\setmainfont{Marianne}" | _ -> "")
(* for France, we use the official font of the French state design system
https://gouvfr.atlassian.net/wiki/spaces/DB/pages/223019527/Typographie+-+Typography *)
(call_pygmentize ["-f"; "latex"; "-S"; "default"])
(literal_title language)
(literal_generated_by language)
Cli.version
Expand Down Expand Up @@ -185,6 +189,24 @@ codes={\catcode`\$=3\catcode`\^=7}

(** {1 Weaving} *)

let code_block ~meta lang fmt (code, pos) =
(* Pygments does'nt allow to specify multiple 'verboptions' (escaping bug ?)
so we call it with "nowrap" and write the FancyVrb wrapper ourselves. *)
let pygmentized_code =
let contents = String.concat "" ["```catala\n"; code; "```"] in
File.with_temp_file "catala_latex_pygments" "in" ~contents
@@ fun temp_file_in ->
call_pygmentize ~lang ["-f"; "latex"; "-O"; "nowrap=true"; temp_file_in]
in
Format.fprintf fmt
{latex|\begin{Verbatim}[commandchars=\\\{\},numbers=left,firstnumber=%d,stepnumber=1,label={\hspace*{\fill}\texttt{%s}}%s]|latex}
(Pos.get_start_line pos + 1)
(pre_latexify (Filename.basename (Pos.get_file pos)))
(if meta then ",numbersep=9mm" else "");
Format.pp_print_newline fmt ();
Format.pp_print_string fmt pygmentized_code;
Format.pp_print_string fmt "\\end{Verbatim}\n"

let rec law_structure_to_latex
(language : C.backend_lang)
(print_only_law : bool)
Expand Down Expand Up @@ -228,15 +250,7 @@ let rec law_structure_to_latex
let block_content = Marked.unmark c in
check_exceeding_lines start_line filename block_content;
update_lines_of_code c;
Format.fprintf fmt
"\\begin{minted}[label={\\hspace*{\\fill}\\texttt{%s}},firstnumber=%d]{%s}\n\
```catala\n\
%s```\n\
\\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file (Marked.get_mark c))))
(Pos.get_start_line (Marked.get_mark c) + 1)
(get_language_extension language)
(Marked.unmark c)
code_block ~meta:false language fmt c
| A.CodeBlock (_, c, true) when not print_only_law ->
let metadata_title =
match language with
Expand All @@ -253,15 +267,11 @@ let rec law_structure_to_latex
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
title=\\textcolor{black}{\\texttt{%s}},title after \
break=\\textcolor{black}{\\texttt{%s}},before skip=1em, after skip=1em]\n\
\\begin{minted}[numbersep=9mm, firstnumber=%d, \
label={\\hspace*{\\fill}\\texttt{%s}}]{%s}\n\
```catala\n\
%s```\n\
\\end{minted}\n\
%a\n\
\\end{tcolorbox}"
metadata_title metadata_title start_line (pre_latexify filename)
(get_language_extension language)
block_content
metadata_title metadata_title
(code_block ~meta:true language)
c
| A.CodeBlock _ -> ()

(** {1 API} *)
Expand Down
Loading