Skip to content

Commit

Permalink
Merge pull request #209 from Lupus/parse-error-improvements
Browse files Browse the repository at this point in the history
Improve parse error reporting
  • Loading branch information
c-cube authored Aug 28, 2023
2 parents 88f4e00 + a9f9793 commit ed168a9
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
#- macos-latest
#- windows-latest
ocaml-compiler:
- 4.03.x
- 4.08.x
- 4.12.x
runs-on: ${{ matrix.os }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion ocaml-protoc.opam
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ build: [
["dune" "build" "@doc" "-p" name "-j" jobs] {with-doc}
]
depends: [
"ocaml" {>="4.03.0"}
"ocaml" {>="4.08.0"}
"dune" {>="2.0"}
"stdlib-shims"
"pbrt" {= version}
Expand Down
24 changes: 15 additions & 9 deletions src/compilerlib/pb_exception.ml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ type error =
| Invalid_mutable_option of string option
| Missing_one_of_name of Loc.t
| Missing_field_label of string * string
| Parsing_error of string * Loc.t
| Parsing_error of {
msg: string;
context: string;
loc: Loc.t;
}
| Invalid_ppx_extension_option of string
| Invalid_protobuf_syntax of string
| Invalid_proto3_field_label of string * string
Expand Down Expand Up @@ -133,8 +137,8 @@ let string_of_error = function
("Missing field label for field: %s in message: %s. "
^^ "[required|repeated|optional] expected")
field_name message_name
| Parsing_error (detail, loc) ->
Printf.sprintf "%s%s." (Loc.to_string loc) detail
| Parsing_error { msg; context; loc } ->
Printf.sprintf "%s%s at `%s'." (Loc.to_string loc) msg context
| Invalid_enum_specification (enum_name, loc) ->
P.sprintf
"%sMissing enum specification (<identifier> = <id>;) for enum value: %s"
Expand Down Expand Up @@ -230,14 +234,16 @@ let missing_field_label ~field_name ~message_name =
let invalid_ppx_extension_option message_name =
raise (Compilation_error (Invalid_ppx_extension_option message_name))

let ocamlyacc_parsing_error loc =
raise (Compilation_error (Parsing_error ("Parsing error", loc)))
let ocamlyacc_parsing_error loc context =
raise
(Compilation_error (Parsing_error { msg = "Parsing error"; context; loc }))

let protoc_parsing_error e loc =
raise (Compilation_error (Parsing_error (string_of_error e, loc)))
let protoc_parsing_error e loc context =
raise
(Compilation_error (Parsing_error { msg = string_of_error e; context; loc }))

let unknown_parsing_error detail loc =
raise (Compilation_error (Parsing_error (detail, loc)))
let unknown_parsing_error ~msg ~context loc =
raise (Compilation_error (Parsing_error { msg; context; loc }))

let invalid_protobuf_syntax syntax =
raise (Compilation_error (Invalid_protobuf_syntax syntax))
Expand Down
6 changes: 3 additions & 3 deletions src/compilerlib/pb_exception.mli
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ val invalid_mutable_option : ?field_name:string -> unit -> 'a
val missing_one_of_name : Pb_location.t -> 'a
val missing_field_label : field_name:string -> message_name:string -> 'a
val invalid_ppx_extension_option : string -> 'a
val ocamlyacc_parsing_error : Pb_location.t -> 'a
val protoc_parsing_error : error -> Pb_location.t -> 'a
val unknown_parsing_error : string -> Pb_location.t -> 'a
val ocamlyacc_parsing_error : Pb_location.t -> string -> 'a
val protoc_parsing_error : error -> Pb_location.t -> string -> 'a
val unknown_parsing_error : msg:string -> context:string -> Pb_location.t -> 'a
val invalid_protobuf_syntax : string -> 'a
val invalid_proto3_field_label : field_name:string -> message_name:string -> 'a

Expand Down
13 changes: 8 additions & 5 deletions src/compilerlib/pb_location.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,22 @@
type t = {
file_name: string option;
line: int;
col: int;
}

let from_lexbuf lexbuf =
let open Lexing in
let file_name =
match lexbuf.Lexing.lex_curr_p.Lexing.pos_fname with
match lexbuf.lex_curr_p.pos_fname with
| "" -> None
| x -> Some x
in

let line = lexbuf.Lexing.lex_curr_p.Lexing.pos_lnum in
let line = lexbuf.lex_curr_p.pos_lnum in
let col = lexbuf.lex_curr_p.pos_cnum - lexbuf.lex_curr_p.pos_bol in

{ file_name; line }
{ file_name; line; col }

let to_string { file_name; line } =
Printf.sprintf "%s:%i:0: " (Pb_util.Option.default "" file_name) line
let to_string { file_name; line; col } =
Printf.sprintf "%s:%i:%i: " (Pb_util.Option.default "" file_name) line col
(* standard compilation error format *)
75 changes: 72 additions & 3 deletions src/compilerlib/pb_parsing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,88 @@ module Loc = Pb_location
module Parsing_util = Pb_parsing_util
module Pt = Pb_parsing_parse_tree

let string_of_token =
let open Pb_parsing_parser in
function
| T_required -> "required"
| T_optional -> "optional"
| T_repeated -> "repeated"
| T_one_of _ -> "oneof"
| T_message -> "message"
| T_enum -> "enum"
| T_package -> "package"
| T_import _ -> "import"
| T_public -> "public"
| T_option -> "option"
| T_extensions -> "extensions"
| T_extend -> "extend"
| T_reserved -> "reserved"
| T_returns -> "returns"
| T_rpc -> "rpc"
| T_service -> "service"
| T_stream -> "stream"
| T_syntax -> "syntax"
| T_to -> "to"
| T_max -> "max"
| T_map -> "map"
| T_rbrace -> "}"
| T_lbrace -> "{"
| T_rbracket -> "]"
| T_lbracket -> "["
| T_rparen -> ")"
| T_lparen -> "("
| T_greater -> ">"
| T_less -> "<"
| T_equal -> "="
| T_semi -> ";"
| T_colon -> ":"
| T_comma -> ","
| T_string s -> Printf.sprintf "%S" s
| T_int i -> string_of_int i
| T_float f -> string_of_float f
| T_ident (_, s) -> s
| T_eof -> "<EOF>"

(* Custom lexer that buffers tokens *)
let custom_lexer_with_buffer buf_size =
let token_buffer = Queue.create () in

let next_token lexbuf =
let token = Pb_parsing_lexer.lexer lexbuf in
Queue.add token token_buffer;
if Queue.length token_buffer > buf_size then
Queue.take token_buffer |> ignore;
token
in

let error_context_tokens () =
let context =
Queue.fold (fun acc tok -> tok :: acc) [] token_buffer
|> List.rev |> List.map string_of_token |> String.concat " "
in
Printf.sprintf "%s <<< HERE" context
in

next_token, error_context_tokens

let parse_single_file (file_name, file_content) =
let lexbuf = Lexing.from_string file_content in
let pos = lexbuf.Lexing.lex_curr_p in
lexbuf.Lexing.lex_curr_p <- Lexing.{ pos with pos_fname = file_name };
let buffer_size = 5 (* Adjust as needed *) in
let next_token, error_context_tokens = custom_lexer_with_buffer buffer_size in
let proto =
try Pb_parsing_parser.proto_ Pb_parsing_lexer.lexer lexbuf with
try Pb_parsing_parser.proto_ next_token lexbuf with
| Parsing.Parse_error ->
Pb_exception.ocamlyacc_parsing_error (Loc.from_lexbuf lexbuf)
(error_context_tokens ())
| Pb_exception.Compilation_error e ->
Pb_exception.protoc_parsing_error e (Loc.from_lexbuf lexbuf)
(error_context_tokens ())
| exn ->
let detail = Printexc.to_string exn in
Pb_exception.unknown_parsing_error detail (Loc.from_lexbuf lexbuf)
let msg = Printexc.to_string exn in
Pb_exception.unknown_parsing_error ~msg ~context:(error_context_tokens ())
(Loc.from_lexbuf lexbuf)
in
let proto = { proto with Pt.proto_file_name = Some file_name } in
Parsing_util.finalize_proto_value proto
Expand Down
22 changes: 22 additions & 0 deletions src/tests/expectation/tests.expected
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
====================== <PROTO> ======================

// Syntax error reporting check #1
syntax = "proto3"!#$%

===================== </PROTO> ======================
======================= <AST> =======================
[!] EXN: test.proto:3:24: Failure("Unknown character found !") at `syntax = "proto3" <<< HERE'.
====================== </AST> =======================


====================== <PROTO> ======================

// Syntax error reporting check #2
syntax = "proto3" , message int32 { }

===================== </PROTO> ======================
======================= <AST> =======================
[!] EXN: test.proto:3:25: Parsing error at `syntax = "proto3" , <<< HERE'.
====================== </AST> =======================


====================== <PROTO> ======================

syntax = "proto3";
Expand Down
40 changes: 28 additions & 12 deletions src/tests/expectation/tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,25 @@ module E = Pb_exception
module Pt = Pb_parsing_parse_tree

let run proto =
let protos =
Pb_parsing.parse_file
(fun f ->
match f with
| "test.proto" -> f, proto
| _ -> f, "")
"test.proto"
let maybe_protos =
try
Pb_parsing.parse_file
(fun f ->
match f with
| "test.proto" -> f, proto
| _ -> f, "")
"test.proto"
|> Result.ok
with e -> Error e
in
let pp_maybe_protos ppf = function
| Ok protos ->
Format.fprintf ppf "%a"
(Format.pp_print_list
~pp_sep:(fun ppf () -> Format.fprintf ppf "@.")
Pt.pp_proto)
protos
| Error exn -> Format.fprintf ppf "[!] EXN: %s" (Printexc.to_string exn)
in
let ppf = Format.std_formatter in
Format.set_margin 150;
Expand All @@ -18,14 +30,18 @@ let run proto =
======================@.======================= <AST> \
=======================@.%a@.====================== </AST> \
=======================@.@.@."
Format.pp_print_string proto
(Format.pp_print_list
~pp_sep:(fun ppf () -> Format.fprintf ppf "@.")
Pt.pp_proto)
protos
Format.pp_print_string proto pp_maybe_protos maybe_protos

let test_cases =
[
{|
// Syntax error reporting check #1
syntax = "proto3"!#$%
|};
{|
// Syntax error reporting check #2
syntax = "proto3" , message int32 { }
|};
{|
syntax = "proto3";

Expand Down

0 comments on commit ed168a9

Please sign in to comment.