From ccd3deaae7a223c36479151dd358e0a90a9b9111 Mon Sep 17 00:00:00 2001 From: hhugo Date: Mon, 16 Oct 2023 15:54:03 +0200 Subject: [PATCH] New functions to retrieve position (#155) Co-authored-by: Hugo Heuzard --- CHANGES.md | 3 ++ src/lib/sedlexing.ml | 66 +++++++++++++++++++++++-------------------- src/lib/sedlexing.mli | 18 +++++++++++- 3 files changed, 56 insertions(+), 31 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3f90d2e9..1f6f71e7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,6 @@ +# dev +- Add API for retrieving start and stop positions separately (#155) + # 3.2 (2023-06-28): - Restore compatibility with OCaml 4.08 - Use `Sedlexing.{Utf8,Utf16}.from_gen` to initialize UTF8 (resp. UTF16) lexing buffers from diff --git a/src/lib/sedlexing.ml b/src/lib/sedlexing.ml index 5f459291..b2268543 100644 --- a/src/lib/sedlexing.ml +++ b/src/lib/sedlexing.ml @@ -261,40 +261,46 @@ let lexeme lexbuf = let lexeme_char lexbuf pos = lexbuf.buf.(lexbuf.start_pos + pos) +let lexing_position_start lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.start_line; + pos_cnum = lexbuf.start_pos + lexbuf.offset; + pos_bol = lexbuf.start_bol; + } + +let lexing_position_curr lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.curr_line; + pos_cnum = lexbuf.pos + lexbuf.offset; + pos_bol = lexbuf.curr_bol; + } + let lexing_positions lexbuf = - let start_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.start_line; - pos_cnum = lexbuf.start_pos + lexbuf.offset; - pos_bol = lexbuf.start_bol; - } - and curr_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.curr_line; - pos_cnum = lexbuf.pos + lexbuf.offset; - pos_bol = lexbuf.curr_bol; - } - in + let start_p = lexing_position_start lexbuf + and curr_p = lexing_position_curr lexbuf in (start_p, curr_p) +let lexing_bytes_position_start lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.start_line; + pos_cnum = lexbuf.start_bytes_pos + lexbuf.bytes_offset; + pos_bol = lexbuf.start_bytes_bol; + } + +let lexing_bytes_position_curr lexbuf = + { + Lexing.pos_fname = lexbuf.filename; + pos_lnum = lexbuf.curr_line; + pos_cnum = lexbuf.bytes_pos + lexbuf.bytes_offset; + pos_bol = lexbuf.curr_bytes_bol; + } + let lexing_bytes_positions lexbuf = - let start_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.start_line; - pos_cnum = lexbuf.start_bytes_pos + lexbuf.bytes_offset; - pos_bol = lexbuf.start_bytes_bol; - } - and curr_p = - { - Lexing.pos_fname = lexbuf.filename; - pos_lnum = lexbuf.curr_line; - pos_cnum = lexbuf.bytes_pos + lexbuf.bytes_offset; - pos_bol = lexbuf.curr_bytes_bol; - } - in + let start_p = lexing_bytes_position_start lexbuf + and curr_p = lexing_bytes_position_curr lexbuf in (start_p, curr_p) let with_tokenizer lexer' lexbuf = diff --git a/src/lib/sedlexing.mli b/src/lib/sedlexing.mli index b129ff17..2332f7cd 100644 --- a/src/lib/sedlexing.mli +++ b/src/lib/sedlexing.mli @@ -133,12 +133,28 @@ val lexeme_bytes_length : lexbuf -> int by parsers like those generated by [Menhir]. *) val lexing_positions : lexbuf -> Lexing.position * Lexing.position +(** [Sedlexing.lexing_position_start lexbuf] returns the start + position, in code points, of the current token. *) +val lexing_position_start : lexbuf -> Lexing.position + +(** [Sedlexing.lexing_position_curr lexbuf] returns the end + position, in code points, of the current token. *) +val lexing_position_curr : lexbuf -> Lexing.position + (** [Sedlexing.lexing_bytes_positions lexbuf] returns the start and end positions, in bytes, of the current token, using a record of type [Lexing.position]. This is intended for consumption by parsers like those generated by [Menhir]. *) val lexing_bytes_positions : lexbuf -> Lexing.position * Lexing.position +(** [Sedlexing.lexing_bytes_position_start lexbuf] returns the start + position, in bytes, of the current token. *) +val lexing_bytes_position_start : lexbuf -> Lexing.position + +(** [Sedlexing.lexing_bytes_position_curr lexbuf] returns the end + position, in bytes, of the current token. *) +val lexing_bytes_position_curr : lexbuf -> Lexing.position + (** [Sedlexing.new_line lexbuf] increments the line count and sets the beginning of line to the current position, as though a newline character had been encountered in the input. *) @@ -189,7 +205,7 @@ val next : lexbuf -> Uchar.t option lexer buffer and increments to current position. If the input stream is exhausted, the function returns -1. If a ['\n'] is encountered, the tracked line number is incremented. - + This is a private API, it should not be used by code using this module's API and can be removed at any time. *) val __private__next_int : lexbuf -> int