Skip to content

Commit

Permalink
Use unsafe_read and conversions when size has already been verified, …
Browse files Browse the repository at this point in the history
…and simplify

read and write of varints. Using flambda, this has proven to be the fastest implementation.
  • Loading branch information
andersfugmann committed Jan 23, 2024
1 parent 94fe325 commit cd50f0d
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 118 deletions.
13 changes: 13 additions & 0 deletions src/ocaml_protoc_plugin/infix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,16 @@ module Int64 = struct
let ( * ) = mul
let (-) = sub
end

module Int = struct
open Int
let (land) = logand
let (lsl) = shift_left
let (lsr) = shift_right_logical
let (lor) = logor
let (lxor) = logxor
let (+) = add
let (/) = div
let ( * ) = mul
let (-) = sub
end
65 changes: 16 additions & 49 deletions src/ocaml_protoc_plugin/reader.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,72 +20,39 @@ let create ?(offset = 0) ?length data =
let reset t offset = t.offset <- offset
let offset { offset; _ } = offset

[@@inline]
let validate_capacity t count =
match t.offset + count <= t.end_offset with
| true -> ()
| false ->
Result.raise `Premature_end_of_input

[@@inline]
let has_more t = t.offset < t.end_offset

let has_more t = t.offset < t.end_offset
[@@inline]

let read_byte t =
validate_capacity t 1;
let v = Bytes.get_uint8 (Bytes.unsafe_of_string t.data) t.offset in
t.offset <- t.offset + 1;
v
match t.offset < t.end_offset with
| true ->
let v = String.unsafe_get t.data t.offset |> Char.code in
t.offset <- t.offset + 1;
v
| false -> Result.raise `Premature_end_of_input
[@@inline]

let read_varint_reference t =
let read_varint t =
let open Infix.Int64 in
let rec inner n acc =
let rec inner acc bit =
let v = read_byte t |> Int64.of_int in
let acc = acc + (v land 0x7fL) lsl n in
let acc = acc lor ((v land 0x7fL) lsl bit) in
match v land 0x80L = 0x80L with
| true ->
(* Still More data *)
inner (Int.add n 7) acc
inner acc (Int.add bit 7)
| false -> acc
in
inner 0 0L
inner 0L 0[@@unrolled 10]

[@@inline]
let read_varint t =
let rec inner n acc =
let v = read_byte t in
let acc = acc + (v land 0x7f) lsl n in
match v land 0x80 = 0x80 with
| true when acc < 0 -> begin
let accl = Int64.of_int acc in (* If bit63 was set, then bit63 and bit64 are now set *)
let accl = match read_byte t land 0x01 = 0x01 with
| true -> accl
| false -> Int64.logand accl 0x7fffffffffffffffL (* Apparently not a negative number after all *)
in
accl
end
| true -> inner (n + 7) acc
| false when acc < 0 -> (* Bit 63 is set, convert into a 64 bit integer, but clear bit64 *)
Int64.logand 0x7fffffffffffffffL (Int64.of_int acc)
| false -> Int64.of_int acc

in
inner 0 0

[@@inline]
let read_varint_unboxed t =
let rec inner n acc =
let v = read_byte t in
let acc = acc + (v land 0x7f) lsl n in
match v land 0x80 = 0x80 with
| true ->
(* Still More data *)
inner (n + 7) acc
| false -> acc
in
inner 0 0
let read_varint_unboxed t = read_varint t |> Int64.to_int

(* Implement little endian ourselves *)
let read_fixed32 t =
let size = 4 in
validate_capacity t size;
Expand Down Expand Up @@ -148,7 +115,7 @@ let%expect_test "varint boxed" =
Writer.contents writer
in
Printf.printf "0x%016LxL = 0x%016LxL\n"
(read_varint_reference (create buffer))
v
(read_varint (create buffer));
()
) values;
Expand Down
5 changes: 3 additions & 2 deletions src/ocaml_protoc_plugin/reader.mli
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ val read_field_header: t -> Field.field_type * int
val read_field_content : Field.field_type -> t -> Field.t
val has_more : t -> bool
val to_list : t -> (int * Field.t) list
val read_varint : t -> int64
val read_varint_unboxed : t -> int
val read_length_delimited : t -> Field.length_delimited
val read_fixed32 : t -> int32
val read_fixed64 : t -> int64

val read_varint : t -> int64
val read_varint_unboxed : t -> int
(**/**)
79 changes: 14 additions & 65 deletions src/ocaml_protoc_plugin/writer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,56 +26,21 @@ let unused_space t =
in
inner t.data

let write_varint_reference buffer ~offset v =
let write_varint buffer ~offset v =
let rec inner ~offset v =
let next_offset = offset + 1 in
let open Infix.Int64 in
match v lsr 7 with
| 0L ->
Bytes.set_uint8 buffer offset (Int64.to_int v);
Bytes.unsafe_set buffer offset (Int64.to_int v |> Char.unsafe_chr);
next_offset
| rem ->
Bytes.set_uint8 buffer offset (Int.logor (Int64.to_int v) 0b1000_0000);
Bytes.unsafe_set buffer offset ((v land 0x7fL) lor 0b1000_0000L |> Int64.to_int |> Char.unsafe_chr);
inner ~offset:next_offset rem
in
inner ~offset v
inner ~offset v[@@unrolled 10]

(** Reference implementation. Uses a loop which is slower than the manually unrolled version *)
let write_varint_unboxed buffer ~offset v =
let rec inner ~is_negative ~offset v =
match v lsr 7 with
| 0 when is_negative ->
(* If the value was signed, set bit 64 also *)
inner ~is_negative:false ~offset (v lor 0b1000_0000)
| 0 ->
Bytes.set_uint8 buffer offset v;
offset + 1
| rem ->
Bytes.set_uint8 buffer offset (v lor 0b1000_0000);
inner ~is_negative ~offset:(offset + 1) rem
in
inner ~is_negative:(v < 0) ~offset v

let write_varint buffer ~offset v =
match Int64.shift_right_logical v 62 with
| 0b01L ->
(* Bit 63 set (and not bit 64).
Write as signed int, drop the last byte and clear the msb
*)
let v = Int64.to_int v in
let offset = write_varint_unboxed buffer ~offset v in
let byte = Bytes.get_uint8 buffer (offset - 2) land 0b0111_1111 in
Bytes.set_uint8 buffer (offset - 2) byte;
Bytes.set_uint8 buffer (offset - 1) 0;
offset - 1
| 0b10L ->
(* Only bit 64 is set. Set bit 63, and then clear it again in the output *)
let v = Int64.to_int v lor 0x4000_0000_0000_0000 in
let offset = write_varint_unboxed buffer ~offset v in
let byte = Bytes.get_uint8 buffer (offset - 2) land 0b1011_1111 in
Bytes.set_uint8 buffer (offset - 2) byte;
offset
| _ -> write_varint_unboxed buffer ~offset (Int64.to_int v)
let write_varint_unboxed buffer ~offset v = write_varint buffer ~offset (Int64.of_int v)

(* Write a field delimited length.
A delimited field length can be no larger than 2^31.
Expand Down Expand Up @@ -266,7 +231,7 @@ let%expect_test "fixed_size" =
Fixed field: 0x3fffffff: ff ff ff ff 03 ff ff ff ff ff |}]


let%test "varint unrolled" =
let%test "varint" =
let open Infix.Int64 in
let string_of_bytes b =
Bytes.to_seq b |> Seq.map Char.code |> Seq.map (Printf.sprintf "%02x") |> List.of_seq |> String.concat ~sep:" "
Expand All @@ -276,30 +241,14 @@ let%test "varint unrolled" =
in
List.fold_left ~init:true ~f:(fun acc v ->
List.fold_left ~init:acc ~f:(fun acc v ->

let acc =
let b1 = Bytes.make 10 '\000' in
let b2 = Bytes.make 10 '\000' in
write_varint_unboxed b1 ~offset:0 (Int64.to_int v) |> ignore;
write_varint b2 ~offset:0 (v) |> ignore;
match Bytes.equal b1 b2 || Int64.shift_right_logical v 63 != 0L with
| true -> acc
| false ->
Printf.printf "Unboxed: %16Lx (%20d): %S = %S\n" v (Int64.to_int v) (string_of_bytes b1) (string_of_bytes b2);
false
in
let acc =
let b1 = Bytes.make 10 '\000' in
let b2 = Bytes.make 10 '\000' in
write_varint_reference b1 ~offset:0 v |> ignore;
write_varint b2 ~offset:0 v |> ignore;
match Bytes.equal b1 b2 with
| true -> acc
| false ->
Printf.printf "Boxed: %16Lx: %S = %S\n" v (string_of_bytes b1) (string_of_bytes b2);
let b1 = Bytes.make 10 '\000' in
let b2 = Bytes.make 10 '\000' in
write_varint_unboxed b1 ~offset:0 (Int64.to_int v) |> ignore;
write_varint b2 ~offset:0 (v) |> ignore;
match Bytes.equal b1 b2 || Int64.shift_right_logical v 63 != 0L with
| true -> acc
| false ->
Printf.printf "Unboxed: %16Lx (%20d): %S = %S\n" v (Int64.to_int v) (string_of_bytes b1) (string_of_bytes b2);
false
in
acc

) [v-2L; v-1L; v; v+1L; v+2L]
) values
5 changes: 3 additions & 2 deletions src/ocaml_protoc_plugin/writer.mli
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ val contents : t -> string

(**/**)

(** Direct functions *)

(* Direct functions *)
val write_fixed32_value: int32 -> t -> unit
val write_fixed64_value: int64 -> t -> unit
val write_varint_unboxed_value: int -> t -> unit
Expand All @@ -36,4 +35,6 @@ val of_list: (int * Field.t) list -> t
val dump : t -> unit

val unused_space : t -> int
val write_varint: Bytes.t -> offset:int -> Int64.t -> int
val write_varint_unboxed: Bytes.t -> offset:int -> int -> int
(**/**)

0 comments on commit cd50f0d

Please sign in to comment.