Skip to content

Commit

Permalink
check in ocaml gimple parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Jul 29, 2024
1 parent 3f58986 commit 62a58f8
Show file tree
Hide file tree
Showing 13 changed files with 383 additions and 0 deletions.
6 changes: 6 additions & 0 deletions ocaml/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
(executables
(names gcc2cryptoline)
(libraries parser main)
(public_names gcc2cryptoline)
(package gcc2cryptoline)
)
22 changes: 22 additions & 0 deletions ocaml/dune-project
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
(lang dune 2.0)

; Before changing the dune version, make sure that the new version
; is supported by most popular operating systems, for example
; Ubuntu 22.04 LTS.

(name gcc2cryptoline)

(generate_opam_files true)
(license MIT)
(authors "fmlab-iis")
(maintainers "fmlab-iis")

(package
(name gcc2cryptoline)
(synopsis "GCC gimple to CryptoLine Translator")
(homepage https://github.com/fmlab-iis/gcc2cryptoline)
(depends
num
zarith
)
)
5 changes: 5 additions & 0 deletions ocaml/gcc2cryptoline.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
open Arg

let _ =
parse Main.Std.args Main.Std.anon Main.Std.usage

26 changes: 26 additions & 0 deletions ocaml/gcc2cryptoline.opam
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "GCC gimple to CryptoLine Translator"
maintainer: ["fmlab-iis"]
authors: ["fmlab-iis"]
license: "MIT"
homepage: "https://github.com/fmlab-iis/gcc2cryptoline"
depends: [
"dune" {>= "2.0"}
"num"
"zarith"
]
build: [
["dune" "subst"] {pinned}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
4 changes: 4 additions & 0 deletions ocaml/gimple/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(library
(name gimple)
(public_name gcc2cryptoline.gimple)
)
5 changes: 5 additions & 0 deletions ocaml/main/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
(library
(name main)
(libraries parser)
(flags (:standard -w -3))
)
20 changes: 20 additions & 0 deletions ocaml/main/std.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

let args = []

let usage = "Usage: gcc2cryptoline OPTIONS FILE\n"

let anon file =
let lexbuf = Lexing.from_channel ~with_positions:true (open_in file) in
let _ = Lexing.set_filename lexbuf file in
let _ast =
try
Parser.GimpleParser.gimple Parser.GimpleLexer.token lexbuf
with
| Failure msg ->
raise (Failure ("Error at line " ^
string_of_int ((Parsing.symbol_start_pos()).pos_lnum) ^
". " ^ msg))
| Parsing.Parse_error -> Parser.Common.raise_parse_error lexbuf
in
()

16 changes: 16 additions & 0 deletions ocaml/parser/common.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

open Parsing

let get_line_start () = (symbol_start_pos()).pos_lnum
let get_line_end () = (symbol_end_pos()).pos_lnum

let raise_parse_error lexbuf =
let curr = lexbuf.Lexing.lex_curr_p in
let fn = if String.length curr.Lexing.pos_fname > 0
then (curr.Lexing.pos_fname ^ ":") else "" in
let line = curr.Lexing.pos_lnum in
let cnum = curr.Lexing.pos_cnum - curr.Lexing.pos_bol in
let tok = Lexing.lexeme lexbuf in
let msg = Printf.sprintf "Parser error at %s(%d, %d): `%s`" fn line cnum tok in
raise (Failure msg)

8 changes: 8 additions & 0 deletions ocaml/parser/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(library
(name parser)
(public_name gcc2cryptoline.parser)
(libraries zarith)
)

(ocamllex gimpleLexer)
(ocamlyacc gimpleParser)
84 changes: 84 additions & 0 deletions ocaml/parser/gimpleLexer.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
open GimpleParser
exception Eof

let keywords = Hashtbl.create 100
let _ = List.iter (fun (keyword, token) -> Hashtbl.replace keywords keyword token)
[
(********** keywords **********)
"__attribute__" , ATTRIBUTE;
"access" , ACCESS;
"void" , VOID;
"int" , INT;
"const" , CONST;
"unsigned" , UNSIGNED;
"long" , LONG;
"MEM" , MEM;
"return" , RETURN;
"local" , LOCAL;
"count" , COUNT;
]
}

let letter = ['a'-'z' 'A'-'Z' '_']
let number = ['0'-'9']
let bin = ['0' '1']
let hex = ['0'-'9' 'a'-'f' 'A'-'F']
let identity = '_'? (letter | '_' | number)* ("(D)")?
let comment_line = (";;"([^ '\n' ]+))|('#'([^ '\n' ]+))

rule line_comment = parse
("\r\n"|'\n'|'\r') { Lexing.new_line lexbuf; token lexbuf }
| _ { line_comment lexbuf }

and

token = parse
[' ' '\t'] { token lexbuf }
| ("\r\n"|'\n'|'\r') { Lexing.new_line lexbuf; token lexbuf }
(* Others *)
| ";;" { line_comment lexbuf }
| "#" { line_comment lexbuf }
(* Symbols *)
| '(' { LPAREN }
| ')' { RPAREN }
| '[' { LSQUARE }
| ']' { RSQUARE }
| '{' { LBRACK }
| '}' { RBRACK }
| '<' { LESS }
| '>' { GREATER }
| ',' { COMMA }
| ':' { COLON }
| ';' { SEMICOLON }
| '"' { DQUOTE }
(* Operators *)
| '+' { ADDOP }
| '-' { SUBOP }
| '*' { MULOP }
| "w*" { WMULOP }
| '&' { ANDOP }
| '|' { OROP }
| '^' { XOROP }
| '=' { EQOP }
| "<<" { LSHIFT }
| ">>" { RSHIFT }
| "bb" { BB }
(* Types *)
| "uint" ((number+) as w) "_t" { UINT (int_of_string w) }
| "u" ((number+) as w) { UINT (int_of_string w) }
| "int" ((number+) as w) "_t" { SINT (int_of_string w) }
| "__int" ((number+) as w) { SINT (int_of_string w) }
(* Numbers *)
| (number+) as num { NUM (Z.of_string num) }
(* Offsets *)
| ((number+) as byte) "B" { BYTE (int_of_string byte) }
(* Strings *)
| '"' ((_+) as s) '"' { STRING s }
| identity as id { try
Hashtbl.find keywords id
with Not_found ->
ID id
}
| eof { EOF }
| _ { raise (Invalid_argument ("Unexpected character: " ^ Lexing.lexeme lexbuf ^ " at line " ^ string_of_int (Common.get_line_end ()))) }
124 changes: 124 additions & 0 deletions ocaml/parser/gimpleParser.mly
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
%{

(*
* Use raise_at_line or raise_at to raise an exception if the error location
* can be determined. Raise ParseError otherwise.
*)

open Syntax

%}

%token <string> COMMENT
%token <Z.t> NUM
%token <string> ID STRING
%token <int> UINT SINT BYTE

%token LPAREN RPAREN LSQUARE RSQUARE LBRACK RBRACK COMMA SEMICOLON COLON DQUOTE
/* Operators */
%token ADDOP SUBOP MULOP WMULOP ANDOP OROP XOROP LSHIFT RSHIFT EQOP LESS GREATER
/* Types */
%token CONST VOID INT UNSIGNED LONG
/* Others */
%token ATTRIBUTE ACCESS MEM EOF RETURN LOCAL COUNT BB

%start gimple
%type <Syntax.function_t list> gimple

%%

gimple:
funcs EOF { $1 }
;

funcs:
func funcs { $1::$2 }
| { [] }
;

func:
attribute typ ID LPAREN parameters RPAREN LBRACK vars instrs RBRACK
{ { attr = $1; fty = $2; fname = $3; params = $5; vars = $8; instrs = $9 } }
;

attribute:
ATTRIBUTE LPAREN LPAREN ACCESS LPAREN access_pat RPAREN RPAREN RPAREN
{ $6 }
;

direct_typ:
VOID { Void }
| SINT { Sint $1 }
| UINT { Uint $1 }
| UNSIGNED SINT { Uint $2 }
| CONST direct_typ { Const $2 }
;

typ:
| direct_typ { $1 }
| typ MULOP { Pointer $1 }
;

access_pat:
STRING COMMA access_pat { $1::$3 }
| { [] }
;

parameters:
parameter COMMA parameters { $1::$3 }
| parameter { [$1] }
| { [] }
;

parameter:
typ ID { { pty = $1; pname = $2 } }
;

vars:
var vars { $1::$2 }
| { [] }
;

var:
vtyp ID SEMICOLON { { vty = $1; vname = $2 } }
;

vtyp:
| INT { Int }
| LONG LONG UNSIGNED INT { Ullong }
| UNSIGNED LONG { Ulong }
| UINT { Uint $1 }
| SINT { Sint $1 }
| SINT UNSIGNED { Uint $1 }
;

instrs:
instr instrs { $1::$2 }
| { [] }
;

instr:
| ID EQOP LPAREN vtyp RPAREN ID SEMICOLON { Assign ($1, $4, Var $6) }
| ID EQOP ID ADDOP ID SEMICOLON { Add ($1, $3, Var $5) }
| ID EQOP ID ADDOP NUM SEMICOLON { Add ($1, $3, Const $5) }
| ID EQOP ID WMULOP ID SEMICOLON { Wmul ($1, $3, Var $5) }
| ID EQOP ID WMULOP NUM SEMICOLON { Wmul ($1, $3, Const $5) }
| ID EQOP ID MULOP ID SEMICOLON { Mul ($1, $3, Var $5) }
| ID EQOP ID MULOP NUM SEMICOLON { Mul ($1, $3, Const $5) }
| ID EQOP ID ANDOP ID SEMICOLON { And ($1, $3, Var $5) }
| ID EQOP ID ANDOP NUM SEMICOLON { And ($1, $3, Const $5) }
| ID EQOP ID RSHIFT ID SEMICOLON { Rshift ($1, $3, Var $5) }
| ID EQOP ID RSHIFT NUM SEMICOLON { Rshift ($1, $3, Const $5) }
| ID EQOP ID LSHIFT ID SEMICOLON { Lshift ($1, $3, Var $5) }
| ID EQOP ID LSHIFT NUM SEMICOLON { Lshift ($1, $3, Const $5) }
| LESS BB NUM GREATER LSQUARE LOCAL COUNT COLON NUM RSQUARE COLON { Label $3 }
| ID EQOP MULOP ID SEMICOLON { Load ($1, {vty = Void; vname = $4},
0) }
| ID EQOP MEM LSQUARE LPAREN typ RPAREN ID ADDOP BYTE RSQUARE SEMICOLON
{ Load ($1, { vty = $6; vname = $8 }, $10) }
| MULOP ID EQOP ID SEMICOLON { Store ($4, {vty = Void; vname = $2},
0) }
| MEM LSQUARE LPAREN typ RPAREN ID ADDOP BYTE RSQUARE EQOP ID SEMICOLON
{ Store ($11, { vty = $4; vname = $6 }, $8) }
| RETURN SEMICOLON { Return }
;
31 changes: 31 additions & 0 deletions ocaml/parser/syntax.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

type access_t = string

type attribute_t = access_t list

type type_t = Void | Int | Long | Ulong | Llong | Ullong
| Sint of int | Uint of int
| Const of type_t | Pointer of type_t

type param_t = { pty : type_t; pname : string }

type var_t = { vty : type_t; vname : string }

type operand_t = Var of string | Const of Z.t

type instr_t = Label of Z.t
| Assign of string * type_t * operand_t
| Add of string * string * operand_t
| Sub of string * string * operand_t
| Mul of string * string * operand_t
| Wmul of string * string * operand_t
| And of string * string * operand_t
| Rshift of string * string * operand_t
| Lshift of string * string * operand_t
| Load of string * var_t * int
| Store of string * var_t * int
| Return

type function_t = { attr : attribute_t; fty : type_t; fname : string;
params : param_t list; vars : var_t list; instrs : instr_t list }

32 changes: 32 additions & 0 deletions ocaml/parser/syntax.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

type access_t = string

type attribute_t = access_t list

type type_t = Void | Int | Long | Ulong | Llong | Ullong
| Sint of int | Uint of int
| Const of type_t | Pointer of type_t

type param_t = { pty : type_t; pname : string }

type var_t = { vty : type_t; vname : string }

type operand_t = Var of string | Const of Z.t

type instr_t = Label of Z.t
| Assign of string * type_t * operand_t
| Add of string * string * operand_t
| Sub of string * string * operand_t
| Mul of string * string * operand_t
| Wmul of string * string * operand_t
| And of string * string * operand_t
| Rshift of string * string * operand_t
| Lshift of string * string * operand_t
| Load of string * var_t * int
| Store of string * var_t * int
| Return

type function_t = { attr : attribute_t; fty : type_t; fname : string;
params : param_t list; vars : var_t list; instrs : instr_t list }


0 comments on commit 62a58f8

Please sign in to comment.