(* open Lexer *) type parser_context = { seq: (Token.t * Lexer.lexer_context) Seq.t; errors: string list; } (* The parser is a function that takes a parser_context and returns an option of a tuple of a value and a parser_context.*) type 'a parser = parser_context -> ('a * parser_context) option let return (a: 'a) = fun (ctx: parser_context) -> Some (a, ctx) let stop = fun (_: parser_context) -> None let fmap (f: 'a -> 'b) (p: 'a parser): 'b parser = fun (ctx: parser_context) -> match p ctx with | Some (a, ctx') -> Some (f a, ctx') | None -> None let bind (a: 'a parser) (b:'a -> 'b parser) = fun (ctx: parser_context) -> let p = a ctx in match p with | Some (a', ctx') -> b a' ctx' | None -> None let push_error (msg: string): unit parser = fun (ctx: parser_context) -> Some ((), { ctx with errors = msg::ctx.errors }) let (>>=) = bind let (let*) = bind let or_parser (a: 'a parser) (b: 'a parser): 'a parser = fun (ctx: parser_context) -> match a ctx with | Some _ as res -> res | None -> b ctx let (<|>) = or_parser let peek_token: Token.t parser = fun (ctx: parser_context) -> Seq.uncons ctx.seq |> Option.map (fun ((t, _),_) -> (t,ctx)) let next_token: Token.t parser = fun (ctx: parser_context) -> Seq.uncons ctx.seq |> Option.map (fun ((t,_), s) -> (t, { ctx with seq = s} )) let rec eat_until: (Token.t -> bool) -> unit parser = fun (filter) -> let* tt = peek_token in if not (filter tt) then let* _ = next_token in eat_until filter else return () let match_token (tt: Token.token_type) : Token.t parser = let* t = next_token in if t.token_type = tt then return t else stop let match_identifier: string parser = let* tt = next_token in match tt.token_type with | Token.Identifier id -> return id | _ -> stop let zero_or_one (p: 'a parser): ('a option) parser = ((fmap (fun x -> Some x) p) <|> return None ) let rec many (p: 'a parser): 'a list parser = let* a = zero_or_one p in match a with | Some a' -> ( let* as' = many p in return (a'::as') ) | None -> return [] let many1 (p: 'a parser): 'a list parser = let* a = p in let* as' = many p in return (a::as') (* BNF: type_parameter ::= [a-zA-Z][a-zA-Z0-9]* type_generic ::= ''' type_parameter type_declare ::= identifier | identifier -> type_declare | (type_declare) -> type_declare let_expr ::= let identifier (: type_declare)? = expr in expr fun_expr ::= fun (identifier | ('(' identifier (: type_declare)? ')'))? -> expr if_expr ::= if expr then expr else expr factor ::= (expr) | identifier | number call_expr ::= factor | factor factor level1 ::= call_expr | level1 + call_expr | level1 - call_expr level2 ::= level2 * level1 | level2 / level1 | level2 % level1 | level1 level3 ::= level2 ^ level3 | level2 expr ::= let_expr | fun_expr | if_expr | level3 type_alias ::= 'type' type_declare (type_generic)? = type_declare top ::= expr *) type bin_op_type = | Add | Sub | Mul | Div | Mod | Pow let token2op (t: Token.token_type): bin_op_type option = match t with | Token.Add -> Some Add | Token.Sub -> Some Sub | Token.Mul -> Some Mul | Token.Div -> Some Div | Token.Mod -> Some Mod | Token.Pow -> Some Pow | _ -> None let op2str (op: bin_op_type): string = match op with | Add -> "+" | Sub -> "-" | Mul -> "*" | Div -> "/" | Mod -> "%" | Pow -> "^" type mono_op_type = | Neg type type_tree = | TypeIdentifier of string | TypeArrow of type_tree * type_tree type let_expr_tree = { (* // TODO: add Pattern Matching *) name: string; type_declare: type_tree option; value_expr: expr_tree; in_expr: expr_tree; } and fun_expr_tree = { name: string; type_declare: type_tree option; body_expr: expr_tree; } and if_expr_tree = If of expr_tree * expr_tree * expr_tree and call_expr_tree = Call of expr_tree * expr_tree and expr_tree = | LetExpr of let_expr_tree | FunExpr of fun_expr_tree | IfExpr of if_expr_tree | CallExpr of call_expr_tree | BinOpExpr of bin_op_type * expr_tree * expr_tree | MonoOpExpr of bin_op_type * expr_tree | Identifier of string | Number of int let typeTree2str (t: type_tree): string = let rec aux t = match t with | TypeIdentifier id -> id | TypeArrow (t1, t2) -> Printf.sprintf "(%s -> %s)" (aux t1) (aux t2) in aux t let expr2str (e: expr_tree): string = let tab n = String.make (n * 2) ' ' in let rec aux e depth = match e with | LetExpr ({ name = id; value_expr = e1; in_expr = e2; type_declare = td; }) -> let type_declare_str = match td with | Some t -> Printf.sprintf ": %s" (typeTree2str t) | None -> "" in Printf.sprintf "let %s%s = %s in\n%s%s" id type_declare_str (aux e1 depth) (tab depth) (aux e2 (depth+1)) | FunExpr ({ name = id; body_expr = e; type_declare = td; }) -> let arg_str = match td with | Some t -> Printf.sprintf "(%s: %s)" id (typeTree2str t) | None -> id in Printf.sprintf "fun %s ->\n%s%s" arg_str (tab depth) (aux e (depth+1)) | IfExpr (If (e1, e2, e3)) -> Printf.sprintf "if %s then %s else %s" (aux e1 depth) (aux e2 depth) (aux e3 depth) | CallExpr (Call (e1, e2)) -> Printf.sprintf "%s(%s)" (aux e1 depth) (aux e2 depth) | BinOpExpr (op, e1, e2) -> Printf.sprintf "%s %s %s" (aux e1 depth) (op2str op) (aux e2 depth) | MonoOpExpr (op, e) -> Printf.sprintf "%s %s" (op2str op) (aux e depth) | Identifier id -> id | Number n -> string_of_int n in aux e 0 let rec parse_type_declare (): type_tree parser = let parse_simple_type () = let* tt = peek_token in match tt.token_type with | Token.Identifier x -> let* _ = next_token in return (TypeIdentifier x) | Token.LParen -> let* _ = match_token Token.LParen in let* t = parse_type_declare() in let* _ = match_token Token.RParen in return t | _ -> stop in let* base = parse_simple_type() in let* lookahead = peek_token in match lookahead.token_type with | Token.Arrow -> let* _ = next_token in (* // TODO: add error handling for invalid type declaration *) let* t = parse_type_declare() in return (TypeArrow (base, t)) | _ -> return base let parse_type_declare_with_colon (): type_tree option parser = let* tt = zero_or_one (match_token Token.Colon) in begin match tt with | Some _ -> let* t = zero_or_one (parse_type_declare()) in begin match t with | Some(t) -> return (Some t) | _ -> let* _ = (push_error "invalid type declare") in return None end | None -> return None end let rec parse_let_expr (): let_expr_tree parser = let* _ = match_token ( Token.Let) in let* tt = next_token in match tt.token_type with Token.Identifier(x) -> let id = x in let* type_declare = parse_type_declare_with_colon() in let* _ = eat_until (fun x -> x.token_type = Token.Equal) in let* _ = match_token Token.Equal in let* e1 = expr() in let* _ = match_token (Token.In) in let* e2 = expr() in return ({ name = id; value_expr = e1; in_expr = e2; type_declare = type_declare }) | _ -> stop and parse_fun_expr (): fun_expr_tree parser = let* _ = match_token (Token.Fun) in let* tt = next_token in begin match tt.token_type with Token.Identifier(x) -> let id = x in let* _ = match_token Token.Arrow in let* e = expr() in return ({ name = id; body_expr = e; type_declare = None }) | Token.LParen -> let* id = match_identifier in let* type_declare = parse_type_declare_with_colon() in let* _ = eat_until (fun x -> x.token_type = Token.RParen) in let* _ = match_token Token.RParen in let* _ = match_token Token.Arrow in let* e = expr() in return ({ name = id; body_expr = e; type_declare = type_declare }) | _ -> stop end and parse_if_expr (): if_expr_tree parser = let* _ = match_token (Token.If) in let* e1 = expr() in let* _ = match_token (Token.Then) in let* e2 = expr() in let* _ = match_token (Token.Else) in let* e3 = expr() in return (If (e1, e2, e3)) and parse_factor (): expr_tree parser = let* tt = peek_token in match tt.token_type with | Token.Identifier x -> let* _ = next_token in return (Identifier x) | Token.Digit x -> let* _ = next_token in return (Number (int_of_string x)) | Token.LParen -> let* _ = match_token Token.LParen in let* e = expr() in let* _ = match_token Token.RParen in return e | _ -> stop and parse_call_expr (): expr_tree parser = let* e1 = parse_factor() in let rec aux e1 = let* c = peek_token in match c.token_type with | Token.Identifier _ | Token.Digit _ | Token.LParen -> let* e2 = parse_factor() in aux (CallExpr (Call (e1, e2))) | _ -> return e1 in aux e1 and parse_level1 (): expr_tree parser = let* e1 = parse_call_expr() in let rec aux e1 = let* c = peek_token in let tt = c.token_type in match tt with | Token.Add | Token.Sub -> let* _ = next_token in let* e2 = parse_call_expr() in let op = match token2op tt with | Some x -> x | None -> failwith "unreachable" in aux (BinOpExpr (op, e1, e2)) | _ -> return e1 in aux e1 and parse_level2 (): expr_tree parser = let* e1 = parse_level1() in let rec aux e1 = let* c = peek_token in match c.token_type with | Token.Mul | Token.Div | Token.Mod -> let* _ = next_token in let* e2 = parse_level1() in let op = match token2op c.token_type with | Some x -> x | None -> failwith "unreachable" in aux (BinOpExpr (op, e1, e2)) | _ -> return e1 in aux e1 and parse_level3 (): expr_tree parser = let* e1 = parse_level2() in let rec aux e1 = let* c = peek_token in match c.token_type with | Token.Pow -> let* _ = next_token in let* e2 = parse_level3() in let op = match token2op c.token_type with | Some x -> x | None -> failwith "unreachable" in aux (BinOpExpr (op, e1, e2)) | _ -> return e1 in aux e1 and expr (): expr_tree parser = let* e = (parse_let_expr() |> fmap (fun x -> LetExpr x)) <|> (parse_fun_expr() |> fmap (fun x -> FunExpr x)) <|> (parse_if_expr() |> fmap (fun x -> IfExpr x)) <|> parse_level3() in return e let get_expr_tree_from_tokens (tokens: (Token.t * Lexer.lexer_context) Seq.t): expr_tree option = let ntokens = Seq.filter (fun ((token,_): Token.t * Lexer.lexer_context) -> match token.Token.token_type with | Token.Comment(_) -> false | _ -> true ) tokens in let ctx = { seq = ntokens; errors = [] } in match expr() ctx with | Some (e, _) -> Some e | None -> None let%test "test get_expr_tree_from_tokens 1" = let tokens = Lexer.lex_tokens_seq "let x = 1 in\n x" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "let x = 1 in\nx" | None -> false let%test "test get_expr_tree_from_tokens 2" = let tokens = Lexer.lex_tokens_seq "fun x -> x" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "fun x ->\nx" | None -> false let%test "test get_expr_tree_from_tokens 3" = let tokens = Lexer.lex_tokens_seq "if 1 then 2 else 3" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "if 1 then 2 else 3" | None -> false let%test "test get_expr_tree_from_tokens 4" = let tokens = Lexer.lex_tokens_seq "1 + 2 * 3" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "1 + 2 * 3" | None -> false let%test "test get_expr_tree_from_tokens 5" = let tokens = Lexer.lex_tokens_seq "x 1 2" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "x(1)(2)" | None -> false let%test "test get_expr_tree_from_tokens 6 with type" = let tokens = Lexer.lex_tokens_seq "let x: int = 1 in\n x" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "let x: int = 1 in\nx" | None -> false let%test "test get_expr_tree_from_tokens 7 with type" = let tokens = Lexer.lex_tokens_seq "fun (x: int) -> x" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "fun (x: int) ->\nx" | None -> false let%test "test get_expr_tree_from_tokens 8" = let tokens = Lexer.lex_tokens_seq "fun (x) -> x" in match get_expr_tree_from_tokens tokens with | Some e -> expr2str e = "fun x ->\nx" | None -> false