From 57cf1a703ad3a8bcd33a012fd9e73dd73293217c Mon Sep 17 00:00:00 2001 From: "Sandeep.C.R" Date: Thu, 7 Sep 2023 14:06:18 +0530 Subject: [PATCH 1/4] MIR: Update AST to support typechecking the example program Signed-off-by: Sandeep.C.R --- contrib/mir/src/ast.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 4fc66b37b375..04fe78f5c2ae 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -5,20 +5,22 @@ /* */ /******************************************************************************/ -#[derive(Debug)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum Type { Nat, Int, + Bool, } -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub enum Value { NumberValue(i32), + BooleanValue(bool) } pub type InstructionBlock = Vec; -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub enum Instruction { Add, Dip(InstructionBlock), @@ -34,3 +36,5 @@ pub enum Instruction { Push(Type, Value), Swap, } + +pub type AST = Vec; -- GitLab From 26df1afedaee4bdd7f3b432af7710faf3a6331bd Mon Sep 17 00:00:00 2001 From: "Sandeep.C.R" Date: Thu, 7 Sep 2023 14:09:04 +0530 Subject: [PATCH 2/4] MIR: Update parser to generate the updated AST Signed-off-by: Sandeep.C.R --- contrib/mir/src/syntax.lalrpop | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/contrib/mir/src/syntax.lalrpop b/contrib/mir/src/syntax.lalrpop index f04786d80e3d..48eafb5ba39f 100644 --- a/contrib/mir/src/syntax.lalrpop +++ b/contrib/mir/src/syntax.lalrpop @@ -27,13 +27,20 @@ usize: usize = { } type_: Type = { - "int" => Type::Int, - "nat" => Type::Nat, + "int" => Type::Int, + "nat" => Type::Nat, + "bool" => Type::Bool, +} + +boolean: bool = { + "True" => true, + "False" => false, } use Value::*; value: Value = { - => NumberValue(n) + => NumberValue(n), + => BooleanValue(b) } use Instruction::*; -- GitLab From 86ac64622498da108678cd59a801682b27e078ff Mon Sep 17 00:00:00 2001 From: "Sandeep.C.R" Date: Thu, 7 Sep 2023 14:09:27 +0530 Subject: [PATCH 3/4] MIR: Implement typechecker Signed-off-by: Sandeep.C.R --- contrib/mir/src/main.rs | 58 +++++-- contrib/mir/src/stack.rs | 16 ++ contrib/mir/src/typechecker.rs | 292 +++++++++++++++++++++++++++++++++ 3 files changed, 353 insertions(+), 13 deletions(-) create mode 100644 contrib/mir/src/stack.rs create mode 100644 contrib/mir/src/typechecker.rs diff --git a/contrib/mir/src/main.rs b/contrib/mir/src/main.rs index 6a6b2cad3fd8..4d1e0d56a6a9 100644 --- a/contrib/mir/src/main.rs +++ b/contrib/mir/src/main.rs @@ -7,18 +7,49 @@ mod ast; mod parser; +mod stack; mod syntax; +mod typechecker; fn main() {} #[cfg(test)] mod tests { + use std::collections::VecDeque; + + use crate::ast::*; use crate::parser; + use crate::typechecker; + + #[test] + fn typecheck_test_expect_success() { + let ast = parser::parse(&FIBONACCI_SRC).unwrap(); + let mut stack = VecDeque::from([Type::Nat]); + assert!(typechecker::typecheck(&ast, &mut stack)); + assert!(stack == VecDeque::from([Type::Int])); + } + + #[test] + fn typecheck_test_expect_fail() { + let ast = parser::parse(&FIBONACCI_ILLTYPED_SRC).unwrap(); + let mut stack = VecDeque::from([Type::Nat]); + assert!(!typechecker::typecheck(&ast, &mut stack)); + } #[test] fn parser_test_expect_success() { - let src = "{ INT ; PUSH int 0 ; DUP 2 ; GT ; - IF { DIP { PUSH int -1 ; ADD } ; + let ast = parser::parse(&FIBONACCI_SRC).unwrap(); + // use built in pretty printer to validate the expected AST. + assert_eq!(format!("{:#?}", ast), AST_PRETTY_EXPECTATION); + } + + #[test] + fn parser_test_expect_fail() { + assert_eq!(&parser::parse(&FIBONACCI_MALFORMED_SRC).unwrap_err().to_string(), "Unrecognized token `GT` found at 133:135\nExpected one of \";\" or \"}\""); + } + + const FIBONACCI_SRC: &str = "{ INT ; PUSH int 0 ; DUP 2 ; GT ; + IF { DIP { PUSH int -1 ; ADD } ; PUSH int 1 ; DUP 3 ; GT ; @@ -26,24 +57,25 @@ mod tests { DIP { DROP 2 } } { DIP { DROP } } }"; - // use built in pretty printer to validate the expected AST. - assert_eq!(format!("{:#?}", parser::parse(&src).unwrap()), EXPECTATION); - } + const FIBONACCI_ILLTYPED_SRC: &str = "{ INT ; PUSH int 0 ; DUP 2 ; GT ; + IF { DIP { PUSH int -1 ; ADD } ; + PUSH int 1 ; + DUP 4 ; + GT ; + LOOP { SWAP ; DUP 2 ; ADD ; DIP 2 { PUSH int -1 ; ADD } ; DUP 3 ; GT } ; + DIP { DROP 2 } } + { DIP { DROP } } }"; - #[test] - fn parser_test_expect_fail() { - let src = "{ INT ; PUSH int 0 ; DUP 2 ; GT ; - IF { DIP { PUSH int -1 ; ADD } ; + const FIBONACCI_MALFORMED_SRC: &str = "{ INT ; PUSH int 0 ; DUP 2 ; GT ; + IF { DIP { PUSH int -1 ; ADD } ; PUSH int 1 ; - DUP 3 + DUP 4 GT ; LOOP { SWAP ; DUP 2 ; ADD ; DIP 2 { PUSH int -1 ; ADD } ; DUP 3 ; GT } ; DIP { DROP 2 } } { DIP { DROP } } }"; - assert_eq!(&parser::parse(&src).unwrap_err().to_string(), "Unrecognized token `GT` found at 129:131\nExpected one of \";\" or \"}\""); - } - const EXPECTATION: &str = "[ + const AST_PRETTY_EXPECTATION: &str = "[ Int, Push( Int, diff --git a/contrib/mir/src/stack.rs b/contrib/mir/src/stack.rs new file mode 100644 index 000000000000..ab238aa611e5 --- /dev/null +++ b/contrib/mir/src/stack.rs @@ -0,0 +1,16 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +use std::collections::VecDeque; + +use crate::ast::*; + +pub type TypeStack = VecDeque; + +pub fn ensure_stack_len(stack: &TypeStack, l: usize) -> bool { + stack.len() >= l +} diff --git a/contrib/mir/src/typechecker.rs b/contrib/mir/src/typechecker.rs new file mode 100644 index 000000000000..c7a5b28ad0a7 --- /dev/null +++ b/contrib/mir/src/typechecker.rs @@ -0,0 +1,292 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +use crate::ast::*; +use crate::stack::*; +use std::collections::VecDeque; + +pub fn typecheck(ast: &AST, stack: &mut TypeStack) -> bool { + for i in ast { + if !typecheck_instruction(&i, stack) { + return false; + } + } + return true; +} + +fn typecheck_instruction(i: &Instruction, stack: &mut TypeStack) -> bool { + use Instruction::*; + use Type::*; + + match i { + Add => match stack.make_contiguous() { + [Type::Nat, Type::Nat, ..] => { + stack.pop_front(); + true + } + [Type::Int, Type::Int, ..] => { + stack.pop_front(); + true + } + _ => unimplemented!(), + }, + DipN(_, nested) | Dip(nested) => { + let h = { + // Extract the height of the protected stack if the + // instruction is DipN, or default to 1 if it is Dip. + if let DipN(h, _) = i { + *h + } else { + 1 + } + }; + let protected_height: usize = usize::try_from(h).unwrap(); + + if ensure_stack_len(stack, protected_height) { + // Here we split the stack into protected and live segments, and after typechecking + // nested code with the live segment, we append the protected and the potentially + // modified live segment as the result stack. + let mut live = stack.split_off(protected_height); + if typecheck(nested, &mut live) { + stack.append(&mut live); + true + } else { + false + } + } else { + false + } + } + DropN(_) | Drop => { + let h = { + if let DropN(h) = i { + *h + } else { + 1 + } + }; + let drop_height: usize = usize::try_from(h).unwrap(); + if ensure_stack_len(&stack, drop_height) { + *stack = stack.split_off(drop_height); + true + } else { + false + } + } + DupN(0) => { + // DUP instruction requires an argument that is > 0. + false + } + DupN(_) | Dup => { + let h = { + if let DupN(h) = i { + *h + } else { + 1 + } + }; + let dup_height: usize = usize::try_from(h).unwrap(); + if ensure_stack_len(stack, dup_height) { + stack.push_front(stack.get(dup_height - 1).unwrap().to_owned()); + true + } else { + false + } + } + Gt => { + match stack.make_contiguous() { + [Type::Int, ..] => { + stack[0] = Type::Bool; + true + } + _ => false, + } + } + If(nested_t, nested_f) => match stack.make_contiguous() { + // Check if top is bool and bind the tail to `t`. + [Type::Bool, t @ ..] => { + // Clone the stack so that we have two stacks to run + // the two branches with. + let mut t_stack: TypeStack = VecDeque::from(t.to_owned()); + let mut f_stack: TypeStack = VecDeque::from(t.to_owned()); + if typecheck(nested_t, &mut t_stack) && typecheck(nested_f, &mut f_stack) { + // If both stacks are same after typecheck, then make result + // stack using one of them and return success. + if t_stack == f_stack { + *stack = t_stack; + true + } else { + false + } + } else { + false + } + } + _ => false, + }, + Instruction::Int => match stack.make_contiguous() { + [val @ Type::Nat, ..] => { + *val = Type::Int; + true + } + } + Loop(nested) => match stack.make_contiguous() { + // Check if top is bool and bind the tail to `t`. + [Bool, t @ ..] => { + let mut live: TypeStack = VecDeque::from(t.to_owned()); + // Clone the tail and typecheck the nested body using it. + if typecheck(nested, &mut live) { + match live.make_contiguous() { + // ensure the result stack has a bool on top. + [Bool, r @ ..] => { + // If the starting tail and result tail match + // then the typecheck is complete. pop the bool + // off the original stack to form the final result. + if t == r { + stack.pop_front(); + true + } else { + false + } + } + _ => false, + } + } else { + false + } + } + _ => false, + }, + Push(t, v) => { + if typecheck_value(&t, &v) { + stack.push_front(t.to_owned()); + true + } else { + false + } + } + Swap => { + if stack.len() > 1 { + stack.swap(0, 1); + true + } else { + false + } + } + } +} + +fn typecheck_value(t: &Type, v: &Value) -> bool { + use Type::*; + use Value::*; + match (t, v) { + (Nat, NumberValue(n)) => n >= &0, + (Int, NumberValue(_)) => true, + (Bool, BooleanValue(_)) => true, + _ => false, + } +} + +#[cfg(test)] +mod typecheck_tests { + use std::collections::VecDeque; + + use crate::parser::*; + use crate::typechecker::*; + use Instruction::*; + + #[test] + fn test_dup() { + let mut stack = VecDeque::from([Type::Nat]); + let expected_stack = VecDeque::from([Type::Nat, Type::Nat]); + typecheck_instruction(&DupN(1), &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_dup_n() { + let mut stack = VecDeque::from([Type::Nat, Type::Int]); + let expected_stack = VecDeque::from([Type::Int, Type::Nat, Type::Int]); + typecheck_instruction(&DupN(2), &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_swap() { + let mut stack = VecDeque::from([Type::Nat, Type::Int]); + let expected_stack = VecDeque::from([Type::Int, Type::Nat]); + typecheck_instruction(&Swap, &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_int() { + let mut stack = VecDeque::from([Type::Nat]); + let expected_stack = VecDeque::from([Type::Int]); + typecheck_instruction(&Int, &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_drop() { + let mut stack = VecDeque::from([Type::Nat]); + let expected_stack = VecDeque::from([]); + typecheck(&parse("{DROP}").unwrap(), &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_drop_n() { + let mut stack = VecDeque::from([Type::Nat, Type::Int]); + let expected_stack = VecDeque::from([]); + typecheck_instruction(&DropN(2), &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_push() { + let mut stack = VecDeque::from([Type::Nat]); + let expected_stack = VecDeque::from([Type::Int, Type::Nat]); + typecheck_instruction(&Push(Type::Int, Value::NumberValue(1)), &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_gt() { + let mut stack = VecDeque::from([Type::Int]); + let expected_stack = VecDeque::from([Type::Bool]); + typecheck_instruction(&Gt, &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_dip() { + let mut stack = VecDeque::from([Type::Int, Type::Bool]); + let expected_stack = VecDeque::from([Type::Int, Type::Nat, Type::Bool]); + typecheck_instruction(&DipN(1, parse("{PUSH nat 6}").unwrap()), &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_add() { + let mut stack = VecDeque::from([Type::Int, Type::Int]); + let expected_stack = VecDeque::from([Type::Int]); + typecheck_instruction(&Add, &mut stack); + assert!(stack == expected_stack); + } + + #[test] + fn test_loop() { + let mut stack = VecDeque::from([Type::Bool, Type::Int]); + let expected_stack = VecDeque::from([Type::Int]); + assert!(typecheck_instruction( + &Loop(parse("{PUSH bool True}").unwrap()), + &mut stack + )); + assert!(stack == expected_stack); + } +} -- GitLab From e0ee34e2753bdc01f6fb3d58ccaa466dc1972a9b Mon Sep 17 00:00:00 2001 From: "Sandeep.C.R" Date: Tue, 12 Sep 2023 11:17:32 +0530 Subject: [PATCH 4/4] MIR: Combine DupN and Dup instructions and remove redundant usize::try_from in typechecker --- contrib/mir/src/ast.rs | 9 ++--- contrib/mir/src/main.rs | 43 ++++++++++++++++------- contrib/mir/src/syntax.lalrpop | 9 ++--- contrib/mir/src/typechecker.rs | 62 +++++++++++----------------------- 4 files changed, 55 insertions(+), 68 deletions(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 04fe78f5c2ae..044698b73a68 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -23,12 +23,9 @@ pub type InstructionBlock = Vec; #[derive(Debug, Eq, PartialEq)] pub enum Instruction { Add, - Dip(InstructionBlock), - DipN(usize, InstructionBlock), - Drop, - DropN(usize), - Dup, - DupN(usize), + Dip(Option, InstructionBlock), + Drop(Option), + Dup(Option), Gt, If(InstructionBlock, InstructionBlock), Int, diff --git a/contrib/mir/src/main.rs b/contrib/mir/src/main.rs index 4d1e0d56a6a9..66bc648500a1 100644 --- a/contrib/mir/src/main.rs +++ b/contrib/mir/src/main.rs @@ -83,13 +83,16 @@ mod tests { 0, ), ), - DupN( - 2, + Dup( + Some( + 2, + ), ), Gt, If( [ Dip( + None, [ Push( Int, @@ -106,19 +109,25 @@ mod tests { 1, ), ), - DupN( - 3, + Dup( + Some( + 3, + ), ), Gt, Loop( [ Swap, - DupN( - 2, + Dup( + Some( + 2, + ), ), Add, - DipN( - 2, + Dip( + Some( + 2, + ), [ Push( Int, @@ -129,24 +138,32 @@ mod tests { Add, ], ), - DupN( - 3, + Dup( + Some( + 3, + ), ), Gt, ], ), Dip( + None, [ - DropN( - 2, + Drop( + Some( + 2, + ), ), ], ), ], [ Dip( + None, [ - Drop, + Drop( + None, + ), ], ), ], diff --git a/contrib/mir/src/syntax.lalrpop b/contrib/mir/src/syntax.lalrpop index 48eafb5ba39f..4095b9dd8b77 100644 --- a/contrib/mir/src/syntax.lalrpop +++ b/contrib/mir/src/syntax.lalrpop @@ -49,15 +49,12 @@ instruction: Instruction = { "INT" => Int, "GT" => Gt, "LOOP" => Loop(ib), - "DIP" => DipN(n, ib), - "DIP" => Dip(ib), + "DIP" => Dip(n, ib), "ADD" => Add, - "DROP" => DropN(n), - "DROP" => Drop, + "DROP" => Drop(n), "SWAP" => Swap, "IF" => If(t, f), - "DUP" => DupN(n), - "DUP" => Dup + "DUP" => Dup(n), } instructionSeq: Vec = { diff --git a/contrib/mir/src/typechecker.rs b/contrib/mir/src/typechecker.rs index c7a5b28ad0a7..64e76bc8f533 100644 --- a/contrib/mir/src/typechecker.rs +++ b/contrib/mir/src/typechecker.rs @@ -34,17 +34,8 @@ fn typecheck_instruction(i: &Instruction, stack: &mut TypeStack) -> bool { } _ => unimplemented!(), }, - DipN(_, nested) | Dip(nested) => { - let h = { - // Extract the height of the protected stack if the - // instruction is DipN, or default to 1 if it is Dip. - if let DipN(h, _) = i { - *h - } else { - 1 - } - }; - let protected_height: usize = usize::try_from(h).unwrap(); + Dip(opt_height, nested) => { + let protected_height: usize = opt_height.unwrap_or(1); if ensure_stack_len(stack, protected_height) { // Here we split the stack into protected and live segments, and after typechecking @@ -61,15 +52,8 @@ fn typecheck_instruction(i: &Instruction, stack: &mut TypeStack) -> bool { false } } - DropN(_) | Drop => { - let h = { - if let DropN(h) = i { - *h - } else { - 1 - } - }; - let drop_height: usize = usize::try_from(h).unwrap(); + Drop(opt_height) => { + let drop_height: usize = opt_height.unwrap_or(1); if ensure_stack_len(&stack, drop_height) { *stack = stack.split_off(drop_height); true @@ -77,19 +61,12 @@ fn typecheck_instruction(i: &Instruction, stack: &mut TypeStack) -> bool { false } } - DupN(0) => { + Dup(Some(0)) => { // DUP instruction requires an argument that is > 0. false } - DupN(_) | Dup => { - let h = { - if let DupN(h) = i { - *h - } else { - 1 - } - }; - let dup_height: usize = usize::try_from(h).unwrap(); + Dup(opt_height) => { + let dup_height: usize = opt_height.unwrap_or(1); if ensure_stack_len(stack, dup_height) { stack.push_front(stack.get(dup_height - 1).unwrap().to_owned()); true @@ -97,15 +74,13 @@ fn typecheck_instruction(i: &Instruction, stack: &mut TypeStack) -> bool { false } } - Gt => { - match stack.make_contiguous() { - [Type::Int, ..] => { - stack[0] = Type::Bool; - true - } - _ => false, + Gt => match stack.make_contiguous() { + [Type::Int, ..] => { + stack[0] = Type::Bool; + true } - } + _ => false, + }, If(nested_t, nested_f) => match stack.make_contiguous() { // Check if top is bool and bind the tail to `t`. [Type::Bool, t @ ..] => { @@ -133,7 +108,8 @@ fn typecheck_instruction(i: &Instruction, stack: &mut TypeStack) -> bool { *val = Type::Int; true } - } + _ => false, + }, Loop(nested) => match stack.make_contiguous() { // Check if top is bool and bind the tail to `t`. [Bool, t @ ..] => { @@ -203,7 +179,7 @@ mod typecheck_tests { fn test_dup() { let mut stack = VecDeque::from([Type::Nat]); let expected_stack = VecDeque::from([Type::Nat, Type::Nat]); - typecheck_instruction(&DupN(1), &mut stack); + typecheck_instruction(&Dup(Some(1)), &mut stack); assert!(stack == expected_stack); } @@ -211,7 +187,7 @@ mod typecheck_tests { fn test_dup_n() { let mut stack = VecDeque::from([Type::Nat, Type::Int]); let expected_stack = VecDeque::from([Type::Int, Type::Nat, Type::Int]); - typecheck_instruction(&DupN(2), &mut stack); + typecheck_instruction(&Dup(Some(2)), &mut stack); assert!(stack == expected_stack); } @@ -243,7 +219,7 @@ mod typecheck_tests { fn test_drop_n() { let mut stack = VecDeque::from([Type::Nat, Type::Int]); let expected_stack = VecDeque::from([]); - typecheck_instruction(&DropN(2), &mut stack); + typecheck_instruction(&Drop(Some(2)), &mut stack); assert!(stack == expected_stack); } @@ -267,7 +243,7 @@ mod typecheck_tests { fn test_dip() { let mut stack = VecDeque::from([Type::Int, Type::Bool]); let expected_stack = VecDeque::from([Type::Int, Type::Nat, Type::Bool]); - typecheck_instruction(&DipN(1, parse("{PUSH nat 6}").unwrap()), &mut stack); + typecheck_instruction(&Dip(Some(1), parse("{PUSH nat 6}").unwrap()), &mut stack); assert!(stack == expected_stack); } -- GitLab