From 9944864840fbc0e15bbbfec7f64074fae031ebd9 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 18 Dec 2023 16:10:25 +0300 Subject: [PATCH 1/7] MIR: make annotations optionally-owning --- contrib/mir/src/ast/annotations.rs | 2 +- contrib/mir/src/lexer.rs | 29 ++++++++++++----- contrib/mir/src/parser.rs | 52 +++++++++++++++--------------- 3 files changed, 48 insertions(+), 35 deletions(-) diff --git a/contrib/mir/src/ast/annotations.rs b/contrib/mir/src/ast/annotations.rs index 74f41c126d4e..22d0440bf889 100644 --- a/contrib/mir/src/ast/annotations.rs +++ b/contrib/mir/src/ast/annotations.rs @@ -55,7 +55,7 @@ impl<'a> Annotations<'a> { self.0.iter() } - pub fn get_single_field_ann(&self) -> Result>, AnnotationError> { + pub fn get_single_field_ann(&'a self) -> Result>, AnnotationError> { use Annotation::*; let mut res = None; for i in &self.0 { diff --git a/contrib/mir/src/lexer.rs b/contrib/mir/src/lexer.rs index c75844e1faa3..8efb87fcf9aa 100644 --- a/contrib/mir/src/lexer.rs +++ b/contrib/mir/src/lexer.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +use std::borrow::Cow; + use logos::Logos; pub mod errors; pub mod macros; @@ -141,10 +143,21 @@ pub enum Noun { #[derive(Debug, Clone, PartialEq, Eq)] pub enum Annotation<'a> { - Special(&'a str), - Field(&'a str), - Variable(&'a str), - Type(&'a str), + Special(Cow<'a, str>), + Field(Cow<'a, str>), + Variable(Cow<'a, str>), + Type(Cow<'a, str>), +} + +impl Annotation<'_> { + pub fn into_owned(self) -> Annotation<'static> { + match self { + Annotation::Special(s) => Annotation::Special(Cow::Owned(s.into_owned())), + Annotation::Field(s) => Annotation::Field(Cow::Owned(s.into_owned())), + Annotation::Variable(s) => Annotation::Variable(Cow::Owned(s.into_owned())), + Annotation::Type(s) => Annotation::Type(Cow::Owned(s.into_owned())), + } + } } impl std::fmt::Display for Annotation<'_> { @@ -292,14 +305,14 @@ fn lex_bytes(lex: &mut Lexer) -> Result, 
LexerError> { fn lex_annotation<'a>(lex: &mut Lexer<'a>) -> Annotation<'a> { match lex.slice() { - s @ ("@%" | "@%%" | "%@") => Annotation::Special(s), + s @ ("@%" | "@%%" | "%@") => Annotation::Special(Cow::Borrowed(s)), s => { if let Some(s) = s.strip_prefix('@') { - Annotation::Variable(s) + Annotation::Variable(Cow::Borrowed(s)) } else if let Some(s) = s.strip_prefix('%') { - Annotation::Field(s) + Annotation::Field(Cow::Borrowed(s)) } else if let Some(s) = s.strip_prefix(':') { - Annotation::Type(s) + Annotation::Type(Cow::Borrowed(s)) } else { unreachable!("regex for Annotation ensures it's either one of three") } diff --git a/contrib/mir/src/parser.rs b/contrib/mir/src/parser.rs index 8833f7c67256..6ee15abce3bd 100644 --- a/contrib/mir/src/parser.rs +++ b/contrib/mir/src/parser.rs @@ -95,7 +95,7 @@ mod tests { assert_eq!(parse("EQ").unwrap(), app!(EQ)); assert_eq!( parse("EQ @a").unwrap(), - Micheline::App(Prim::EQ, &[], [Annotation::Variable("a")].into()) + Micheline::App(Prim::EQ, &[], [Annotation::Variable("a".into())].into()) ); } @@ -182,7 +182,7 @@ mod tests { Ok(Micheline::App( Prim::int, &[], - [Annotation::Type("p")].into() + [Annotation::Type("p".into())].into() )) ); assert_eq!( @@ -190,10 +190,10 @@ mod tests { Ok(Micheline::App( Prim::pair, &[ - Micheline::App(Prim::int, &[], [Annotation::Type("x_pos")].into()), - Micheline::App(Prim::int, &[], [Annotation::Type("y_pos")].into()), + Micheline::App(Prim::int, &[], [Annotation::Type("x_pos".into())].into()), + Micheline::App(Prim::int, &[], [Annotation::Type("y_pos".into())].into()), ], - [Annotation::Type("point")].into() + [Annotation::Type("point".into())].into() )) ); assert_eq!( @@ -201,7 +201,7 @@ mod tests { Ok(Micheline::App( Prim::string, &[], - [Annotation::Field("foo")].into() + [Annotation::Field("foo".into())].into() )) ); assert_eq!( @@ -210,9 +210,9 @@ mod tests { Prim::string, &[], [ - Annotation::Field("foo"), - Annotation::Type("bar"), - Annotation::Variable("baz") + 
Annotation::Field("foo".into()), + Annotation::Type("bar".into()), + Annotation::Variable("baz".into()) ] .into() )) @@ -222,7 +222,7 @@ mod tests { Ok(Micheline::App( Prim::string, &[], - [Annotation::Variable("foo")].into() + [Annotation::Variable("foo".into())].into() )) ); assert_eq!( @@ -230,10 +230,10 @@ mod tests { Ok(Micheline::App( Prim::pair, &[ - Micheline::App(Prim::int, &[], [Annotation::Field("b")].into()), - Micheline::App(Prim::int, &[], [Annotation::Field("c")].into()), + Micheline::App(Prim::int, &[], [Annotation::Field("b".into())].into()), + Micheline::App(Prim::int, &[], [Annotation::Field("c".into())].into()), ], - [Annotation::Field("a")].into() + [Annotation::Field("a".into())].into() )) ); assert_eq!( @@ -241,10 +241,10 @@ mod tests { Ok(Micheline::App( Prim::or, &[ - Micheline::App(Prim::int, &[], [Annotation::Field("b")].into()), - Micheline::App(Prim::int, &[], [Annotation::Field("c")].into()), + Micheline::App(Prim::int, &[], [Annotation::Field("b".into())].into()), + Micheline::App(Prim::int, &[], [Annotation::Field("c".into())].into()), ], - [Annotation::Field("a")].into() + [Annotation::Field("a".into())].into() )) ); assert_eq!( @@ -267,9 +267,9 @@ mod tests { Prim::PUSH, &[app!(int), 1.into()], [ - Annotation::Variable("var"), - Annotation::Type("ty"), - Annotation::Field("field") + Annotation::Variable("var".into()), + Annotation::Type("ty".into()), + Annotation::Field("field".into()) ] .into() ), @@ -280,12 +280,12 @@ mod tests { Prim::CAR, &[], [ - Annotation::Variable("var"), - Annotation::Type("ty"), - Annotation::Field("field"), - Annotation::Type("ty.2"), - Annotation::Variable("var.2"), - Annotation::Field("field.2"), + Annotation::Variable("var".into()), + Annotation::Type("ty".into()), + Annotation::Field("field".into()), + Annotation::Type("ty.2".into()), + Annotation::Variable("var.2".into()), + Annotation::Field("field.2".into()), ] .into() ), @@ -342,7 +342,7 @@ mod tests { Micheline::App( Prim::contract, 
&[app!(unit)], - [Annotation::Type("ct"), Annotation::Field("foo")].into(), + [Annotation::Type("ct".into()), Annotation::Field("foo".into())].into(), ), app!(Unit) ]) -- GitLab From 9d940633dcf6c33b3c9659d0fcb6b6ba2379f5fa Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 11 Dec 2023 23:22:43 +0300 Subject: [PATCH 2/7] MIR: unpack infra --- contrib/mir/Cargo.lock | 35 +- contrib/mir/Cargo.toml | 4 + contrib/mir/src/lexer.rs | 30 +- contrib/mir/src/serializer.rs | 1 + contrib/mir/src/serializer/decode.rs | 544 ++++++++++++++++++ .../mir/src/serializer/integration_tests.rs | 10 +- 6 files changed, 601 insertions(+), 23 deletions(-) create mode 100644 contrib/mir/src/serializer/decode.rs diff --git a/contrib/mir/Cargo.lock b/contrib/mir/Cargo.lock index fd0763c366ab..61e56fab5d3b 100644 --- a/contrib/mir/Cargo.lock +++ b/contrib/mir/Cargo.lock @@ -528,6 +528,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.3.2" @@ -797,6 +803,7 @@ name = "mir" version = "0.1.0" dependencies = [ "base58 0.2.0", + "bitvec", "blst", "checked", "chrono", @@ -808,6 +815,9 @@ dependencies = [ "logos", "num-bigint", "num-traits", + "smallvec", + "strum 0.25.0", + "strum_macros 0.25.3", "tezos_crypto_rs", "tezos_data_encoding", "thiserror", @@ -1324,18 +1334,37 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c" +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" + [[package]] name = "strum_macros" version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149" dependencies = [ - "heck", + "heck 0.3.3", "proc-macro2", "quote", "syn 1.0.109", ] +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.33", +] + [[package]] name = "subtle" version = "2.4.1" @@ -1414,8 +1443,8 @@ dependencies = [ "proptest", "rand 0.7.3", "serde", - "strum", - "strum_macros", + "strum 0.20.0", + "strum_macros 0.20.1", "thiserror", "zeroize", ] diff --git a/contrib/mir/Cargo.toml b/contrib/mir/Cargo.toml index 16044ffcdd48..2100c59d2833 100644 --- a/contrib/mir/Cargo.toml +++ b/contrib/mir/Cargo.toml @@ -22,6 +22,10 @@ num-traits = "0.2" chrono = "0.4" integer-sqrt = "0.1" blst = "0.3" +bitvec = "1.0" +strum = "0.25" +strum_macros = "0.25" +smallvec = { version = "1.11", features = [ "const_new" ] } [[bin]] name = "tzt_runner" diff --git a/contrib/mir/src/lexer.rs b/contrib/mir/src/lexer.rs index 8efb87fcf9aa..04bd8bcbdc26 100644 --- a/contrib/mir/src/lexer.rs +++ b/contrib/mir/src/lexer.rs @@ -14,6 +14,7 @@ pub mod macros; pub use errors::*; use macros::*; use num_bigint::BigInt; +use strum_macros::EnumCount; /// Expand to the first argument if not empty; otherwise, the second argument. macro_rules! coalesce { @@ -30,7 +31,7 @@ macro_rules! coalesce { /// representation of the identifiers. macro_rules! defprim { ($ty:ident; $($(#[token($str:expr)])? 
$prim:ident),* $(,)*) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, EnumCount)] #[allow(non_camel_case_types, clippy::upper_case_acronyms)] #[repr(u8)] pub enum $ty { @@ -171,6 +172,18 @@ impl std::fmt::Display for Annotation<'_> { } } +pub(crate) fn try_ann_from_str(value: &str) -> Option { + match value { + s @ ("@%" | "@%%" | "%@") => Some(Annotation::Special(Cow::Borrowed(s))), + s => match s.as_bytes()[0] { + b'@' => Some(Annotation::Variable(Cow::Borrowed(&s[1..]))), + b'%' => Some(Annotation::Field(Cow::Borrowed(&s[1..]))), + b':' => Some(Annotation::Type(Cow::Borrowed(&s[1..]))), + _ => None, + }, + } +} + #[derive(Debug, Clone, PartialEq, Eq, Logos)] #[logos(error = LexerError, skip r"[ \t\r\n\v\f]+|#[^\n]*\n")] pub enum Tok<'a> { @@ -304,20 +317,7 @@ fn lex_bytes(lex: &mut Lexer) -> Result, LexerError> { } fn lex_annotation<'a>(lex: &mut Lexer<'a>) -> Annotation<'a> { - match lex.slice() { - s @ ("@%" | "@%%" | "%@") => Annotation::Special(Cow::Borrowed(s)), - s => { - if let Some(s) = s.strip_prefix('@') { - Annotation::Variable(Cow::Borrowed(s)) - } else if let Some(s) = s.strip_prefix('%') { - Annotation::Field(Cow::Borrowed(s)) - } else if let Some(s) = s.strip_prefix(':') { - Annotation::Type(Cow::Borrowed(s)) - } else { - unreachable!("regex for Annotation ensures it's either one of three") - } - } - } + try_ann_from_str(lex.slice()).expect("regex from annotation ensures it's valid") } #[cfg(test)] diff --git a/contrib/mir/src/serializer.rs b/contrib/mir/src/serializer.rs index 91a1f8942a4b..554fb52a4dc5 100644 --- a/contrib/mir/src/serializer.rs +++ b/contrib/mir/src/serializer.rs @@ -1,2 +1,3 @@ +pub mod decode; pub mod encode; mod integration_tests; diff --git a/contrib/mir/src/serializer/decode.rs b/contrib/mir/src/serializer/decode.rs new file mode 100644 index 000000000000..f5909fa706fc --- /dev/null +++ b/contrib/mir/src/serializer/decode.rs @@ -0,0 +1,544 @@ 
+/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* Copyright (c) [2022-2023] TriliTech */ +/* */ +/******************************************************************************/ + +//! Micheline deserialization. + +use bitvec::{order::Lsb0, vec::BitVec, view::BitView}; +use num_bigint::{BigInt, Sign}; +use smallvec::{smallvec, SmallVec}; +use strum::EnumCount; +use typed_arena::Arena; + +use crate::{ + ast::{ + annotations::{Annotations, NO_ANNS}, + Micheline, + }, + lexer::{try_ann_from_str, Annotation, Prim}, +}; + +#[derive(PartialEq, Debug, Clone, Copy, thiserror::Error)] +pub enum DecodeError { + #[error("trailing bytes after decoding the value")] + TrailingBytes, + #[error("PACK tag 0x05 not found")] + NoPackTag, + #[error("expected more data, but got EOF")] + UnexpectedEOF, + #[error("unknown tag: {0}")] + UnknownTag(u8), + #[error("forbidden character in string")] + ForbiddenStringCharacter, + #[error("unknown primitive tag: {0}")] + UnknownPrim(u8), + #[error("could not decode annotation")] + BadAnnotation, +} + +/// Prefix denoting an encoded number. +const NUMBER_TAG: u8 = 0x00; +/// Prefix denoting an encoded string. +const STRING_TAG: u8 = 0x01; +/// Prefix denoting an encoded sequence. +const SEQ_TAG: u8 = 0x02; +/// Prefix denoting an encoded bytes sequence. +const BYTES_TAG: u8 = 0x0a; + +// Tags for [Micheline::App]. +const APP_NO_ARGS_NO_ANNOTS_TAG: u8 = 0x03; +const APP_NO_ARGS_WITH_ANNOTS_TAG: u8 = 0x04; +const APP_ONE_ARG_NO_ANNOTS_TAG: u8 = 0x05; +const APP_ONE_ARG_WITH_ANNOTS_TAG: u8 = 0x06; +const APP_TWO_ARGS_NO_ANNOTS_TAG: u8 = 0x07; +const APP_TWO_ARGS_WITH_ANNOTS_TAG: u8 = 0x08; +const APP_GENERIC: u8 = 0x09; + +/// If the number of arguments is small, an allocation-avoiding optimization is +/// used. This constant specifies the upper bound for the number of arguments +/// where it triggers. 
+/// At most we expect primitives with 3 arguments. +const EXPECTED_MAX_APP_ARGS: usize = 3; + +/// If the number of sequence elements is small, an allocation-avoiding optimization is +/// used. This constant specifies the upper bound for the number of sequence +/// elements where it triggers. +/// 3 elements don't waste too much stack space and seem like a reasonable +/// optimization for small sequences. +const EXPECTED_MAX_SEQ_ELTS: usize = 3; + +impl<'a> Micheline<'a> { + /// Decode raw binary data. Same as `decode_packed`, but doesn't expect the + /// first byte to be `0x05` tag. + pub fn decode_raw( + arena: &'a Arena>, + bytes: &[u8], + ) -> Result, DecodeError> { + let mut it = bytes.into(); + let res = decode_micheline(arena, &mut it)?; + if it.peek().is_some() { + // didn't consume bytes entirely, fail + return Err(DecodeError::TrailingBytes); + } + Ok(res) + } + + /// Decode data that was previously `PACK`ed. Checks for `0x05` tag as the + /// first byte and strips it. + pub fn decode_packed( + arena: &'a Arena>, + bytes: &[u8], + ) -> Result, DecodeError> { + // PACK marker + if bytes.first() != Some(&0x05) { + return Err(DecodeError::NoPackTag); + } + Micheline::decode_raw(arena, &bytes[1..]) + } +} + +struct BytesIt<'a>(&'a [u8]); + +impl<'a> BytesIt<'a> { + fn take(&mut self, num: usize) -> Option<&'a [u8]> { + if self.0.len() < num { + return None; + } + let (cur, rest) = self.0.split_at(num); + self.0 = rest; + Some(cur) + } + + fn take_const(&mut self) -> Option<&'a [u8; N]> { + self.take(N).map(|x| x.try_into().unwrap()) + } + + fn next(&mut self) -> Option { + self.next_ref().copied() + } + + fn next_ref(&mut self) -> Option<&u8> { + if self.0.is_empty() { + return None; + } + let res = &self.0[0]; + self.0 = &self.0[1..]; + Some(res) + } + + fn peek(&self) -> Option { + self.0.first().copied() + } +} + +impl<'a> From<&'a [u8]> for BytesIt<'a> { + fn from(value: &'a [u8]) -> Self { + BytesIt(value) + } +} + +enum NumArgs { + Zero, + One, + Two, + 
Many, +} + +fn decode_micheline<'a>( + arena: &'a Arena>, + bytes: &mut BytesIt, +) -> Result, DecodeError> { + match bytes.next() { + None => Err(DecodeError::UnexpectedEOF), + Some(b) => match b { + NUMBER_TAG => decode_int(bytes), + STRING_TAG => decode_string(bytes), + SEQ_TAG => decode_seq(arena, bytes), + BYTES_TAG => decode_bytes(bytes), + APP_NO_ARGS_NO_ANNOTS_TAG => decode_app(NumArgs::Zero, false, arena, bytes), + APP_NO_ARGS_WITH_ANNOTS_TAG => decode_app(NumArgs::Zero, true, arena, bytes), + APP_ONE_ARG_NO_ANNOTS_TAG => decode_app(NumArgs::One, false, arena, bytes), + APP_ONE_ARG_WITH_ANNOTS_TAG => decode_app(NumArgs::One, true, arena, bytes), + APP_TWO_ARGS_NO_ANNOTS_TAG => decode_app(NumArgs::Two, false, arena, bytes), + APP_TWO_ARGS_WITH_ANNOTS_TAG => decode_app(NumArgs::Two, true, arena, bytes), + APP_GENERIC => decode_app(NumArgs::Many, true, arena, bytes), + b => Err(DecodeError::UnknownTag(b)), + }, + } +} + +fn get_len(bytes: &mut BytesIt) -> Result { + Ok(u32::from_be_bytes( + *bytes.take_const::<4>().ok_or(DecodeError::UnexpectedEOF)?, + )) +} + +fn decode_int(bytes: &mut BytesIt) -> Result, DecodeError> { + let mut bitvec: BitVec = BitVec::new(); + let mut sign = Sign::Plus; + let mut first = true; + loop { + let bits = bytes + .next_ref() + .ok_or(DecodeError::UnexpectedEOF)? + .view_bits::(); + let data_len = if first { + sign = if bits[6] { Sign::Minus } else { Sign::Plus }; + first = false; + 6 + } else { + 7 + }; + bitvec.extend_from_bitslice(&bits[..data_len]); + if !bits[7] { + break; + } + } + bitvec.set_uninitialized(false); + return Ok(Micheline::Int(BigInt::from_bytes_le( + sign, + &bitvec.into_vec(), + ))); +} + +fn get_bytes<'a>(bytes: &mut BytesIt<'a>) -> Result<&'a [u8], DecodeError> { + let len = get_len(bytes)? 
as usize; + bytes.take(len).ok_or(DecodeError::UnexpectedEOF) +} + +fn validate_str(bytes: &[u8]) -> Result<&str, DecodeError> { + // check if all characters are printable ASCII + if !bytes + .iter() + .all(|c| matches!(c, b' '..=b'~' | b'\n' | b'\r')) + { + return Err(DecodeError::ForbiddenStringCharacter); + } + // SAFETY: we just checked all characters are ASCII. + Ok(unsafe { std::str::from_utf8_unchecked(bytes) }) +} + +fn decode_string(bytes: &mut BytesIt) -> Result, DecodeError> { + Ok(Micheline::String( + validate_str(get_bytes(bytes)?)?.to_owned(), + )) +} + +fn decode_bytes(bytes: &mut BytesIt) -> Result, DecodeError> { + Ok(Micheline::Bytes(get_bytes(bytes)?.to_vec())) +} + +fn decode_seq_raw<'a, const EXPECTED_MAX_ELTS: usize>( + arena: &'a Arena>, + bytes: &mut BytesIt, +) -> Result; EXPECTED_MAX_ELTS]>, DecodeError> { + let mut bytes: BytesIt = get_bytes(bytes)?.into(); + let mut buf = SmallVec::new(); + while bytes.peek().is_some() { + buf.push(decode_micheline(arena, &mut bytes)?); + } + Ok(buf) +} + +fn decode_seq<'a>( + arena: &'a Arena>, + bytes: &mut BytesIt, +) -> Result, DecodeError> { + let buf = decode_seq_raw::(arena, bytes)?; + let res = Micheline::Seq(arena.alloc_extend(buf)); + Ok(res) +} + +fn validate_ann(bytes: &[u8]) -> Result, DecodeError> { + // @%|@%%|%@|[@:%][_0-9a-zA-Z][_0-9a-zA-Z\.%@]* + macro_rules! alpha_num { + () => { + b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' + } + } + match bytes { + b"@%" | b"@%%" | b"%@" => {} + [b'@' | b':' | b'%', alpha_num!(), rest @ ..] + if rest + .iter() + .all(|c| matches!(c, alpha_num!() | b'.' 
| b'%' | b'@')) => {} + _ => return Err(DecodeError::BadAnnotation), + } + // SAFETY: we just checked all bytes are ASCII + let str = unsafe { std::str::from_utf8_unchecked(bytes) }; + // unwrap is fine, we effectively validated against a regex + Ok(try_ann_from_str(str).unwrap().into_owned()) +} + +fn decode_app<'a>( + num_args: NumArgs, + annotations: bool, + arena: &'a Arena>, + bytes: &mut BytesIt, +) -> Result, DecodeError> { + let prim = bytes.next().ok_or(DecodeError::UnexpectedEOF)?; + if prim as usize >= Prim::COUNT { + return Err(DecodeError::UnknownPrim(prim)); + } + // SAFETY: Prim is repr(u8), and we checked it's within bounds. + let prim: Prim = unsafe { std::mem::transmute(prim) }; + let args: SmallVec<[_; EXPECTED_MAX_APP_ARGS]> = match num_args { + NumArgs::Zero => SmallVec::new(), + NumArgs::One => smallvec![decode_micheline(arena, bytes)?], + NumArgs::Two => smallvec![ + decode_micheline(arena, bytes)?, + decode_micheline(arena, bytes)?, + ], + NumArgs::Many => decode_seq_raw(arena, bytes)?, + }; + let anns = if annotations { + let str = get_bytes(bytes)?; + if str.is_empty() { + NO_ANNS + } else { + str.split(|c| c == &b' ') + .map(validate_ann) + .collect::>()? 
+ } + } else { + NO_ANNS + }; + Ok(Micheline::App(prim, arena.alloc_extend(args), anns)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[track_caller] + fn check<'a>(v: impl Into>, hex_bytes: &str) { + let arena = Arena::new(); + let hex_bytes: &str = hex_bytes + .strip_prefix("0x") + .expect("The `expected` argument must start from 0x"); + assert_eq!( + Micheline::decode_raw( + &arena, + &hex::decode(hex_bytes).expect("Bad hex string in `expected` argument") + ), + Ok(v.into()) + ); + } + + fn check_err(hex_bytes: &str, err: DecodeError) { + let arena = Arena::new(); + let hex_bytes: &str = hex_bytes + .strip_prefix("0x") + .expect("The `expected` argument must start from 0x"); + assert_eq!( + Micheline::decode_raw( + &arena, + &hex::decode(hex_bytes).expect("Bad hex string in `expected` argument") + ), + Err(err) + ); + } + // To figure out the expected bytes, use + // octez-client convert data 'VALUE' from michelson to binary + + mod value { + use crate::ast::micheline::test_helpers::{app, seq}; + + use super::*; + + #[test] + fn primitive_values() { + check((), "0x030b"); + check(true, "0x030a"); + check(false, "0x0303"); + } + + #[test] + fn errors() { + check_err("0x030b00", DecodeError::TrailingBytes); + check_err("0x", DecodeError::UnexpectedEOF); + check_err("0x03", DecodeError::UnexpectedEOF); + check_err("0x02", DecodeError::UnexpectedEOF); + check_err("0x09", DecodeError::UnexpectedEOF); + check_err("0xff", DecodeError::UnknownTag(0xff)); + check_err("0x03ff", DecodeError::UnknownPrim(0xff)); + check_err("0x010000000100", DecodeError::ForbiddenStringCharacter); + } + + mod number { + use super::*; + + #[test] + fn zero() { + check(0, "0x0000"); + } + + #[test] + fn few_trivial_samples() { + check(1, "0x0001"); + check(13, "0x000d"); + } + + #[test] + fn largest_1_byte_long() { + check(63, "0x003f"); + } + + #[test] + fn smallest_2_bytes_long() { + check(64, "0x008001"); + } + + #[test] + fn large() { + check(123456789, "0x0095b4de75"); + } + + 
#[test] + fn negative() { + check(-1, "0x0041"); + check(-36, "0x0064"); + } + + // Don't mind this "largest", it is in absolute numeric value sense + #[test] + fn negative_largest_1_byte_long() { + check(-63, "0x007f"); + } + + #[test] + fn negative_smallest_2_bytes_long() { + check(-64, "0x00c001"); + } + + #[test] + fn negative_large() { + check(-987654321, "0x00f1a2f3ad07"); + } + } + + #[test] + fn simple_nested() { + check(app!(Pair[true, ""]), "0x0707030a0100000000"); + check(app!(None[]), "0x0306"); + check(app!(Some[app!(Unit)]), "0x0509030b"); + check(app!(Elt[true, ()]), "0x0704030a030b"); + check( + seq! { app!(DROP); app!(LAMBDA[app!(unit), app!(unit), seq!{}]) }, + "0x02000000150320093100000009036c036c020000000000000000", + ); + } + + #[test] + fn string() { + check("", "0x0100000000"); + check("abc", "0x0100000003616263"); + check( + "123456789123456789123456789", + "0x010000001b313233343536373839313233343536373839313233343536373839", + ); + } + + #[test] + fn very_long_string() { + // Using "\"$(printf 'x%.0s' {1..1000})\"" as a value + // Verifies that length is encoded as a fixed-length number, not as zarith + check( + "x".repeat(1000), + 
"0x01000003e87878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787878787
8787878787878" + ); + } + + #[test] + fn bytes() { + check(hex::decode("").unwrap(), "0x0a00000000"); + check(hex::decode("001234abff").unwrap(), "0x0a00000005001234abff"); + } + + #[test] + fn list() { + check(seq! {}, "0x0200000000"); + check(seq! {true; false}, "0x0200000004030a0303"); + } + + #[test] + fn deeply_nested_list() { + check( + seq! {seq!{}; seq!{true}}, + "0x020000000c02000000000200000002030a", + ); + } + + #[test] + fn list_with_applications() { + check( + seq! {app!(Pair[3, 4]); app!(Pair[5, 6])}, + "0x020000000c070700030004070700050006", + ) + } + + #[test] + fn very_long_list() { + // Using "{ $(printf 'Unit;%.0s' {1..1000}) }" as a value + // Verifies that length is encoded as a fixed-length number, not as zarith + check( + Micheline::Seq(&vec![app!(Unit); 1000]), + "0x02000007d0030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b0
30b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b0
30b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b030b", + ); + } + } + + mod annotations { + use crate::parser::test_helpers::*; + + use super::*; + + #[test] + fn trivial() { + check(parse("(int %a)").unwrap(), "0x045b000000022561"); + check(parse("(int :a)").unwrap(), "0x045b000000023a61"); + check( + parse("(int @abc123)").unwrap(), + "0x045b0000000740616263313233", + ); + } + + #[test] + fn several_annotations() { + check( + parse("(int %a :b @c %d)").unwrap(), + "0x045b0000000b2561203a62204063202564", + ); + } + + #[test] + fn nested_entries() { + check( + parse("(pair %a (int %b))").unwrap(), + "0x0665045b000000022562000000022561", + ); + } + + #[test] + fn generic_case() { + check( + parse("LAMBDA (int %a) (int :b) {}").unwrap(), + "0x093100000015045b000000022561045b000000023a62020000000000000000", + ); + check( + parse("LAMBDA (int %a %b %c %d) int {}").unwrap(), + "0x093100000018045b0000000b2561202562202563202564035b020000000000000000", + ); + } + + #[test] + fn bad_annotations() { + check_err("0x045b00000002257f", DecodeError::BadAnnotation); + check_err("0x045b000000026161", DecodeError::BadAnnotation); + } + } +} diff --git a/contrib/mir/src/serializer/integration_tests.rs 
b/contrib/mir/src/serializer/integration_tests.rs index 3aab3bd1c4a4..134e93a5f1b5 100644 --- a/contrib/mir/src/serializer/integration_tests.rs +++ b/contrib/mir/src/serializer/integration_tests.rs @@ -14,7 +14,7 @@ mod test_typed_encode { use typed_arena::Arena; - use crate::ast::{byte_repr_trait::*, IntoMicheline, KeyHash}; + use crate::ast::{byte_repr_trait::*, IntoMicheline, KeyHash, Micheline}; use crate::ast::{Address, TypedValue}; // Expected bytes to be produced with @@ -29,12 +29,12 @@ mod test_typed_encode { .strip_prefix("0x") .expect("The `expected` argument must start from 0x"); + let bytes = &hex::decode(hex_bytes).expect("Bad hex string in `expected` argument"); + let arena = Arena::new(); let micheline = v.into_micheline_optimized_legacy(&arena); - assert_eq!( - micheline.encode_for_pack(), - hex::decode(hex_bytes).expect("Bad hex string in `expected` argument") - ) + assert_eq!(&micheline.encode_for_pack(), bytes); + assert_eq!(Micheline::decode_packed(&arena, bytes), Ok(micheline),); } #[test] -- GitLab From beb279361f7de287f67dbf4c86280a64cb2527d5 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Mon, 18 Dec 2023 16:51:09 +0300 Subject: [PATCH 3/7] MIR: pass Micheline arena through the interpreter --- contrib/mir/src/ast/michelson_lambda.rs | 12 +++-- contrib/mir/src/interpreter.rs | 62 +++++++++++++++++-------- contrib/mir/src/lib.rs | 46 +++++++++++++----- contrib/mir/src/tzt.rs | 15 ++++-- contrib/mir/tzt_runner/main.rs | 9 +++- 5 files changed, 104 insertions(+), 40 deletions(-) diff --git a/contrib/mir/src/ast/michelson_lambda.rs b/contrib/mir/src/ast/michelson_lambda.rs index 237a6b0ab6b5..c1dfd2a760d6 100644 --- a/contrib/mir/src/ast/michelson_lambda.rs +++ b/contrib/mir/src/ast/michelson_lambda.rs @@ -131,12 +131,14 @@ mod tests { #[test] fn apply_micheline() { let parser = Parser::new(); + let arena = Arena::new(); let code = parser.parse("{ LAMBDA (pair int nat unit) unit { DROP; UNIT }; PUSH int 1; APPLY; PUSH nat 2; APPLY 
}").unwrap(); let code = code .typecheck_instruction(&mut Ctx::default(), None, &[]) .unwrap(); let mut stack = stk![]; - code.interpret(&mut Ctx::default(), &mut stack).unwrap(); + code.interpret(&mut Ctx::default(), &arena, &mut stack) + .unwrap(); let closure = irrefutable_match!(stack.pop().unwrap(); TypedValue::Lambda); let arena = Arena::new(); assert_eq!( @@ -161,12 +163,14 @@ mod tests { #[test] fn apply_micheline_rec() { let parser = Parser::new(); + let arena = Arena::new(); let code = parser.parse("{ LAMBDA_REC (pair int nat unit) unit { DROP 2; UNIT }; PUSH int 1; APPLY; PUSH nat 2; APPLY }").unwrap(); let code = code .typecheck_instruction(&mut Ctx::default(), None, &[]) .unwrap(); let mut stack = stk![]; - code.interpret(&mut Ctx::default(), &mut stack).unwrap(); + code.interpret(&mut Ctx::default(), &arena, &mut stack) + .unwrap(); let closure = irrefutable_match!(stack.pop().unwrap(); TypedValue::Lambda); let arena = Arena::new(); assert_eq!( @@ -200,6 +204,7 @@ mod tests { // PACK always encodes pair values as right-combs, and always encodes // pair types as a flat sequence. Test we're doing the same. 
let parser = Parser::new(); + let arena = Arena::new(); let code = parser .parse( r#" @@ -217,7 +222,8 @@ mod tests { .typecheck_instruction(&mut Ctx::default(), None, &[]) .unwrap(); let mut stack = stk![]; - code.interpret(&mut Ctx::default(), &mut stack).unwrap(); + code.interpret(&mut Ctx::default(), &arena, &mut stack) + .unwrap(); let closure = irrefutable_match!(stack.pop().unwrap(); TypedValue::Lambda); let arena = Arena::new(); assert_eq!( diff --git a/contrib/mir/src/interpreter.rs b/contrib/mir/src/interpreter.rs index 74ad32b7e8a3..b2b0de5b8405 100644 --- a/contrib/mir/src/interpreter.rs +++ b/contrib/mir/src/interpreter.rs @@ -51,6 +51,7 @@ impl<'a> ContractScript<'a> { pub fn interpret( &self, ctx: &mut crate::context::Ctx, + arena: &'a Arena>, parameter: Micheline<'a>, storage: Micheline<'a>, ) -> Result<(impl Iterator>, TypedValue<'a>), ContractInterpretError<'a>> @@ -59,7 +60,7 @@ impl<'a> ContractScript<'a> { let storage = typecheck_value(&storage, ctx, &self.storage)?; let tc_val = TypedValue::new_pair(parameter, storage); let mut stack = stk![tc_val]; - self.code.interpret(ctx, &mut stack)?; + self.code.interpret(ctx, arena, &mut stack)?; use TypedValue as V; match stack.pop().expect("empty execution stack") { V::Pair(p) => match *p { @@ -86,19 +87,21 @@ impl<'a> Instruction<'a> { pub fn interpret( &self, ctx: &mut Ctx, + arena: &'a Arena>, stack: &mut IStack<'a>, ) -> Result<(), InterpretError<'a>> { - interpret_one(self, ctx, stack) + interpret_one(self, ctx, arena, stack) } } fn interpret<'a>( ast: &[Instruction<'a>], ctx: &mut Ctx, + arena: &'a Arena>, stack: &mut IStack<'a>, ) -> Result<(), InterpretError<'a>> { for i in ast { - i.interpret(ctx, stack)?; + i.interpret(ctx, arena, stack)?; } ctx.gas.consume(interpret_cost::INTERPRET_RET)?; Ok(()) @@ -114,6 +117,7 @@ fn unreachable_state() -> ! 
{ fn interpret_one<'a>( i: &Instruction<'a>, ctx: &mut Ctx, + arena: &'a Arena>, stack: &mut IStack<'a>, ) -> Result<(), InterpretError<'a>> { use Instruction as I; @@ -447,7 +451,7 @@ fn interpret_one<'a>( ctx.gas.consume(interpret_cost::dip(*opt_height)?)?; let protected_height: u16 = opt_height.unwrap_or(1); let mut protected = stack.split_off(protected_height as usize); - interpret(nested, ctx, stack)?; + interpret(nested, ctx, arena, stack)?; ctx.gas.consume(interpret_cost::undip(protected_height)?)?; stack.append(&mut protected); } @@ -508,9 +512,9 @@ fn interpret_one<'a>( I::If(nested_t, nested_f) => { ctx.gas.consume(interpret_cost::IF)?; if pop!(V::Bool) { - interpret(nested_t, ctx, stack)?; + interpret(nested_t, ctx, arena, stack)?; } else { - interpret(nested_f, ctx, stack)?; + interpret(nested_f, ctx, arena, stack)?; } } I::IfNone(when_none, when_some) => { @@ -518,9 +522,9 @@ fn interpret_one<'a>( match pop!(V::Option) { Some(x) => { stack.push(*x); - interpret(when_some, ctx, stack)? + interpret(when_some, ctx, arena, stack)? } - None => interpret(when_none, ctx, stack)?, + None => interpret(when_none, ctx, arena, stack)?, } } I::IfCons(when_cons, when_nil) => { @@ -529,11 +533,11 @@ fn interpret_one<'a>( match lst.uncons() { Some(x) => { stack.push(x); - interpret(when_cons, ctx, stack)? + interpret(when_cons, ctx, arena, stack)? } None => { pop!(); - interpret(when_nil, ctx, stack)?; + interpret(when_nil, ctx, arena, stack)?; } } } @@ -543,11 +547,11 @@ fn interpret_one<'a>( match or { Or::Left(x) => { stack.push(x); - interpret(when_left, ctx, stack)? + interpret(when_left, ctx, arena, stack)? 
} Or::Right(x) => { stack.push(x); - interpret(when_right, ctx, stack)?; + interpret(when_right, ctx, arena, stack)?; } } } @@ -608,7 +612,7 @@ fn interpret_one<'a>( loop { ctx.gas.consume(interpret_cost::LOOP)?; if pop!(V::Bool) { - interpret(nested, ctx, stack)?; + interpret(nested, ctx, arena, stack)?; } else { ctx.gas.consume(interpret_cost::LOOP_EXIT)?; break; @@ -622,7 +626,7 @@ fn interpret_one<'a>( match *pop!(V::Or) { Or::Left(x) => { stack.push(x); - interpret(nested, ctx, stack)?; + interpret(nested, ctx, arena, stack)?; } Or::Right(x) => { stack.push(x); @@ -640,7 +644,7 @@ fn interpret_one<'a>( for i in lst { ctx.gas.consume(interpret_cost::PUSH)?; stack.push(i); - interpret(nested, ctx, stack)?; + interpret(nested, ctx, arena, stack)?; } } overloads::Iter::Set => { @@ -648,7 +652,7 @@ fn interpret_one<'a>( for v in set { ctx.gas.consume(interpret_cost::PUSH)?; stack.push(v); - interpret(nested, ctx, stack)?; + interpret(nested, ctx, arena, stack)?; } } overloads::Iter::Map => { @@ -656,7 +660,7 @@ fn interpret_one<'a>( for (k, v) in map { ctx.gas.consume(interpret_cost::PUSH)?; stack.push(V::new_pair(k, v)); - interpret(nested, ctx, stack)?; + interpret(nested, ctx, arena, stack)?; } } } @@ -995,12 +999,12 @@ fn interpret_one<'a>( // See Note: Rc in lambdas let code = Rc::clone(code); let mut stk = stk![V::Lambda(closure), arg]; - interpret(&code, ctx, &mut stk)?; + interpret(&code, ctx, arena, &mut stk)?; stk } Lambda::Lambda { code, .. 
} => { let mut stk = stk![arg]; - interpret(code, ctx, &mut stk)?; + interpret(code, ctx, arena, &mut stk)?; stk } }; @@ -1200,7 +1204,7 @@ fn interpret_one<'a>( let res = bls::pairing::pairing_check(it); stack.push(V::Bool(res)); } - I::Seq(nested) => interpret(nested, ctx, stack)?, + I::Seq(nested) => interpret(nested, ctx, arena, stack)?, } Ok(()) } @@ -1224,6 +1228,24 @@ mod interpreter_tests { V::Bytes(hex::decode(hex).unwrap_or_else(|e| panic!("Invalid hex: {e}"))) } + fn interpret<'a>( + ast: &[Instruction<'a>], + ctx: &mut Ctx, + stack: &mut IStack<'a>, + ) -> Result<(), InterpretError<'a>> { + let temp = Box::leak(Box::default()); + super::interpret(ast, ctx, temp, stack) + } + + fn interpret_one<'a>( + i: &Instruction<'a>, + ctx: &mut Ctx, + stack: &mut IStack<'a>, + ) -> Result<(), InterpretError<'a>> { + let temp = Box::leak(Box::default()); + super::interpret_one(i, ctx, temp, stack) + } + #[test] fn test_add() { let mut stack = stk![V::nat(10), V::nat(20)]; diff --git a/contrib/mir/src/lib.rs b/contrib/mir/src/lib.rs index fca2f8ff696b..4e0b88f33328 100644 --- a/contrib/mir/src/lib.rs +++ b/contrib/mir/src/lib.rs @@ -22,6 +22,8 @@ pub mod tzt; #[cfg(test)] mod tests { + use typed_arena::Arena; + use crate::ast::micheline::test_helpers::*; use crate::ast::*; use crate::context::Ctx; @@ -46,7 +48,10 @@ mod tests { .typecheck_instruction(&mut Ctx::default(), None, &[app!(nat)]) .unwrap(); let mut istack = stk![TypedValue::nat(10)]; - assert!(ast.interpret(&mut Ctx::default(), &mut istack).is_ok()); + let temp = Arena::new(); + assert!(ast + .interpret(&mut Ctx::default(), &temp, &mut istack) + .is_ok()); assert!(istack.len() == 1 && istack[0] == TypedValue::int(55)); } @@ -56,7 +61,8 @@ mod tests { let mut ctx = Ctx::default(); let ast = ast.typecheck_instruction(&mut ctx, None, &[]).unwrap(); let mut istack = stk![]; - assert!(ast.interpret(&mut ctx, &mut istack).is_ok()); + let temp = Arena::new(); + assert!(ast.interpret(&mut ctx, &temp, &mut 
istack).is_ok()); assert_eq!(istack, stk![TypedValue::Mutez(600)]); } @@ -68,8 +74,9 @@ mod tests { .unwrap(); let mut istack = stk![TypedValue::nat(5)]; let mut ctx = Ctx::default(); + let temp = Arena::new(); report_gas(&mut ctx, |ctx| { - assert!(ast.interpret(ctx, &mut istack).is_ok()); + assert!(ast.interpret(ctx, &temp, &mut istack).is_ok()); }); assert_eq!(Gas::default().milligas() - ctx.gas.milligas(), 1287); } @@ -83,8 +90,9 @@ mod tests { let mut istack = stk![TypedValue::nat(5)]; let ctx = &mut Ctx::default(); ctx.gas = Gas::new(1); + let temp = Arena::new(); assert_eq!( - ast.interpret(ctx, &mut istack), + ast.interpret(ctx, &temp, &mut istack), Err(interpreter::InterpretError::OutOfGas(crate::gas::OutOfGas)), ); } @@ -96,7 +104,10 @@ mod tests { .typecheck_instruction(&mut Ctx::default(), None, &[app!(option[app!(nat)])]) .unwrap(); let mut istack = stk![TypedValue::new_option(Some(TypedValue::nat(5)))]; - assert!(ast.interpret(&mut Ctx::default(), &mut istack).is_ok()); + let temp = Arena::new(); + assert!(ast + .interpret(&mut Ctx::default(), &temp, &mut istack) + .is_ok()); assert_eq!(istack, stk![TypedValue::nat(6)]); } @@ -221,12 +232,14 @@ mod tests { let arena = typed_arena::Arena::new(); use crate::lexer::Prim; use Micheline as M; + let temp = Arena::new(); let interp_res = parse_contract_script(VOTE_SRC) .unwrap() .typecheck_script(ctx) .unwrap() .interpret( ctx, + &temp, "foo".into(), M::seq( &arena, @@ -251,15 +264,19 @@ mod tests { use std::collections::HashMap; #[track_caller] - fn run_e2e_test<'a>( - instr: &'a str, + fn run_e2e_test( + instr: &str, input_type_stack: TypeStack, output_type_stack: TypeStack, - mut input_stack: Stack>, - output_stack: Stack>, + input_stack: Stack, + output_stack: Stack, mut ctx: Ctx, ) { - let ast = parse(instr).unwrap(); + let instr = instr.to_owned(); + let temp = Arena::new(); + // NB: required to appease the borrow checker + let mut input_stack = input_stack; + let ast = parse(&instr).unwrap(); let mut 
input_failing_type_stack = FailingTypeStack::Ok(input_type_stack); let ast = typecheck_instruction(&ast, &mut ctx, None, &mut input_failing_type_stack).unwrap(); @@ -267,7 +284,7 @@ mod tests { input_failing_type_stack, FailingTypeStack::Ok(output_type_stack) ); - assert!(ast.interpret(&mut ctx, &mut input_stack).is_ok()); + assert!(ast.interpret(&mut ctx, &temp, &mut input_stack).is_ok()); assert_eq!(input_stack, output_stack); } @@ -869,6 +886,7 @@ mod multisig_tests { use crate::lexer::Prim; use crate::parser::test_helpers::parse_contract_script; use num_bigint::BigUint; + use typed_arena::Arena; use Type as T; use TypedValue as TV; @@ -954,6 +972,7 @@ mod multisig_tests { let transfer_amount = 123; let transfer_destination = "tz1WrbkDrzKVqcGXkjw4Qk4fXkjXpAJuNP1j"; let signature = "edsigu1GCyS754UrkFLng9P5vG5T51Hs8TcgZoV7fPfj5qeXYzC1JKuUYzyowpfGghEEqUyPxpUdU7WRFrdxad5pnspQg9hwk6v"; + let temp = Arena::new(); let interp_res = parse_contract_script(MULTISIG_SRC) .unwrap() @@ -961,6 +980,7 @@ mod multisig_tests { .unwrap() .interpret( &mut ctx, + &temp, pair( // :payload pair( @@ -1024,6 +1044,7 @@ mod multisig_tests { */ let new_delegate = "tz1V8fDHpHzN8RrZqiYCHaJM9EocsYZch5Cy"; let signature = "edsigtXyZmxgR3MDhDRdtAtopHNNE8rPsPRHgPXurkMacmRLvbLyBCTjtBFNFYHEcLTjx94jdvUf81Wd7uybJNGn5phJYaPAJST"; + let temp = Arena::new(); let interp_res = parse_contract_script(MULTISIG_SRC) .unwrap() @@ -1031,6 +1052,7 @@ mod multisig_tests { .unwrap() .interpret( &mut ctx, + &temp, pair( // :payload pair( @@ -1077,6 +1099,7 @@ mod multisig_tests { let threshold = 1; let new_delegate = "tz1V8fDHpHzN8RrZqiYCHaJM9EocsYZch5Cy"; let invalid_signature = "edsigtt6SusfFFqwKqJNDuZMbhP6Q8f6zu3c3q7W6vPbjYKpv84H3hfXhRyRvAXHzNYSwBNNqjmf5taXKd2ZW3Rbix78bhWjxg5"; + let temp = Arena::new(); let interp_res = parse_contract_script(MULTISIG_SRC) .unwrap() @@ -1084,6 +1107,7 @@ mod multisig_tests { .unwrap() .interpret( &mut ctx, + &temp, pair( // :payload pair( diff --git a/contrib/mir/src/tzt.rs 
b/contrib/mir/src/tzt.rs index d5f0aed4f11b..d71b203285bd 100644 --- a/contrib/mir/src/tzt.rs +++ b/contrib/mir/src/tzt.rs @@ -7,10 +7,10 @@ mod expectation; -use std::fmt; - use num_bigint::BigInt; use std::collections::HashMap; +use std::fmt; +use typed_arena::Arena; use crate::ast::michelson_address::entrypoint::Entrypoints; use crate::ast::michelson_address::AddressHash; @@ -340,6 +340,7 @@ pub enum TztOutput<'a> { fn execute_tzt_test_code<'a>( code: Micheline<'a>, ctx: &mut Ctx, + arena: &'a Arena>, m_parameter: Option, input: Vec<(Type, TypedValue<'a>)>, ) -> Result<(FailingTypeStack, IStack<'a>), TestError<'a>> { @@ -359,11 +360,14 @@ fn execute_tzt_test_code<'a>( // the test was a success or a fail. let typechecked_code = typecheck_instruction(&code, ctx, Some(¶meter), &mut t_stack)?; let mut i_stack: IStack = TopIsFirst::from(vals).0; - typechecked_code.interpret(ctx, &mut i_stack)?; + typechecked_code.interpret(ctx, arena, &mut i_stack)?; Ok((t_stack, i_stack)) } -pub fn run_tzt_test(test: TztTest) -> Result<(), TztTestError> { +pub fn run_tzt_test<'a>( + test: TztTest<'a>, + arena: &'a Arena>, +) -> Result<(), TztTestError<'a>> { // Here we compare the outcome of the interpreting with the // expectation from the test, and declare the result of the test // accordingly. 
@@ -383,6 +387,7 @@ pub fn run_tzt_test(test: TztTest) -> Result<(), TztTestError> { test.other_contracts.clone(), ); - let execution_result = execute_tzt_test_code(test.code, &mut ctx, test.parameter, test.input); + let execution_result = + execute_tzt_test_code(test.code, &mut ctx, arena, test.parameter, test.input); check_expectation(&mut ctx, test.output, execution_result) } diff --git a/contrib/mir/tzt_runner/main.rs b/contrib/mir/tzt_runner/main.rs index c2579b899fd8..eefae7012aa4 100644 --- a/contrib/mir/tzt_runner/main.rs +++ b/contrib/mir/tzt_runner/main.rs @@ -10,6 +10,7 @@ use std::fs::read_to_string; use mir::parser::Parser; use mir::tzt::*; +use typed_arena::Arena; fn run_test(file: &str) -> Result<(), String> { let contents = read_to_string(file).map_err(|e| e.to_string())?; @@ -18,7 +19,8 @@ fn run_test(file: &str) -> Result<(), String> { .parse_tzt_test(&contents) .map_err(|e| e.to_string())?; - run_tzt_test(tzt_test).map_err(|e| format!("{}", e)) + let arena = Arena::new(); + run_tzt_test(tzt_test, &arena).map_err(|e| format!("{}", e)) } fn main() { @@ -55,6 +57,11 @@ mod tztrunner_tests { parser.parse_tzt_test(s) } + pub fn run_tzt_test(test: TztTest) -> Result<(), TztTestError> { + let temp = Box::leak(Box::default()); + mir::tzt::run_tzt_test(test, temp) + } + #[test] fn test_runner_success() { let tzt_test = parse_tzt_test(TZT_SAMPLE_ADD).unwrap(); -- GitLab From 5425f9afc265ded426b1ae28d993ffb4e74908ed Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 12 Dec 2023 18:38:11 +0300 Subject: [PATCH 4/7] MIR: UNPACK instruction --- contrib/mir/src/ast.rs | 1 + contrib/mir/src/ast/micheline.rs | 3 +- contrib/mir/src/gas.rs | 5 +++ contrib/mir/src/interpreter.rs | 34 ++++++++++++++++++ contrib/mir/src/typechecker.rs | 60 ++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 2 deletions(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 81133a300610..353bc5f7bb56 100644 --- a/contrib/mir/src/ast.rs +++ 
b/contrib/mir/src/ast.rs @@ -496,6 +496,7 @@ pub enum Instruction<'a> { /// `ISelf` because `Self` is a reserved keyword ISelf(Entrypoint), Pack, + Unpack(Type), CheckSignature, TransferTokens, SetDelegate, diff --git a/contrib/mir/src/ast/micheline.rs b/contrib/mir/src/ast/micheline.rs index 7796b7ca78e3..f178c2a64ed8 100644 --- a/contrib/mir/src/ast/micheline.rs +++ b/contrib/mir/src/ast/micheline.rs @@ -202,8 +202,7 @@ macro_rules! micheline_fields { /// supported. Useful for total match in the typechecker. macro_rules! micheline_unsupported_instructions { () => { - Prim::UNPACK - | Prim::SUB + Prim::SUB | Prim::EDIV | Prim::LSL | Prim::LSR diff --git a/contrib/mir/src/gas.rs b/contrib/mir/src/gas.rs index 69fef61ca429..c0ae7814bce8 100644 --- a/contrib/mir/src/gas.rs +++ b/contrib/mir/src/gas.rs @@ -790,6 +790,11 @@ pub mod interpret_cost { let size = Checked::from(int.byte_size()); (75 + (size * 3)).as_gas_cost() } + + pub fn unpack(bytes: &[u8]) -> Result { + let size = Checked::from(bytes.len()); + (260 + (size >> 1)).as_gas_cost() + } } #[cfg(test)] diff --git a/contrib/mir/src/interpreter.rs b/contrib/mir/src/interpreter.rs index b2b0de5b8405..0c5f50454a29 100644 --- a/contrib/mir/src/interpreter.rs +++ b/contrib/mir/src/interpreter.rs @@ -890,6 +890,15 @@ fn interpret_one<'a>( let encoded = mich.encode_for_pack(); stack.push(V::Bytes(encoded)); } + I::Unpack(ty) => { + let bytes = pop!(V::Bytes); + ctx.gas.consume(interpret_cost::unpack(bytes.as_slice())?)?; + let mut try_unpack = || -> Option { + let mich = Micheline::decode_packed(arena, &bytes).ok()?; + crate::interpreter::typecheck_value(&mich, ctx, ty).ok() + }; + stack.push(V::new_option(try_unpack())); + } I::CheckSignature => { let key = pop!(V::Key); let sig = pop!(V::Signature); @@ -4430,4 +4439,29 @@ mod interpreter_tests { assert!(interpret_one(&Dug(4), &mut ctx, &mut stack).is_ok()); assert_eq!(stack, expected_stack); } + + #[test] + fn unpack() { + let mut stack = 
stk![V::Bytes(hex::decode("0500f1a2f3ad07").unwrap())]; + let ctx = &mut Ctx::default(); + assert_eq!(interpret_one(&Unpack(Type::Int), ctx, &mut stack), Ok(())); + assert_eq!(stack, stk![V::new_option(Some(V::int(-987654321)))]); + assert!(ctx.gas.milligas() < Ctx::default().gas.milligas()); + } + + #[test] + fn unpack_bad_input() { + let mut stack = stk![V::Bytes(hex::decode("05ffff").unwrap())]; + let ctx = &mut Ctx::default(); + assert_eq!(interpret_one(&Unpack(Type::Int), ctx, &mut stack), Ok(())); + assert_eq!(stack, stk![V::new_option(None)]); + } + + #[test] + fn unpack_bad_type() { + let mut stack = stk![V::Bytes(hex::decode("0500f1a2f3ad07").unwrap())]; + let ctx = &mut Ctx::default(); + assert_eq!(interpret_one(&Unpack(Type::Unit), ctx, &mut stack), Ok(())); + assert_eq!(stack, stk![V::new_option(None)]); + } } diff --git a/contrib/mir/src/typechecker.rs b/contrib/mir/src/typechecker.rs index 807131db18c5..6d114a77175e 100644 --- a/contrib/mir/src/typechecker.rs +++ b/contrib/mir/src/typechecker.rs @@ -1412,6 +1412,22 @@ pub(crate) fn typecheck_instruction<'a>( (App(PACK, [], _), []) => no_overload!(PACK, len 1), (App(PACK, expect_args!(0), _), _) => unexpected_micheline!(), + (App(UNPACK, [ty], _), [.., T::Bytes]) => { + let ty = parse_ty(ctx, ty)?; + // NB: one would suppose the type needs to be packable, but that's + // not quite correct, as `contract _` is forbidden. 
The correct + // constraint is seemingly "pushable", as "pushable" is just + // "packable" without `contract _` + ty.ensure_prop(&mut ctx.gas, TypeProperty::Pushable)?; + stack[0] = T::new_option(ty.clone()); + I::Unpack(ty) + } + (App(UNPACK, [_], _), [.., ty]) => { + no_overload!(UNPACK, TypesNotEqual(T::Bytes, ty.clone())) + } + (App(UNPACK, [_], _), []) => no_overload!(UNPACK, len 1), + (App(UNPACK, expect_args!(1), _), _) => unexpected_micheline!(), + (App(TRANSFER_TOKENS, [], _), [.., T::Contract(ct), T::Mutez, arg_t]) => { ensure_ty_eq(&mut ctx.gas, ct, arg_t)?; stack.drop_top(3); @@ -6623,4 +6639,48 @@ mod typecheck_tests { &tc_stk![Type::Unit, Type::Int, Type::String, Type::Nat] ); } + + #[test] + fn unpack() { + let stk = &mut tc_stk![Type::Bytes]; + assert_eq!( + typecheck_instruction(&parse("UNPACK int").unwrap(), &mut Ctx::default(), stk), + Ok(Unpack(Type::Int)) + ); + assert_eq!(stk, &tc_stk![Type::new_option(Type::Int)]); + } + + #[test] + fn unpack_contract() { + let stk = &mut tc_stk![Type::Bytes]; + assert_eq!( + typecheck_instruction( + &parse("UNPACK (contract unit)").unwrap(), + &mut Ctx::default(), + stk + ), + Err(TcError::InvalidTypeProperty( + TypeProperty::Pushable, + Type::new_contract(Type::Unit) + )) + ); + } + + #[test] + fn unpack_bad_stack() { + let stk = &mut tc_stk![Type::Unit]; + assert_eq!( + typecheck_instruction(&parse("UNPACK int").unwrap(), &mut Ctx::default(), stk), + Err(TcError::NoMatchingOverload { + instr: Prim::UNPACK, + stack: stk![Type::Unit], + reason: Some(TypesNotEqual(Type::Bytes, Type::Unit).into()) + }) + ); + } + + #[test] + fn unpack_short_stack() { + too_short_test(&app!(UNPACK[app!(unit)]), Prim::UNPACK, 1) + } } -- GitLab From bd36a77fd5cfaf6f8182ae44608ad92fc1373dd6 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 19 Dec 2023 22:09:06 +0300 Subject: [PATCH 5/7] MIR: implement size_hint for MichelsonList iterators --- contrib/mir/src/ast/michelson_list.rs | 6 ++++++ 1 file changed, 6 insertions(+) 
diff --git a/contrib/mir/src/ast/michelson_list.rs b/contrib/mir/src/ast/michelson_list.rs index 6c6665f272d8..166a474b65f8 100644 --- a/contrib/mir/src/ast/michelson_list.rs +++ b/contrib/mir/src/ast/michelson_list.rs @@ -50,6 +50,9 @@ impl Iterator for IntoIter { fn next(&mut self) -> Option { self.0.next() } + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } } pub struct Iter<'a, T>(std::iter::Rev>); @@ -59,6 +62,9 @@ impl<'a, T> Iterator for Iter<'a, T> { fn next(&mut self) -> Option { self.0.next() } + fn size_hint(&self) -> (usize, Option) { + self.0.size_hint() + } } impl IntoIterator for MichelsonList { -- GitLab From 90c90c29d7978c754004a6e304569f2ebf43b036 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 20 Dec 2023 16:48:01 +0300 Subject: [PATCH 6/7] MIR: define PACK/UNPACK constants in one place --- contrib/mir/src/serializer.rs | 1 + contrib/mir/src/serializer/constants.rs | 25 ++++++++++++++++++++++++ contrib/mir/src/serializer/decode.rs | 19 +----------------- contrib/mir/src/serializer/encode.rs | 26 +++++++++---------------- 4 files changed, 36 insertions(+), 35 deletions(-) create mode 100644 contrib/mir/src/serializer/constants.rs diff --git a/contrib/mir/src/serializer.rs b/contrib/mir/src/serializer.rs index 554fb52a4dc5..ed2f8dec8b4c 100644 --- a/contrib/mir/src/serializer.rs +++ b/contrib/mir/src/serializer.rs @@ -1,3 +1,4 @@ +pub(self) mod constants; pub mod decode; pub mod encode; mod integration_tests; diff --git a/contrib/mir/src/serializer/constants.rs b/contrib/mir/src/serializer/constants.rs new file mode 100644 index 000000000000..d319b86bbf56 --- /dev/null +++ b/contrib/mir/src/serializer/constants.rs @@ -0,0 +1,25 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* Copyright (c) [2022-2023] TriliTech */ +/* */ +/******************************************************************************/ + 
+/// Prefix denoting an encoded number. +pub const NUMBER_TAG: u8 = 0x00; +/// Prefix denoting an encoded string. +pub const STRING_TAG: u8 = 0x01; +/// Prefix denoting an encoded sequence. +pub const SEQ_TAG: u8 = 0x02; +/// Prefix denoting an encoded bytes sequence. +pub const BYTES_TAG: u8 = 0x0a; + +// Tags for [Micheline::App]. +pub const APP_NO_ARGS_NO_ANNOTS_TAG: u8 = 0x03; +pub const APP_NO_ARGS_WITH_ANNOTS_TAG: u8 = 0x04; +pub const APP_ONE_ARG_NO_ANNOTS_TAG: u8 = 0x05; +pub const APP_ONE_ARG_WITH_ANNOTS_TAG: u8 = 0x06; +pub const APP_TWO_ARGS_NO_ANNOTS_TAG: u8 = 0x07; +pub const APP_TWO_ARGS_WITH_ANNOTS_TAG: u8 = 0x08; +pub const APP_GENERIC: u8 = 0x09; diff --git a/contrib/mir/src/serializer/decode.rs b/contrib/mir/src/serializer/decode.rs index f5909fa706fc..b86f567ec609 100644 --- a/contrib/mir/src/serializer/decode.rs +++ b/contrib/mir/src/serializer/decode.rs @@ -8,6 +8,7 @@ //! Micheline deserialization. +use super::constants::*; use bitvec::{order::Lsb0, vec::BitVec, view::BitView}; use num_bigint::{BigInt, Sign}; use smallvec::{smallvec, SmallVec}; @@ -40,24 +41,6 @@ pub enum DecodeError { BadAnnotation, } -/// Prefix denoting an encoded number. -const NUMBER_TAG: u8 = 0x00; -/// Prefix denoting an encoded string. -const STRING_TAG: u8 = 0x01; -/// Prefix denoting an encoded sequence. -const SEQ_TAG: u8 = 0x02; -/// Prefix denoting an encoded bytes sequence. -const BYTES_TAG: u8 = 0x0a; - -// Tags for [Michelson::App]. -const APP_NO_ARGS_NO_ANNOTS_TAG: u8 = 0x03; -const APP_NO_ARGS_WITH_ANNOTS_TAG: u8 = 0x04; -const APP_ONE_ARG_NO_ANNOTS_TAG: u8 = 0x05; -const APP_ONE_ARG_WITH_ANNOTS_TAG: u8 = 0x06; -const APP_TWO_ARGS_NO_ANNOTS_TAG: u8 = 0x07; -const APP_TWO_ARGS_WITH_ANNOTS_TAG: u8 = 0x08; -const APP_GENERIC: u8 = 0x09; - /// If the number of arguments is small, an allocation-avoiding optimization is /// used. This constant specifies the upper bound for the number of arguments /// where it triggers. 
diff --git a/contrib/mir/src/serializer/encode.rs b/contrib/mir/src/serializer/encode.rs index 47d85dfb0b9e..dc415e538372 100644 --- a/contrib/mir/src/serializer/encode.rs +++ b/contrib/mir/src/serializer/encode.rs @@ -10,20 +10,12 @@ use std::mem::size_of; use tezos_data_encoding::{enc::BinWriter, types::Zarith}; +use super::constants::*; use crate::{ ast::{annotations::Annotations, Micheline}, lexer::{Annotation, Prim}, }; -/// Prefix denoting an encoded number. -const NUMBER_TAG: u8 = 0x00; -/// Prefix denoting an encoded string. -const STRING_TAG: u8 = 0x01; -/// Prefix denoting an encoded sequence. -const SEQ_TAG: u8 = 0x02; -/// Prefix denoting an encoded bytes sequence. -const BYTES_TAG: u8 = 0x0a; - trait AppEncoder<'a>: IntoIterator> + Sized { const NO_ANNOTS_TAG: u8; const WITH_ANNOTS_TAG: u8; @@ -44,23 +36,23 @@ trait AppEncoder<'a>: IntoIterator> + Sized { } impl<'a> AppEncoder<'a> for [&'a Micheline<'a>; 0] { - const NO_ANNOTS_TAG: u8 = 0x03; - const WITH_ANNOTS_TAG: u8 = 0x04; + const NO_ANNOTS_TAG: u8 = APP_NO_ARGS_NO_ANNOTS_TAG; + const WITH_ANNOTS_TAG: u8 = APP_NO_ARGS_WITH_ANNOTS_TAG; } impl<'a> AppEncoder<'a> for [&'a Micheline<'a>; 1] { - const NO_ANNOTS_TAG: u8 = 0x05; - const WITH_ANNOTS_TAG: u8 = 0x06; + const NO_ANNOTS_TAG: u8 = APP_ONE_ARG_NO_ANNOTS_TAG; + const WITH_ANNOTS_TAG: u8 = APP_ONE_ARG_WITH_ANNOTS_TAG; } impl<'a> AppEncoder<'a> for [&'a Micheline<'a>; 2] { - const NO_ANNOTS_TAG: u8 = 0x07; - const WITH_ANNOTS_TAG: u8 = 0x08; + const NO_ANNOTS_TAG: u8 = APP_TWO_ARGS_NO_ANNOTS_TAG; + const WITH_ANNOTS_TAG: u8 = APP_TWO_ARGS_WITH_ANNOTS_TAG; } impl<'a> AppEncoder<'a> for &'a [Micheline<'a>] { - const NO_ANNOTS_TAG: u8 = 0x09; - const WITH_ANNOTS_TAG: u8 = 0x09; + const NO_ANNOTS_TAG: u8 = APP_GENERIC; + const WITH_ANNOTS_TAG: u8 = APP_GENERIC; fn encode(prim: &Prim, args: Self, annots: &Annotations, out: &mut Vec) { match args { [] => AppEncoder::encode(prim, [], annots, out), -- GitLab From 0144953e14d48b956390fbc4903d0a955500a66a 
Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 20 Dec 2023 21:01:14 +0300 Subject: [PATCH 7/7] MIR: update DESIGN.md --- contrib/mir/DESIGN.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/contrib/mir/DESIGN.md b/contrib/mir/DESIGN.md index 07639e9567d5..0fe793b49497 100644 --- a/contrib/mir/DESIGN.md +++ b/contrib/mir/DESIGN.md @@ -29,6 +29,44 @@ Currently, for the sake of simplicity, MIR parser is a bit more lenient wrt non- Additionally, annotations are currently ignored completely; thus, annotation rules are not verified. +#### Micheline + +Micheline is implemented using a non-owning approach for sequences and primitive applications; +the actual nodes are allocated in an arena. + +This is done mostly to avoid costs associated with allocating and subsequently +freeing many small `Vec`s via the system allocator during +parsing/deserialization, but it also simplifies pattern-matching in the +typechecker somewhat. + +#### Lambdas + +Typechecked lambdas carry their Micheline representation. This is done to avoid +dealing with (deprecated) annotations in the lambda body (e.g. on instructions +etc.), which have to be `PACK`ed. The protocol uses the same approach. + +##### Known differences from the protocol + +Lambda code (in Micheline representation) isn't normalized. As an example where this makes a difference, consider running `PACK` on the following lambda: `{PUSH (pair nat int bool) {0; -3; False}; DROP}`. The protocol normalizes `{0; -3; False}` to `Pair 0 (Pair -3 False)`; MIR doesn't, and instead uses `{0; -3; False}` verbatim. + +#### UNPACK/deserialization + +Implemented via a pretty run-of-the-mill recursive descent parser. 
+ +`BytesIt` is introduced for simplicity (and avoiding allocations); +`Iterator` doesn't quite fit, because we need to chomp an exact number +of bytes (`take` will produce at most the requested number, which is not what we +want), and producing variable-length slices from an `Iterator` would require +allocations. + +Extra care is taken to avoid unnecessary allocations in the common cases; to that effect, `SmallVec` with a sensible on-stack buffer is used for variable-length fields. This wastes some on-stack memory, but this shouldn't be an issue. One kink is that annotations are still always-allocating; amending this is left for future work. + +The `BigInt` parser is reimplemented, as going through `Zarith` from +`tezos_data_encoding` is more involved than it's worth. The code is loosely +based on the one from `tezos_data_encoding`. + +There's an interaction with lambdas (and how they carry raw Micheline around): unpacking requires allocating Micheline long-term, which means the interpreter needs access to an arena. Carrying it in `Ctx` doesn't quite pan out due to the borrow checker (also, it doesn't quite match the lifetime semantics of the tzt runner), so it's passed around the interpreter via an extra argument. + #### Gas consumption Gas counter is represented as the type `Gas`, containing an `Option` with the current milligas amount (or `None` after gas exhaustion). A mutable reference to the gas counter is passed to typechecker and interpreter. -- GitLab