diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs
index efd1ed0e362a4a9738cc15be53dea7e63d13ed3b..24b9e0b947d25fb7e7636f919ba64c70f9806d9c 100644
--- a/contrib/mir/src/ast.rs
+++ b/contrib/mir/src/ast.rs
@@ -138,13 +138,18 @@ pub enum TypedValue {
     Operation(Box<OperationInfo>),
 }
 
-pub fn typed_value_to_value_optimized<'a>(
+/// Untypes a value using optimized representation in legacy mode.
+///
+/// This differs from plain optimized representation in that it always
+/// represents tuples as nested binary pairs (right combs). This is, for
+/// instance, what `PACK` uses.
+pub fn typed_value_to_value_optimized_legacy<'a>(
     arena: &'a Arena<Micheline<'a>>,
     tv: TypedValue,
 ) -> Micheline<'a> {
     use Micheline as V;
     use TypedValue as TV;
-    let go = |x| typed_value_to_value_optimized(arena, x);
+    let go = |x| typed_value_to_value_optimized_legacy(arena, x);
     match tv {
         TV::Int(i) => V::Int(i),
         TV::Nat(u) => V::Int(u.try_into().unwrap()),
@@ -256,6 +261,7 @@ pub enum Instruction {
     ChainId,
     /// `ISelf` because `Self` is a reserved keyword
     ISelf(Entrypoint),
+    Pack,
     CheckSignature,
     TransferTokens,
     SetDelegate,
diff --git a/contrib/mir/src/ast/annotations.rs b/contrib/mir/src/ast/annotations.rs
index 623964267947d265b54cef2aefb7ecf73177ca9f..74f41c126d4e01550a2f2b733329202f67fa5925 100644
--- a/contrib/mir/src/ast/annotations.rs
+++ b/contrib/mir/src/ast/annotations.rs
@@ -88,3 +88,13 @@ impl<'a> FromIterator<Annotation<'a>> for Annotations<'a> {
         Annotations(Vec::from_iter(iter))
     }
 }
+
+/// Returns all annotations in the same order in which they were given
+/// initially.
+impl<'a> IntoIterator for &'a Annotations<'a> {
+    type Item = &'a Annotation<'a>;
+    type IntoIter = std::slice::Iter<'a, Annotation<'a>>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.iter()
+    }
+}
diff --git a/contrib/mir/src/gas.rs b/contrib/mir/src/gas.rs
index e27ddf56a95b4a87406e9ca0901100b28a78fbcf..615e51f2e157cef8345fa8c7eb3b7c53388da345 100644
--- a/contrib/mir/src/gas.rs
+++ b/contrib/mir/src/gas.rs
@@ -162,7 +162,7 @@ pub mod interpret_cost {
     use checked::Checked;
 
     use super::{AsGasCost, OutOfGas};
-    use crate::ast::{Key, KeyHash, Or, TypedValue};
+    use crate::ast::{Key, KeyHash, Micheline, Or, TypedValue};
 
     pub const DIP: u32 = 10;
     pub const DROP: u32 = 10;
@@ -190,6 +190,7 @@ pub mod interpret_cost {
     pub const NIL: u32 = 10;
     pub const CONS: u32 = 15;
     pub const CHAIN_ID: u32 = 15;
+    pub const PACK: u32 = 0;
     pub const SELF: u32 = 10;
     // Gas costs obtained from https://gitlab.com/tezos/tezos/-/blob/9875fbebe032a8c5ce62b3b3cb1588ca9855a37e/src/proto_017_PtNairob/lib_protocol/michelson_v1_gas_costs_generated.ml
     pub const TRANSFER_TOKENS: u32 = 60;
@@ -364,6 +365,76 @@ pub mod interpret_cost {
         (80 + 2 * lookup_cost).as_gas_cost()
     }
 
+    /// Measures size of Michelson using several metrics.
+    pub struct MichelineSize {
+        /// Total number of nodes (including leaves).
+        nodes_num: Checked<usize>,
+
+        /// Total size of string and bytes literals.
+        str_byte: Checked<usize>,
+
+        /// Total size of zarith numbers, in bytes.
+        zariths: Checked<usize>,
+    }
+
+    impl Default for MichelineSize {
+        fn default() -> Self {
+            MichelineSize {
+                nodes_num: Checked::from(0),
+                str_byte: Checked::from(0),
+                zariths: Checked::from(0),
+            }
+        }
+    }
+
+    /// Computes the gas cost of encoding `mich` from its size metrics.
+    pub fn micheline_encoding<'a>(mich: &'a Micheline<'a>) -> Result<u32, OutOfGas> {
+        let mut size = MichelineSize::default();
+        collect_micheline_size(mich, &mut size);
+        micheline_encoding_by_size(size)
+    }
+
+    /// Charges 100 per node, 25 per zarith byte and 10 per string/bytes byte.
+    fn micheline_encoding_by_size(size: MichelineSize) -> Result<u32, OutOfGas> {
+        (size.nodes_num * 100 + size.zariths * 25 + size.str_byte * 10).as_gas_cost()
+    }
+
+    /// Recursively adds the size metrics of `mich` and its children to `size`.
+    fn collect_micheline_size<'a>(mich: &'a Micheline<'a>, size: &mut MichelineSize) {
+        size.nodes_num += 1;
+        match mich {
+            Micheline::String(s) => size.str_byte += s.len(),
+            Micheline::Bytes(bs) => size.str_byte += bs.len(),
+            Micheline::Int(i) => {
+                // `size_of_val` already reports the width in bytes, so no bits -> bytes
+                // rounding is needed. NB: once on BigInts, use (BigInt::bits() + 7) / 8.
+                let bytes = std::mem::size_of_val(i);
+                size.zariths += bytes;
+            }
+            Micheline::Seq(ms) => {
+                for m in *ms {
+                    collect_micheline_size(m, size)
+                }
+            }
+            Micheline::App(_prim, args, annots) => {
+                for arg in *args {
+                    collect_micheline_size(arg, size)
+                }
+                for annot in annots {
+                    // Annotations are accounted as simple string literals
+                    use crate::lexer::Annotation as Ann;
+                    size.str_byte += match annot {
+                        // Including annotation prefix into the size too
+                        Ann::Field(a) => a.len() + 1,
+                        Ann::Variable(a) => a.len() + 1,
+                        Ann::Type(a) => a.len() + 1,
+                        Ann::Special(a) => a.len(),
+                    }
+                }
+            }
+        }
+    }
+
     pub fn check_signature(k: &Key, msg: &[u8]) -> Result<u32, OutOfGas> {
         let len = Checked::from(msg.len());
         match k {
diff --git a/contrib/mir/src/interpreter.rs b/contrib/mir/src/interpreter.rs
index ffecc739a5a22a8b6df4e29ae2d23b275d3d560d..30e3468b97eae1bb289e77a0e5bb4d34227c0342 100644
--- a/contrib/mir/src/interpreter.rs
+++ b/contrib/mir/src/interpreter.rs
@@ -5,6 +5,8 @@
 /*                                                                            */
 /******************************************************************************/
 
+use typed_arena::Arena;
+
 use crate::ast::annotations::NO_ANNS;
 use crate::ast::*;
 use crate::context::Ctx;
@@ -381,6 +383,18 @@ fn interpret_one(i: &Instruction, ctx: &mut Ctx, stack: &mut IStack) -> Result<(
                 entrypoint: entrypoint.clone(),
             }));
         }
+        I::Pack => {
+            ctx.gas.consume(interpret_cost::PACK)?;
+            let v = pop!();
+            let arena = Arena::new();
+            // In the Tezos implementation they also charge gas for the pass
+            // that strips locations. We don't have it.
+            let mich = typed_value_to_value_optimized_legacy(&arena, v);
+            ctx.gas
+                .consume(interpret_cost::micheline_encoding(&mich)?)?;
+            let encoded = mich.encode_for_pack();
+            stack.push(V::Bytes(encoded));
+        }
         I::CheckSignature => {
             let key = pop!(V::Key);
             let sig = pop!(V::Signature);
@@ -1401,6 +1415,16 @@ mod interpreter_tests {
         );
     }
 
+    #[test]
+    fn pack_instr() {
+        let stack = &mut stk![TypedValue::new_pair(TypedValue::Int(12), TypedValue::Unit)];
+        assert_eq!(interpret(&vec![Pack], &mut Ctx::default(), stack), Ok(()));
+        assert_eq!(
+            stack,
+            &stk![TypedValue::Bytes(hex::decode("050707000c030b").unwrap())]
+        );
+    }
+
     #[test]
     fn self_instr() {
         let stk = &mut stk![];
diff --git a/contrib/mir/src/serializer.rs b/contrib/mir/src/serializer.rs
index 7087d404217bf42626bf95a3399f2fac47a8c28c..91a1f8942a4b8ad56a763d6b3bf8b9d4e0f24e77 100644
--- a/contrib/mir/src/serializer.rs
+++ b/contrib/mir/src/serializer.rs
@@ -1 +1,2 @@
-mod encode;
+pub mod encode;
+mod integration_tests;
diff --git a/contrib/mir/src/serializer/encode.rs b/contrib/mir/src/serializer/encode.rs
index 4f8bae38d7d4720cfee10d00a3a13b233fd29580..a24b371b90d5ed180538412bb6120bc1dbda81e1 100644
--- a/contrib/mir/src/serializer/encode.rs
+++ b/contrib/mir/src/serializer/encode.rs
@@ -175,15 +175,19 @@ fn encode_micheline(mich: &Micheline, out: &mut Vec<u8>) {
 }
 
 impl<'a> Micheline<'a> {
-    /// Serialize value.
-    #[allow(dead_code)] // Until we add PACK
-    fn encode(&self) -> Vec<u8> {
+    /// Serialize a value.
+    pub fn encode(&self) -> Vec<u8> {
         self.encode_starting_with(&[])
     }
 
+    /// Serialize a value like PACK does.
+    pub fn encode_for_pack(&self) -> Vec<u8> {
+        self.encode_starting_with(&[0x05])
+    }
+
-    /// Like [Value::encode], but allows specifying a prefix, useful for
+    /// Like [Micheline::encode], but allows specifying a prefix, useful for
     /// `PACK` implementation.
-    fn encode_starting_with(&self, start_bytes: &[u8]) -> Vec<u8> {
+    pub(crate) fn encode_starting_with(&self, start_bytes: &[u8]) -> Vec<u8> {
         let mut out = Vec::from(start_bytes);
         encode_micheline(self, &mut out);
         out
diff --git a/contrib/mir/src/serializer/integration_tests.rs b/contrib/mir/src/serializer/integration_tests.rs
new file mode 100644
index 0000000000000000000000000000000000000000..0b651f94d910a97c7e01c763217c17e53b390c43
--- /dev/null
+++ b/contrib/mir/src/serializer/integration_tests.rs
@@ -0,0 +1,71 @@
+/******************************************************************************/
+/*                                                                            */
+/* SPDX-License-Identifier: MIT                                               */
+/* Copyright (c) [2023] Serokell                                              */
+/*                                                                            */
+/******************************************************************************/
+
+//! Tests that cover several components, including Micheline encoding.
+
+/// Covers key points of typed values serialization where ambiguity is possible
+/// (e.g. optimized vs readable representations).
+/// Full coverage of PACK we expect from the TZT tests.
+#[cfg(test)]
+mod test_typed_encode {
+    use typed_arena::Arena;
+
+    use crate::ast::{self, byte_repr_trait::*, KeyHash};
+    use crate::ast::{Address, TypedValue};
+
+    // Expected bytes to be produced with
+    // octez-client --mode mockup run script "parameter unit; storage bytes; code { DROP; PUSH $ty $val; PACK; NIL operation; PAIR }" on storage 0x and input Unit
+    //
+    // Or in 19.0-rc1 version of octez-client simply:
+    // octez-client --mode mockup run michelson code PACK on stack "{Stack_elt $ty $val}"
+
+    #[track_caller]
+    fn check(v: TypedValue, hex_bytes: &str) {
+        let hex_bytes: &str = hex_bytes
+            .strip_prefix("0x")
+            .expect("The `expected` argument must start from 0x");
+
+        let arena = Arena::new();
+        let micheline = ast::typed_value_to_value_optimized_legacy(&arena, v);
+        assert_eq!(
+            micheline.encode_for_pack(),
+            hex::decode(hex_bytes).expect("Bad hex string in `expected` argument")
+        )
+    }
+
+    #[test]
+    fn test_address() {
+        check(
+            TypedValue::Address(
+                Address::from_base58_check("tz1NyAf1KeeFCCPPAZ9ard9YVshVGFibzVKa").unwrap(),
+            ),
+            "0x050a0000001600002486eda3c7bbbe6be511b46d6deeb1594258a7fd",
+        )
+    }
+
+    #[test]
+    fn test_key_hash() {
+        check(
+            TypedValue::KeyHash(
+                KeyHash::from_base58_check("tz1NyAf1KeeFCCPPAZ9ard9YVshVGFibzVKa").unwrap(),
+            ),
+            "0x050a00000015002486eda3c7bbbe6be511b46d6deeb1594258a7fd",
+        )
+    }
+
+    #[test]
+    fn test_comb_pair() {
+        // Should be encoded as a tree
+        check(
+            TypedValue::new_pair(
+                TypedValue::Nat(1),
+                TypedValue::new_pair(TypedValue::Nat(2), TypedValue::Nat(3)),
+            ),
+            "0x0507070001070700020003",
+        )
+    }
+}
diff --git a/contrib/mir/src/typechecker.rs b/contrib/mir/src/typechecker.rs
index c27a54fe8576a21845df30282af639fa6574f9bb..6a00be80e41bdff3ddb52ad9c608424288944643 100644
--- a/contrib/mir/src/typechecker.rs
+++ b/contrib/mir/src/typechecker.rs
@@ -955,6 +955,15 @@ pub(crate) fn typecheck_instruction(
         }
         (App(SELF, expect_args!(0), _), _) => unexpected_micheline!(),
 
+        (App(PACK, [], _), [.., _]) => {
+            let t = pop!();
+            t.ensure_prop(&mut ctx.gas, TypeProperty::Packable)?;
+            stack.push(T::Bytes);
+            I::Pack
+        }
+        (App(PACK, [], _), []) => no_overload!(PACK, len 1),
+        (App(PACK, expect_args!(0), _), _) => unexpected_micheline!(),
+
         (App(TRANSFER_TOKENS, [], _), [.., T::Contract(ct), T::Mutez, arg_t]) => {
             ensure_ty_eq(ctx, ct, arg_t)?;
             stack.drop_top(3);
@@ -3131,6 +3140,31 @@ mod typecheck_tests {
         );
     }
 
+    #[test]
+    fn pack_instr() {
+        let stk = &mut tc_stk![Type::new_pair(Type::Int, Type::Unit)];
+        assert_eq!(
+            super::typecheck_instruction(&parse("PACK").unwrap(), &mut Ctx::default(), None, stk),
+            Ok(Instruction::Pack)
+        );
+        assert_eq!(stk, &tc_stk![Type::Bytes]);
+    }
+
+    #[test]
+    fn pack_instr_non_packable() {
+        assert_eq!(
+            typecheck_instruction(
+                &parse("PACK").unwrap(),
+                &mut Ctx::default(),
+                &mut tc_stk![Type::Operation]
+            ),
+            Err(TcError::InvalidTypeProperty(
+                TypeProperty::Packable,
+                Type::Operation
+            ))
+        );
+    }
+
     #[test]
     fn self_instr() {
         let stk = &mut tc_stk![];
diff --git a/contrib/mir/src/tzt/expectation.rs b/contrib/mir/src/tzt/expectation.rs
index d62cee4f310819dc5630e11e6f5fd98c1c5d0b7d..310bbd06c81f97b05d817674f3e79567bc084f0f 100644
--- a/contrib/mir/src/tzt/expectation.rs
+++ b/contrib/mir/src/tzt/expectation.rs
@@ -51,9 +51,13 @@ fn unify_interpreter_error(
                     // context of the interpreter, though here we have full type information for
                     // both values being compared, so it is probably safe to compare typed
                     // representation as well.
+
+                    // For now we compare values untyped to optimized form with a `PACK` flavor.
+                    // This means that comb pairs given in different forms (tree / linear) will
+                    // be considered distinct. This should be fine in most cases though.
                     let arena = typed_arena::Arena::new();
-                    typed_value_to_value_optimized(&arena, exp_typed_val)
-                        == typed_value_to_value_optimized(&arena, failed_typed_value.clone())
+                    typed_value_to_value_optimized_legacy(&arena, exp_typed_val)
+                        == typed_value_to_value_optimized_legacy(&arena, failed_typed_value.clone())
                 }
                 Err(_) => false,
             }