From 5bd00693b8d5f9ac74775172ee92bb5b40c3be20 Mon Sep 17 00:00:00 2001 From: martoon Date: Wed, 22 Nov 2023 16:05:44 +0400 Subject: [PATCH 1/4] MIR: Rename untyper to have PACK flavor Problem: PACK assumes untyping to a form that is not strictly the optimized one. True optimized form assumes linearization of comb pairs, which PACK does not do. Solution: we decided that we don't strictly need two separate untypers - for true optimized form and PACK-like form, we can live with only the latter. So I rename the untyping function accordingly and leave clarifying comments. --- contrib/mir/src/ast.rs | 9 +++++++-- contrib/mir/src/tzt/expectation.rs | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index efd1ed0e362a..5adc8606245f 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -138,13 +138,18 @@ pub enum TypedValue { Operation(Box), } -pub fn typed_value_to_value_optimized<'a>( +/// Untypes a value using optimized representation in legacy mode. +/// +/// This differs from plain optimized representation in that it always +/// represents tuples as nested binary pairs (right combs). This is, for +/// instance, what `PACK` uses. +pub fn typed_value_to_value_optimized_legacy<'a>( arena: &'a Arena>, tv: TypedValue, ) -> Micheline<'a> { use Micheline as V; use TypedValue as TV; - let go = |x| typed_value_to_value_optimized(arena, x); + let go = |x| typed_value_to_value_optimized_legacy(arena, x); match tv { TV::Int(i) => V::Int(i), TV::Nat(u) => V::Int(u.try_into().unwrap()), diff --git a/contrib/mir/src/tzt/expectation.rs b/contrib/mir/src/tzt/expectation.rs index d62cee4f3108..310bbd06c81f 100644 --- a/contrib/mir/src/tzt/expectation.rs +++ b/contrib/mir/src/tzt/expectation.rs @@ -51,9 +51,13 @@ fn unify_interpreter_error( // context of the interpreter, though here we have full type information for // both values being compared, so it is probably safe to compare typed // representation as well. 
+ + // For now we compare values untyped to optimized form with a `PACK` flavor. + // This means that comb pairs given in different forms (tree / linear) will + // be considered distinct. This should be fine in most cases though. let arena = typed_arena::Arena::new(); - typed_value_to_value_optimized(&arena, exp_typed_val) - == typed_value_to_value_optimized(&arena, failed_typed_value.clone()) + typed_value_to_value_optimized_legacy(&arena, exp_typed_val) + == typed_value_to_value_optimized_legacy(&arena, failed_typed_value.clone()) } Err(_) => false, } -- GitLab From e59bb669597494cc0663d486941de834a06ca2f3 Mon Sep 17 00:00:00 2001 From: martoon Date: Wed, 22 Nov 2023 16:27:25 +0400 Subject: [PATCH 2/4] MIR: Add tests for encoding typed values --- contrib/mir/src/serializer.rs | 3 +- contrib/mir/src/serializer/encode.rs | 12 ++-- .../mir/src/serializer/integration_tests.rs | 71 +++++++++++++++++++ 3 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 contrib/mir/src/serializer/integration_tests.rs diff --git a/contrib/mir/src/serializer.rs b/contrib/mir/src/serializer.rs index 7087d404217b..91a1f8942a4b 100644 --- a/contrib/mir/src/serializer.rs +++ b/contrib/mir/src/serializer.rs @@ -1 +1,2 @@ -mod encode; +pub mod encode; +mod integration_tests; diff --git a/contrib/mir/src/serializer/encode.rs b/contrib/mir/src/serializer/encode.rs index 4f8bae38d7d4..6ae21ff7df1e 100644 --- a/contrib/mir/src/serializer/encode.rs +++ b/contrib/mir/src/serializer/encode.rs @@ -175,15 +175,19 @@ fn encode_micheline(mich: &Micheline, out: &mut Vec) { } impl<'a> Micheline<'a> { - /// Serialize value. - #[allow(dead_code)] // Until we add PACK - fn encode(&self) -> Vec { + /// Serialize a value. + pub fn encode(&self) -> Vec { self.encode_starting_with(&[]) } + /// Serialize a value like PACK does. 
+ pub fn encode_for_pack(&self) -> Vec { + self.encode_starting_with(&[0x05]) + } + /// Like [Value::encode], but allows specifying a prefix, useful for /// `PACK` implementation. - fn encode_starting_with(&self, start_bytes: &[u8]) -> Vec { + pub fn encode_starting_with(&self, start_bytes: &[u8]) -> Vec { let mut out = Vec::from(start_bytes); encode_micheline(self, &mut out); out diff --git a/contrib/mir/src/serializer/integration_tests.rs b/contrib/mir/src/serializer/integration_tests.rs new file mode 100644 index 000000000000..0b651f94d910 --- /dev/null +++ b/contrib/mir/src/serializer/integration_tests.rs @@ -0,0 +1,71 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +//! Tests that cover several components, including Micheline encoding. + +/// Covers key points of typed values serialization where ambiguity is possible +/// (e.g. optimized vs readable representations). +/// Full coverage of PACK we expect from the TZT tests. 
+#[cfg(test)] +mod test_typed_encode { + use typed_arena::Arena; + + use crate::ast::{self, byte_repr_trait::*, KeyHash}; + use crate::ast::{Address, TypedValue}; + + // Expected bytes to be produced with + // octez-client --mode mockup run script "parameter unit; storage bytes; code { DROP; PUSH $ty $val; PACK; NIL operation; PAIR }" on storage 0x and input Unit + // + // Or in 19.0-rc1 version of octez-client simply: + // octez-client --mode mockup run michelson code PACK on stack "{Stack_elt $ty $val}" + + #[track_caller] + fn check(v: TypedValue, hex_bytes: &str) { + let hex_bytes: &str = hex_bytes + .strip_prefix("0x") + .expect("The `expected` argument must start from 0x"); + + let arena = Arena::new(); + let micheline = ast::typed_value_to_value_optimized_legacy(&arena, v); + assert_eq!( + micheline.encode_for_pack(), + hex::decode(hex_bytes).expect("Bad hex string in `expected` argument") + ) + } + + #[test] + fn test_address() { + check( + TypedValue::Address( + Address::from_base58_check("tz1NyAf1KeeFCCPPAZ9ard9YVshVGFibzVKa").unwrap(), + ), + "0x050a0000001600002486eda3c7bbbe6be511b46d6deeb1594258a7fd", + ) + } + + #[test] + fn test_key_hash() { + check( + TypedValue::KeyHash( + KeyHash::from_base58_check("tz1NyAf1KeeFCCPPAZ9ard9YVshVGFibzVKa").unwrap(), + ), + "0x050a00000015002486eda3c7bbbe6be511b46d6deeb1594258a7fd", + ) + } + + #[test] + fn test_comb_pair() { + // Should be encoded as a tree + check( + TypedValue::new_pair( + TypedValue::Nat(1), + TypedValue::new_pair(TypedValue::Nat(2), TypedValue::Nat(3)), + ), + "0x0507070001070700020003", + ) + } +} -- GitLab From d42355974d92d74935411561dcdcae71a27b3e87 Mon Sep 17 00:00:00 2001 From: martoon Date: Wed, 22 Nov 2023 16:51:16 +0400 Subject: [PATCH 3/4] MIR: Add `PACK` instruction --- contrib/mir/src/ast.rs | 1 + contrib/mir/src/interpreter.rs | 20 ++++++++++++++++ contrib/mir/src/serializer/encode.rs | 2 +- contrib/mir/src/typechecker.rs | 34 ++++++++++++++++++++++++++++ 4 files changed, 56 
insertions(+), 1 deletion(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 5adc8606245f..24b9e0b947d2 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -261,6 +261,7 @@ pub enum Instruction { ChainId, /// `ISelf` because `Self` is a reserved keyword ISelf(Entrypoint), + Pack, CheckSignature, TransferTokens, SetDelegate, diff --git a/contrib/mir/src/interpreter.rs b/contrib/mir/src/interpreter.rs index ffecc739a5a2..b2acaec5c6c1 100644 --- a/contrib/mir/src/interpreter.rs +++ b/contrib/mir/src/interpreter.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +use typed_arena::Arena; + use crate::ast::annotations::NO_ANNS; use crate::ast::*; use crate::context::Ctx; @@ -381,6 +383,14 @@ fn interpret_one(i: &Instruction, ctx: &mut Ctx, stack: &mut IStack) -> Result<( entrypoint: entrypoint.clone(), })); } + I::Pack => { + ctx.gas.consume(0)?; // TODO + let v = pop!(); + let arena = Arena::new(); + let mich = typed_value_to_value_optimized_legacy(&arena, v); + let encoded = mich.encode_for_pack(); + stack.push(V::Bytes(encoded)); + } I::CheckSignature => { let key = pop!(V::Key); let sig = pop!(V::Signature); @@ -1401,6 +1411,16 @@ mod interpreter_tests { ); } + #[test] + fn pack_instr() { + let stack = &mut stk![TypedValue::new_pair(TypedValue::Int(12), TypedValue::Unit)]; + assert_eq!(interpret(&vec![Pack], &mut Ctx::default(), stack), Ok(())); + assert_eq!( + stack, + &stk![TypedValue::Bytes(hex::decode("050707000c030b").unwrap())] + ); + } + #[test] fn self_instr() { let stk = &mut stk![]; diff --git a/contrib/mir/src/serializer/encode.rs b/contrib/mir/src/serializer/encode.rs index 6ae21ff7df1e..a24b371b90d5 100644 --- a/contrib/mir/src/serializer/encode.rs +++ b/contrib/mir/src/serializer/encode.rs @@ -187,7 +187,7 @@ impl<'a> Micheline<'a> { /// Like [Value::encode], but allows specifying a prefix, useful for /// `PACK` implementation. 
- pub fn encode_starting_with(&self, start_bytes: &[u8]) -> Vec { + pub(crate) fn encode_starting_with(&self, start_bytes: &[u8]) -> Vec { let mut out = Vec::from(start_bytes); encode_micheline(self, &mut out); out diff --git a/contrib/mir/src/typechecker.rs b/contrib/mir/src/typechecker.rs index c27a54fe8576..6a00be80e41b 100644 --- a/contrib/mir/src/typechecker.rs +++ b/contrib/mir/src/typechecker.rs @@ -955,6 +955,15 @@ pub(crate) fn typecheck_instruction( } (App(SELF, expect_args!(0), _), _) => unexpected_micheline!(), + (App(PACK, [], _), [.., _]) => { + let t = pop!(); + t.ensure_prop(&mut ctx.gas, TypeProperty::Packable)?; + stack.push(T::Bytes); + I::Pack + } + (App(PACK, [], _), []) => no_overload!(PACK, len 1), + (App(PACK, expect_args!(0), _), _) => unexpected_micheline!(), + (App(TRANSFER_TOKENS, [], _), [.., T::Contract(ct), T::Mutez, arg_t]) => { ensure_ty_eq(ctx, ct, arg_t)?; stack.drop_top(3); @@ -3131,6 +3140,31 @@ mod typecheck_tests { ); } + #[test] + fn pack_instr() { + let stk = &mut tc_stk![Type::new_pair(Type::Int, Type::Unit)]; + assert_eq!( + super::typecheck_instruction(&parse("PACK").unwrap(), &mut Ctx::default(), None, stk), + Ok(Instruction::Pack) + ); + assert_eq!(stk, &tc_stk![Type::Bytes]); + } + + #[test] + fn pack_instr_non_packable() { + assert_eq!( + typecheck_instruction( + &parse("PACK").unwrap(), + &mut Ctx::default(), + &mut tc_stk![Type::Operation] + ), + Err(TcError::InvalidTypeProperty( + TypeProperty::Packable, + Type::Operation + )) + ); + } + #[test] fn self_instr() { let stk = &mut tc_stk![]; -- GitLab From 93eb5c333f0ae744b27c6df619ce40cff1def2e3 Mon Sep 17 00:00:00 2001 From: martoon Date: Thu, 23 Nov 2023 01:03:30 +0400 Subject: [PATCH 4/4] MIR: Add gas consumption for PACK --- contrib/mir/src/ast/annotations.rs | 10 +++++ contrib/mir/src/gas.rs | 70 +++++++++++++++++++++++++++++- contrib/mir/src/interpreter.rs | 6 ++- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/contrib/mir/src/ast/annotations.rs 
b/contrib/mir/src/ast/annotations.rs index 623964267947..74f41c126d4e 100644 --- a/contrib/mir/src/ast/annotations.rs +++ b/contrib/mir/src/ast/annotations.rs @@ -88,3 +88,13 @@ impl<'a> FromIterator> for Annotations<'a> { Annotations(Vec::from_iter(iter)) } } + +/// Returns all annotations in the same order in which they were given +/// initially. +impl<'a> IntoIterator for &'a Annotations<'a> { + type Item = &'a Annotation<'a>; + type IntoIter = std::slice::Iter<'a, Annotation<'a>>; + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} diff --git a/contrib/mir/src/gas.rs b/contrib/mir/src/gas.rs index e27ddf56a95b..615e51f2e157 100644 --- a/contrib/mir/src/gas.rs +++ b/contrib/mir/src/gas.rs @@ -162,7 +162,7 @@ pub mod interpret_cost { use checked::Checked; use super::{AsGasCost, OutOfGas}; - use crate::ast::{Key, KeyHash, Or, TypedValue}; + use crate::ast::{Key, Micheline, KeyHash, Or, TypedValue}; pub const DIP: u32 = 10; pub const DROP: u32 = 10; @@ -190,6 +190,7 @@ pub mod interpret_cost { pub const NIL: u32 = 10; pub const CONS: u32 = 15; pub const CHAIN_ID: u32 = 15; + pub const PACK: u32 = 0; pub const SELF: u32 = 10; // Gas costs obtained from https://gitlab.com/tezos/tezos/-/blob/9875fbebe032a8c5ce62b3b3cb1588ca9855a37e/src/proto_017_PtNairob/lib_protocol/michelson_v1_gas_costs_generated.ml pub const TRANSFER_TOKENS: u32 = 60; @@ -364,6 +365,73 @@ pub mod interpret_cost { (80 + 2 * lookup_cost).as_gas_cost() } + /// Measures the size of a Micheline expression using several metrics. + pub struct MichelineSize { + /// Total number of nodes (including leaves). + nodes_num: Checked, + + /// Total size of string and bytes literals. + str_byte: Checked, + + /// Total size of zarith numbers, in bytes. 
+ zariths: Checked, + } + + impl Default for MichelineSize { + fn default() -> Self { + MichelineSize { + nodes_num: Checked::from(0), + str_byte: Checked::from(0), + zariths: Checked::from(0), + } + } + } + + pub fn micheline_encoding<'a>(mich: &'a Micheline<'a>) -> Result { + let mut size = MichelineSize::default(); + collect_micheline_size(mich, &mut size); + micheline_encoding_by_size(size) + } + + fn micheline_encoding_by_size(size: MichelineSize) -> Result { + (size.nodes_num * 100 + size.zariths * 25 + size.str_byte * 10).as_gas_cost() + } + + fn collect_micheline_size<'a>(mich: &'a Micheline<'a>, size: &mut MichelineSize) { + size.nodes_num += 1; + match mich { + Micheline::String(s) => size.str_byte += s.len(), + Micheline::Bytes(bs) => size.str_byte += bs.len(), + Micheline::Int(i) => { + // NOTE(review): `size_of_val` returns a size in bytes, not bits, so the division below under-counts - confirm; eventually, when using BigInts, use BigInt::bits() etc. + let bits = std::mem::size_of_val(i); + let bytes = (bits + 7) / 8; + size.zariths += bytes; + } + Micheline::Seq(ms) => { + for m in *ms { + collect_micheline_size(m, size) + } + } + Micheline::App(_prim, args, annots) => { + for arg in *args { + collect_micheline_size(arg, size) + } + for annot in annots { + // Annotations are accounted as simple string literals + use crate::lexer::Annotation as Ann; + size.str_byte += match annot { + // Including annotation prefix into the size too + Ann::Field(a) => a.len() + 1, + Ann::Variable(a) => a.len() + 1, + Ann::Type(a) => a.len() + 1, + Ann::Special(a) => a.len(), + } + } + } + } + } + pub fn check_signature(k: &Key, msg: &[u8]) -> Result { let len = Checked::from(msg.len()); match k { diff --git a/contrib/mir/src/interpreter.rs b/contrib/mir/src/interpreter.rs index b2acaec5c6c1..30e3468b97ea 100644 --- a/contrib/mir/src/interpreter.rs +++ b/contrib/mir/src/interpreter.rs @@ -384,10 +384,14 @@ fn interpret_one(i: &Instruction, ctx: &mut Ctx, stack: &mut IStack) -> Result<( })); } I::Pack => { - ctx.gas.consume(0)?; // TODO + ctx.gas.consume(interpret_cost::PACK)?; 
let v = pop!(); let arena = Arena::new(); + // In the Tezos implementation they also charge gas for the pass + // that strips locations. We don't have it. let mich = typed_value_to_value_optimized_legacy(&arena, v); + ctx.gas + .consume(interpret_cost::micheline_encoding(&mich)?)?; let encoded = mich.encode_for_pack(); stack.push(V::Bytes(encoded)); } -- GitLab