From 7054a1158aef44b38f82599c969838cb046417ec Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 17:12:00 +0300 Subject: [PATCH 01/20] MIR: fix clippy lint --- contrib/mir/src/ast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index a90a138f5507..9083304144b5 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -680,7 +680,7 @@ pub mod test_strategies { .ensure_prop(&mut Gas::default(), TypeProperty::Comparable) .is_ok()) .prop_map(Type::new_ticket), - (inner.clone(), inner.clone()) + (inner.clone(), inner) .prop_filter("Key must be comparable", |(k, _)| k .ensure_prop(&mut Gas::default(), TypeProperty::Comparable) .is_ok()) -- GitLab From a76db2deae197e59b241fc72371f0ff5e00f0927 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 20:59:57 +0300 Subject: [PATCH 02/20] MIR: move operations to a separate module + docs --- contrib/mir/src/ast.rs | 48 +----------- contrib/mir/src/ast/michelson_operation.rs | 88 ++++++++++++++++++++++ 2 files changed, 89 insertions(+), 47 deletions(-) create mode 100644 contrib/mir/src/ast/michelson_operation.rs diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 9083304144b5..1249b3fbcf72 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -15,6 +15,7 @@ pub mod michelson_key; pub mod michelson_key_hash; pub mod michelson_lambda; pub mod michelson_list; +pub mod michelson_operation; pub mod michelson_signature; pub mod or; pub mod overloads; @@ -45,53 +46,6 @@ pub use michelson_list::MichelsonList; pub use michelson_signature::Signature; pub use or::Or; -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct TransferTokens<'a> { - pub param: TypedValue<'a>, - pub destination_address: Address, - pub amount: i64, -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct SetDelegate(pub Option); - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct Emit<'a> { - pub tag: Option>, - pub value: 
TypedValue<'a>, - - // Here an `Or` type is used, (instead of a single `Type` or `Micheline` field), because of two - // reasons. - // 1. We need to carry annotations for this type, so at least for now, that requires carrying Micheline. - // 2. If the type is implicit, and comes from the stack, then we cannot make an equalent - // Micheline<'a> from it since it requires an Arena (at least for now), which is not - // available in the typechecker. - pub arg_ty: Or>, -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct CreateContract<'a> { - pub delegate: Option, - pub amount: i64, - pub storage: TypedValue<'a>, - pub code: Rc>, - pub micheline_code: &'a Micheline<'a>, -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum Operation<'a> { - TransferTokens(TransferTokens<'a>), - SetDelegate(SetDelegate), - Emit(Emit<'a>), - CreateContract(CreateContract<'a>), -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct OperationInfo<'a> { - pub operation: Operation<'a>, - pub counter: u128, -} - #[derive(Debug, Clone, Eq, PartialEq)] pub struct Ticket<'a> { pub ticketer: AddressHash, diff --git a/contrib/mir/src/ast/michelson_operation.rs b/contrib/mir/src/ast/michelson_operation.rs new file mode 100644 index 000000000000..f110353c3831 --- /dev/null +++ b/contrib/mir/src/ast/michelson_operation.rs @@ -0,0 +1,88 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +//! Representation for typed Michelson `operation` values. + +use std::rc::Rc; + +use super::{Address, ContractScript, FieldAnnotation, KeyHash, Micheline, Or, Type, TypedValue}; + +/// Representation of token transfer operation, created by `TRANSFER_TOKENS` +/// instruction. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct TransferTokens<'a> { + /// Transfer parameter. 
+ pub param: TypedValue<'a>, + /// Transfer destination. + pub destination_address: Address, + /// Transfer amount. + pub amount: i64, +} + +/// Representation of set delegate operation, created by `SET_DELEGATE` instruction. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct SetDelegate(pub Option); + +/// Representation of emit operation, created by `EMIT` instruction. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct Emit<'a> { + /// Event tag. + pub tag: Option>, + /// Event value. + pub value: TypedValue<'a>, + + /// Event type. + /// + /// Here an `Or` type is used, (instead of a single `Type` or `Micheline` + /// field), for two reasons: + /// + /// 1. Emit type annotation is special when explicit in that it must + /// preserve annotations. That is currently only representable as [Micheline]. + /// 2. If the type is implicit, and comes from the stack, we must use the + /// [Type] from the typechecker. Converting it to [Micheline] is + /// complicated, as [Micheline] requires an arena. + pub arg_ty: Or>, +} + +/// Representation of create contract operation, created by `CREATE_CONTRACT` instruction. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct CreateContract<'a> { + /// Contract's optional delegate. + pub delegate: Option, + /// Contract's initial balance. + pub amount: i64, + /// Contract's initial storage. + pub storage: TypedValue<'a>, + /// Contract's typechecked code. + pub code: Rc>, + /// Raw [Micheline] representation of the contract's code. The operation + /// encoding must survive round-trip via `PACK`/`UNPACK`, so raw code has to + /// be stored. + pub micheline_code: &'a Micheline<'a>, +} + +/// Enum corresponding to values of the `operation` Michelson type. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Operation<'a> { + /// Transfer tokens operation. + TransferTokens(TransferTokens<'a>), + /// Set delegate operation. + SetDelegate(SetDelegate), + /// Emit operation. + Emit(Emit<'a>), + /// Create contract operation. 
+ CreateContract(CreateContract<'a>), +} + +/// Operation with a nonce attached. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct OperationInfo<'a> { + /// Operation. + pub operation: Operation<'a>, + /// Nonce. + pub counter: u128, +} -- GitLab From 3572f64a4dd12029f77e73f1c75c8d95aed99ac9 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 21:41:03 +0300 Subject: [PATCH 03/20] MIR: remove unused ParserError:ExpectedU10 --- contrib/mir/src/parser.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/mir/src/parser.rs b/contrib/mir/src/parser.rs index 6ee15abce3bd..95449255cb68 100644 --- a/contrib/mir/src/parser.rs +++ b/contrib/mir/src/parser.rs @@ -17,8 +17,6 @@ use typed_arena::Arena; #[derive(Debug, PartialEq, thiserror::Error)] pub enum ParserError { - #[error("expected a natural from 0 to 1023 inclusive, but got {0}")] - ExpectedU10(i128), #[error(transparent)] LexerError(#[from] LexerError), #[error(transparent)] -- GitLab From ee681fea03588f333109524bcfb47513dd10be7f Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 18:47:39 +0300 Subject: [PATCH 04/20] MIR: ast module documentation --- contrib/mir/src/ast.rs | 68 ++++++++++++++++++- contrib/mir/src/ast/annotations.rs | 21 ++++++ contrib/mir/src/ast/big_map.rs | 18 ++++- contrib/mir/src/ast/byte_repr_trait.rs | 16 +++++ contrib/mir/src/ast/comparable.rs | 7 ++ contrib/mir/src/ast/micheline.rs | 18 +++++ contrib/mir/src/ast/michelson_address.rs | 9 +++ .../src/ast/michelson_address/address_hash.rs | 23 +++++-- .../src/ast/michelson_address/entrypoint.rs | 11 +++ contrib/mir/src/ast/michelson_key.rs | 15 +++- contrib/mir/src/ast/michelson_key_hash.rs | 18 ++++- contrib/mir/src/ast/michelson_lambda.rs | 15 ++++ contrib/mir/src/ast/michelson_list.rs | 10 +++ contrib/mir/src/ast/michelson_signature.rs | 64 +++++++++++++++-- contrib/mir/src/ast/or.rs | 11 +++ contrib/mir/src/ast/overloads.rs | 6 ++ contrib/mir/src/lib.rs | 10 ++- 17 files changed, 318 
insertions(+), 22 deletions(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 1249b3fbcf72..916e80bf6b9b 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -5,10 +5,13 @@ /* */ /******************************************************************************/ +//! AST definitions for raw ([Micheline]) and typed representations of +//! Michelson. + pub mod annotations; pub mod big_map; pub mod byte_repr_trait; -pub mod comparable; +mod comparable; pub mod micheline; pub mod michelson_address; pub mod michelson_key; @@ -26,6 +29,8 @@ use std::{ collections::{BTreeMap, BTreeSet}, rc::Rc, }; +/// Reexported from [tezos_crypto_rs::hash]. Typechecked values of the Michelson +/// type `chain_id`. pub use tezos_crypto_rs::hash::ChainId; use typed_arena::Arena; @@ -43,17 +48,32 @@ pub use michelson_key::Key; pub use michelson_key_hash::KeyHash; pub use michelson_lambda::{Closure, Lambda}; pub use michelson_list::MichelsonList; +pub use michelson_operation::{ + CreateContract, Emit, Operation, OperationInfo, SetDelegate, TransferTokens, +}; pub use michelson_signature::Signature; pub use or::Or; +/// Representation for values of the Michelson `ticket` type. #[derive(Debug, Clone, Eq, PartialEq)] pub struct Ticket<'a> { + /// Ticketer, the address of the contract that issued the ticket. pub ticketer: AddressHash, + /// Ticket payload. pub content: TypedValue<'a>, + /// Ticket amount. pub amount: BigUint, } +/// Representation for a Michelson type. Used primarily in the typechecker. Note +/// this representation doesn't store annotations, as annotations are mostly +/// deprecated and ignored. For entrypoints, see +/// [crate::ast::michelson_address::entrypoint]. +/// +/// The names of the variants correspond to the names of Michelson types, but +/// snake_case is converted to PascalCase. 
#[derive(Debug, Clone, Eq, PartialEq)] +#[allow(missing_docs)] pub enum Type { Nat, Int, @@ -101,42 +121,52 @@ impl Type { } } + /// Convenience function to construct a new [Self::Pair]. Allocates a new [Rc]. pub fn new_pair(l: Self, r: Self) -> Self { Self::Pair(Rc::new((l, r))) } + /// Convenience function to construct a new [Self::Option]. Allocates a new [Rc]. pub fn new_option(x: Self) -> Self { Self::Option(Rc::new(x)) } + /// Convenience function to construct a new [Self::List]. Allocates a new [Rc]. pub fn new_list(x: Self) -> Self { Self::List(Rc::new(x)) } + /// Convenience function to construct a new [Self::Set]. Allocates a new [Rc]. pub fn new_set(v: Self) -> Self { Self::Set(Rc::new(v)) } + /// Convenience function to construct a new [Self::Map]. Allocates a new [Rc]. pub fn new_map(k: Self, v: Self) -> Self { Self::Map(Rc::new((k, v))) } + /// Convenience function to construct a new [Self::BigMap]. Allocates a new [Rc]. pub fn new_big_map(k: Self, v: Self) -> Self { Self::BigMap(Rc::new((k, v))) } + /// Convenience function to construct a new [Self::Or]. Allocates a new [Rc]. pub fn new_or(l: Self, r: Self) -> Self { Self::Or(Rc::new((l, r))) } + /// Convenience function to construct a new [Self::Contract]. Allocates a new [Rc]. pub fn new_contract(ty: Self) -> Self { Self::Contract(Rc::new(ty)) } + /// Convenience function to construct a new [Self::Ticket]. Allocates a new [Rc]. pub fn new_ticket(ty: Self) -> Self { Self::Ticket(Rc::new(ty)) } + /// Convenience function to construct a new [Self::Lambda]. Allocates a new [Rc]. pub fn new_lambda(ty1: Self, ty2: Self) -> Self { Self::Lambda(Rc::new((ty1, ty2))) } @@ -254,7 +284,24 @@ impl<'a> IntoMicheline<'a> for &'_ Type { } } +/// Enum representing an arbitrary typed Michelson value. The name of the +/// variant corresponds to the name of the type, with snake_case converted to +/// PascalCase. +/// +/// This is used primarily by the interpreter. 
It should be noted the type has a +/// few quirks related to ordering and equality: +/// +/// 1. Comparing two `TypedValue`s for equality is only well-defined if both are +/// known to have the same type. `TypedValue` itself does not carry its type, +/// so, for instance two empty lists with elements of different types will +/// compare equal. +/// 2. The [Ord] instance can panic if values being compared are of different +/// types and/or are incomparable. This is fine for the interpreter, as the +/// typechecker has verified this invariant holds. However, be mindful of +/// this when comparing `TypedValue` in client code. [PartialOrd] is safe to +/// use, it'll just return [None] for incomparable values. #[derive(Debug, Clone, Eq, PartialEq)] +#[allow(missing_docs)] pub enum TypedValue<'a> { Int(BigInt), Nat(BigUint), @@ -415,18 +462,22 @@ pub(crate) fn unwrap_ticket(t: Ticket) -> TypedValue { } impl<'a> TypedValue<'a> { + /// Convenience function to construct a new [Self::Pair]. Allocates a new [Box]. pub fn new_pair(l: Self, r: Self) -> Self { Self::Pair(Box::new((l, r))) } + /// Convenience function to construct a new [Self::Option]. Allocates a new [Box]. pub fn new_option(x: Option) -> Self { Self::Option(x.map(Box::new)) } + /// Convenience function to construct a new [Self::Or]. Allocates a new [Box]. pub fn new_or(x: Or) -> Self { Self::Or(Box::new(x)) } + /// Convenience function to construct a new [Self::Operation]. Allocates a new [Box]. pub fn new_operation(o: Operation<'a>, c: u128) -> Self { Self::Operation(Box::new(OperationInfo { operation: o, @@ -452,20 +503,31 @@ impl<'a> TypedValue<'a> { Self::Timestamp(n.into()) } + /// Convenience function to construct a new [Self::Ticket]. Allocates a new [Box]. pub fn new_ticket(t: Ticket<'a>) -> Self { Self::Ticket(Box::new(t)) } + /// Convenience function to construct a new [Self::Bls12381G1]. Allocates a new [Box]. 
pub fn new_bls12381_g1(x: bls::G1) -> Self { Self::Bls12381G1(Box::new(x)) } + /// Convenience function to construct a new [Self::Bls12381G2]. Allocates a new [Box]. pub fn new_bls12381_g2(x: bls::G2) -> Self { Self::Bls12381G2(Box::new(x)) } } +/// Enum representing typechecked Michelson instructions. Some instructions may +/// be applied to different input stacks, for those cases the variant carries an +/// enum specifying the particular version of the instruction (here called +/// "overload"). See [overloads]. +/// +/// The name of the variant corresponds to the name of the instruction, but with +/// UPPER_SNAKE_CASE converted to PascalCase. #[derive(Debug, Eq, PartialEq, Clone)] +#[allow(missing_docs)] pub enum Instruction<'a> { Add(overloads::Add), Mul(overloads::Mul), @@ -577,10 +639,14 @@ pub enum Instruction<'a> { Map(overloads::Map, Vec), } +/// A full typechecked contract script. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ContractScript<'a> { + /// Parameter type. Corresponds to the script's `parameter` field. pub parameter: Type, + /// Storage type. Corresponds to the script's `storage` field. pub storage: Type, + /// Script code. Corresponds to the script's `code` field. pub code: Instruction<'a>, } diff --git a/contrib/mir/src/ast/annotations.rs b/contrib/mir/src/ast/annotations.rs index 39e547fd442a..1f97beb48ba2 100644 --- a/contrib/mir/src/ast/annotations.rs +++ b/contrib/mir/src/ast/annotations.rs @@ -5,17 +5,24 @@ /* */ /******************************************************************************/ +//! Tezos annotations on [Micheline][crate::ast::Micheline] nodes and +//! utilities for working with them. + use std::borrow::Cow; use crate::lexer::Annotation; +/// A structure holding all Tezos annotations on a [Micheline][crate::ast::Micheline] node. #[derive(Clone, Eq, PartialEq)] pub struct Annotations<'a>(Vec>); +/// Constant corresponding to no annotations. 
pub const NO_ANNS: Annotations = Annotations::new(); +/// Errors that can happen when working with [Annotations]. #[derive(Debug, Clone, Eq, PartialEq, thiserror::Error)] pub enum AnnotationError { + /// Expected at most one field annotation, but found multiple. #[error("unexpected second field annotation: {0}")] TooManyFieldAnns(String), } @@ -32,14 +39,20 @@ impl std::fmt::Debug for Annotations<'_> { } } +/// A newtype wrapping a field annotation, like `%foo`. This newtype is used to +/// enforce some invariants on the type level. It's impossible to construct +/// manually, except in tests. #[derive(Debug, Clone, Eq, PartialEq)] pub struct FieldAnnotation<'a>(Cow<'a, str>); impl<'a> FieldAnnotation<'a> { + /// View the field annotation contents as a [str] slice. The leading `%` is + /// _not_ included. pub fn as_str(&self) -> &str { &self.0 } + /// Convert the field annotation into the inner [`Cow`]`<'a, str>`. pub fn into_cow(self) -> Cow<'a, str> { self.0 } @@ -51,22 +64,30 @@ impl<'a> FieldAnnotation<'a> { } impl<'a> Annotations<'a> { + /// Create a new, empty list of annotations. pub const fn new() -> Self { Annotations(Vec::new()) } + /// Check if the list of annotations is empty. pub fn is_empty(&self) -> bool { self.0.is_empty() } + /// Get the number of annotations in the list. pub fn len(&self) -> usize { self.0.len() } + /// Returns an iterator over the list of annotations. pub fn iter(&self) -> impl Iterator { self.0.iter() } + /// Get at most one field annotation from the list. If there aren't any + /// field annotations in the list, returns `Ok(None)`. If there is exactly + /// one field annotation, returns `Ok(Some(field_annotation))`. 
If there are + /// more than one, returns `Err(`[`AnnotationError::TooManyFieldAnns`]`)` pub fn get_single_field_ann(&self) -> Result>, AnnotationError> { use Annotation::*; let mut res = None; diff --git a/contrib/mir/src/ast/big_map.rs b/contrib/mir/src/ast/big_map.rs index cf0896a6cdcb..b9bbcce27fac 100644 --- a/contrib/mir/src/ast/big_map.rs +++ b/contrib/mir/src/ast/big_map.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! `big_map` typed representation and utilities for working with `big_map`s. + use num_bigint::BigInt; use std::{ collections::{btree_map::Entry, BTreeMap}, @@ -46,7 +48,10 @@ pub struct BigMap<'a> { /// if necessary, with copy of the stored map. pub overlay: BTreeMap, Option>>, + /// Type of the map key. pub key_type: Type, + + /// Type of the map value. pub value_type: Type, } @@ -94,10 +99,13 @@ impl<'a> BigMap<'a> { } } +/// Errors that can happen when working with lazy storage. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum LazyStorageError { + /// Decoding from the internal representation failed. #[error("decode failed {0}")] DecodingError(String), + /// Some other error happened. #[error("{0}")] OtherError(String), } @@ -178,6 +186,8 @@ pub trait LazyStorage<'a> { fn big_map_remove(&mut self, id: &BigMapId) -> Result<(), LazyStorageError>; } +/// Bulk-update the big_map. This trait exists mostly for convenience, and has a +/// blanket implementation. pub trait LazyStorageBulkUpdate<'a>: LazyStorage<'a> { /// Update big map with multiple changes, generalizes /// [LazyStorage::big_map_update]. @@ -198,8 +208,9 @@ pub trait LazyStorageBulkUpdate<'a>: LazyStorage<'a> { impl<'a, T: LazyStorage<'a> + ?Sized> LazyStorageBulkUpdate<'a> for T {} +/// A `big_map` representation with metadata, used in [InMemoryLazyStorage]. 
#[derive(Clone, PartialEq, Eq, Debug)] -pub struct MapInfo<'a> { +struct MapInfo<'a> { map: BTreeMap, TypedValue<'a>>, key_type: Type, value_type: Type, @@ -213,6 +224,7 @@ pub struct InMemoryLazyStorage<'a> { } impl<'a> InMemoryLazyStorage<'a> { + /// Construct a new, empty, in-memory storage. pub fn new() -> Self { InMemoryLazyStorage { next_id: 0.into(), @@ -489,10 +501,14 @@ impl<'a> TypedValue<'a> { } } + /// Traverses a `TypedValue` and add a mutable reference to it to the output + /// vector. pub fn view_big_maps_mut<'b>(&'b mut self, out: &mut Vec<&'b mut BigMap<'a>>) { self.collect_big_maps(&mut |m| out.push(m)); } + /// Same as [TypedValue::view_big_maps_mut], but only collects `big_map` + /// identifiers. pub fn view_big_map_ids(&mut self, out: &mut Vec) { self.collect_big_maps(&mut |m| { if let Some(id) = &m.id { diff --git a/contrib/mir/src/ast/byte_repr_trait.rs b/contrib/mir/src/ast/byte_repr_trait.rs index 66413cd8178a..725f4bbd3ff2 100644 --- a/contrib/mir/src/ast/byte_repr_trait.rs +++ b/contrib/mir/src/ast/byte_repr_trait.rs @@ -5,13 +5,21 @@ /* */ /******************************************************************************/ +//! Trait for values representable by either raw bytes or base58check-derived +//! strings and related types. + use tezos_crypto_rs::base58::FromBase58CheckError; use tezos_crypto_rs::hash::FromBytesError; +/// Errors that can happen when working with [ByteReprTrait]. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum ByteReprError { + /// Encountered an unknown prefix while trying to decode a value either from + /// bytes or base58-check string. #[error("unknown prefix: {0}")] UnknownPrefix(String), + /// Input format is in some way unexpected, with the details explained in + /// the contained string. #[error("wrong format: {0}")] WrongFormat(String), } @@ -31,14 +39,22 @@ impl From for ByteReprError { /// Trait for values representable by either raw bytes or base58check-derived /// strings. 
pub trait ByteReprTrait: Sized { + /// Try to construct `Self` from base58-check encoded string slice. fn from_base58_check(data: &str) -> Result; + /// Try to construct `Self` from a raw byte slice. Note the slice is _not_ + /// expected to be base58-encoded. fn from_bytes(bytes: &[u8]) -> Result; + /// Construct base58-check representation of `Self`. fn to_base58_check(&self) -> String; + /// Write raw byte representation of `Self` to the output vector. Note this + /// is _not_ base58-check encoded. fn to_bytes(&self, out: &mut Vec); + /// Convenience function to construct a new [Vec] and write raw byte + /// representation of `Self` to it. fn to_bytes_vec(&self) -> Vec { let mut out = Vec::new(); self.to_bytes(&mut out); diff --git a/contrib/mir/src/ast/comparable.rs b/contrib/mir/src/ast/comparable.rs index a977926234b9..06512073cc83 100644 --- a/contrib/mir/src/ast/comparable.rs +++ b/contrib/mir/src/ast/comparable.rs @@ -1,3 +1,10 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + use super::TypedValue; impl PartialOrd for TypedValue<'_> { diff --git a/contrib/mir/src/ast/micheline.rs b/contrib/mir/src/ast/micheline.rs index 84bdfaec602b..82c596a5c8fe 100644 --- a/contrib/mir/src/ast/micheline.rs +++ b/contrib/mir/src/ast/micheline.rs @@ -5,21 +5,30 @@ /* */ /******************************************************************************/ +//! Definition of Micheline representation and utilities for working with it. + use num_bigint::{BigInt, BigUint}; use typed_arena::Arena; use super::annotations::{Annotations, NO_ANNS}; use crate::lexer::Prim; +/// Representation of a Micheline node. The representation is non-owning by +/// design, so something has to own the child nodes. Generally used with an +/// arena allocator, like, e.g. [typed_arena]. 
#[derive(Debug, Clone, Eq, PartialEq)] pub enum Micheline<'a> { + /// Micheline integer literal. Int(BigInt), + /// Micheline string literal. String(String), + /// Micheline bytes literal. Bytes(Vec), /// Application of a Micheline primitive to some arguments with optional /// annotations. The primitive is the first field, arguments are the second /// field, annotations are the last field. App(Prim, &'a [Micheline<'a>], Annotations<'a>), + /// Micheline braced sequence. Seq(&'a [Micheline<'a>]), } @@ -60,14 +69,19 @@ impl<'a> Micheline<'a> { buf } + /// Construct a primitive application with zero arguments. pub fn prim0(prim: Prim) -> Self { Micheline::App(prim, &[], NO_ANNS) } + /// Construct a primitive application with one argument, allocating the + /// argument in the [Arena]. pub fn prim1(arena: &'a Arena>, prim: Prim, arg: Micheline<'a>) -> Self { Micheline::App(prim, Self::alloc_seq(arena, [arg]), NO_ANNS) } + /// Construct a primitive application with two arguments, allocating the + /// arguments in the [Arena]. pub fn prim2( arena: &'a Arena>, prim: Prim, @@ -77,6 +91,8 @@ impl<'a> Micheline<'a> { Micheline::App(prim, Self::alloc_seq(arena, [arg1, arg2]), NO_ANNS) } + /// Construct a primitive application with three arguments, allocating the + /// arguments in the [Arena]. pub fn prim3( arena: &'a Arena>, prim: Prim, @@ -87,6 +103,7 @@ impl<'a> Micheline<'a> { Micheline::App(prim, Self::alloc_seq(arena, [arg1, arg2, arg3]), NO_ANNS) } + /// Construct a Micheline sequence, allocating the elements in the [Arena]. pub fn seq(arena: &'a Arena>, args: [Micheline<'a>; N]) -> Self { Micheline::Seq(Self::alloc_seq(arena, args)) } @@ -140,6 +157,7 @@ impl<'a> From<&str> for Micheline<'a> { } } +/// Trait for types that can be converted into [Micheline]. pub trait IntoMicheline<'a> { /// Untypes a value using optimized representation in legacy mode. 
/// diff --git a/contrib/mir/src/ast/michelson_address.rs b/contrib/mir/src/ast/michelson_address.rs index f160a4db6e4e..7b643453ee7c 100644 --- a/contrib/mir/src/ast/michelson_address.rs +++ b/contrib/mir/src/ast/michelson_address.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Representation for typed Michelson `address` values. + pub mod address_hash; pub mod entrypoint; @@ -15,13 +17,20 @@ use address_hash::check_size; use super::{ByteReprError, ByteReprTrait}; +/// Tezos address with an entrypoint, e.g. +/// `KT1BEqzn5Wx8uJrZNvuS9DVHmLvG9td3fDLi%foo`. #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq)] pub struct Address { + /// The hash part of the address, i.e. the part before `%`. pub hash: AddressHash, + /// The entrypoint part of the address, i.e. the part after `%`. pub entrypoint: Entrypoint, } impl Address { + /// Returns `true` if the address uses the default entrypoint. Note that + /// addresses that don't explicitly specify the entrypoint, e.g. + /// `KT1BEqzn5Wx8uJrZNvuS9DVHmLvG9td3fDLi`, implicitly use the default one. pub fn is_default_ep(&self) -> bool { self.entrypoint.is_default() } diff --git a/contrib/mir/src/ast/michelson_address/address_hash.rs b/contrib/mir/src/ast/michelson_address/address_hash.rs index aa3b5916fa91..98f8a3833869 100644 --- a/contrib/mir/src/ast/michelson_address/address_hash.rs +++ b/contrib/mir/src/ast/michelson_address/address_hash.rs @@ -5,6 +5,10 @@ /* */ /******************************************************************************/ +//! Structures for [Tezos +//! address](https://docs.tezos.com/smart-contracts/data-types/primitive-data-types#addresses) +//! hash part, i.e. the part without the entrypoint. + use crate::ast::michelson_key_hash::KeyHash; use super::{ByteReprError, ByteReprTrait}; @@ -15,10 +19,13 @@ use tezos_crypto_rs::hash::{ }; macro_rules! 
address_hash_type_and_impls { - ($($con:ident($ty:ident)),* $(,)*) => { + ($($(#[$meta:meta])* $con:ident($ty:ident)),* $(,)*) => { + /// An enum representing address hashes, like + /// `tz1Nw5nr152qddEjKT2dKBH8XcBMDAg72iLw` or + /// `KT1BRd2ka5q2cPRdXALtXD1QZ38CPam2j1ye`. #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq, Hash)] pub enum AddressHash { - $($con($ty)),* + $($(#[$meta])* $con($ty)),* } $(impl From<$ty> for AddressHash { @@ -46,8 +53,11 @@ macro_rules! address_hash_type_and_impls { } address_hash_type_and_impls! { + /// Variant for implicit addresses, `tz1...`, `tz2...`, etc. Implicit(KeyHash), + /// Variant for smart contract addresses, `KT1...`. Kt1(ContractKt1Hash), + /// Variant for smart rollup addresses, `sr1...`. Sr1(SmartRollupHash), } @@ -106,11 +116,14 @@ const TAG_SR1: u8 = 3; const PADDING_ORIGINATED: &[u8] = &[0]; impl AddressHash { - // all address hashes are 20 bytes in length + /// Byte size of a raw address hash. + /// All address hashes are 20 bytes in length. pub const HASH_SIZE: usize = 20; - // +2 for tags: implicit addresses use 2-byte, and KT1/sr1 add zero-byte - // padding to the end + /// Byte size for the Tezos representation of an address hash. + /// [Self::HASH_SIZE] + 2 for tags: implicit addresses use 2-byte tag, and + /// KT1/sr1 add zero-byte padding to the end pub const BYTE_SIZE: usize = Self::HASH_SIZE + 2; + /// Byte length of Base58 representation of address hashes. pub const BASE58_SIZE: usize = 36; } diff --git a/contrib/mir/src/ast/michelson_address/entrypoint.rs b/contrib/mir/src/ast/michelson_address/entrypoint.rs index 3cb3d9800d55..70e520dc8b20 100644 --- a/contrib/mir/src/ast/michelson_address/entrypoint.rs +++ b/contrib/mir/src/ast/michelson_address/entrypoint.rs @@ -5,6 +5,9 @@ /* */ /******************************************************************************/ +//! Structures and utilities for [Tezos +//! entrypoints](https://docs.tezos.com/smart-contracts/entrypoints). 
+ use std::collections::HashMap; use crate::ast::annotations::FieldAnnotation; @@ -12,9 +15,14 @@ use crate::ast::Type; use super::ByteReprError; +/// Structure representing address entrypoint on a Tezos address, in other +/// words, the part after `%` in `KT1BRd2ka5q2cPRdXALtXD1QZ38CPam2j1ye%foo`. +/// Tezos entrypoints are ASCII strings of at most 31 characters long. #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq, Hash)] pub struct Entrypoint(String); +/// A structure mapping from entrypoints to their types. This is simply an alias +/// for a [HashMap]. pub type Entrypoints = HashMap; impl std::fmt::Display for Entrypoint { @@ -35,14 +43,17 @@ impl Default for Entrypoint { } impl Entrypoint { + /// Returns `true` if entrypoint is the default entrypoint. pub fn is_default(&self) -> bool { self.0 == DEFAULT_EP_NAME } + /// Returns a reference to the entrypoint name as bytes. pub fn as_bytes(&self) -> &[u8] { self.0.as_bytes() } + /// Returns a reference to the entrypoint name as [str]. pub fn as_str(&self) -> &str { self.0.as_str() } diff --git a/contrib/mir/src/ast/michelson_key.rs b/contrib/mir/src/ast/michelson_key.rs index 808146ae7b1c..b41039073078 100644 --- a/contrib/mir/src/ast/michelson_key.rs +++ b/contrib/mir/src/ast/michelson_key.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Representation for typed Michelson `key` values. + use tezos_crypto_rs::{ hash::{Hash, HashTrait, PublicKeyBls, PublicKeyEd25519, PublicKeyP256, PublicKeySecp256k1}, PublicKeyWithHash, @@ -16,10 +18,11 @@ use super::{ }; macro_rules! key_type_and_impls { - ($($con:ident($ty:ident)),* $(,)*) => { + ($($(#[$meta:meta])* $con:ident($ty:ident)),* $(,)*) => { + /// Tezos public key. Corresponds to the `key` Michelson type. #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq)] pub enum Key { - $($con($ty)),* + $($(#[$meta])* $con($ty)),* } $(impl From<$ty> for Key { @@ -47,9 +50,13 @@ macro_rules! 
key_type_and_impls { } key_type_and_impls! { + /// Ed25519 public key. Ed25519(PublicKeyEd25519), + /// Secp256k1 public key. Secp256k1(PublicKeySecp256k1), + /// P256 public key. P256(PublicKeyP256), + /// BLS public key. Bls(PublicKeyBls), } @@ -84,10 +91,12 @@ const TAG_P256: u8 = 2; const TAG_BLS: u8 = 3; impl Key { - /// Smallest key size + /// Smallest key byte size in base58-check encoding. pub const MIN_BASE58_SIZE: usize = 54; + /// Smallest key byte size when represented as raw bytes. pub const MIN_BYTE_SIZE: usize = 32; + /// Construct a [KeyHash] from the key. Essentially hashes the key. pub fn hash(&self) -> KeyHash { use Key::*; // unwrap because errors should be literally impossible, any bytestring diff --git a/contrib/mir/src/ast/michelson_key_hash.rs b/contrib/mir/src/ast/michelson_key_hash.rs index 79b197622f55..cc98dfffaf7c 100644 --- a/contrib/mir/src/ast/michelson_key_hash.rs +++ b/contrib/mir/src/ast/michelson_key_hash.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Representation for typed Michelson `key_hash` values. + use tezos_crypto_rs::hash::{ ContractTz1Hash, ContractTz2Hash, ContractTz3Hash, ContractTz4Hash, Hash, HashTrait, }; @@ -12,10 +14,12 @@ use tezos_crypto_rs::hash::{ use super::{ByteReprError, ByteReprTrait}; macro_rules! key_hash_type_and_impls { - ($($con:ident($ty:ident)),* $(,)*) => { + ($($(#[$meta:meta])* $con:ident($ty:ident)),* $(,)*) => { + /// Public key hash. Public key hashes are used to represent implicit + /// Tezos addresses. #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq, Hash)] pub enum KeyHash { - $($con($ty)),* + $($(#[$meta])* $con($ty)),* } $(impl From<$ty> for KeyHash { @@ -43,9 +47,13 @@ macro_rules! key_hash_type_and_impls { } key_hash_type_and_impls! { + /// A hash of a Ed25519 public key, `tz1...` in base58-check encoding. Tz1(ContractTz1Hash), + /// A hash of a SecP256k1 public key, `tz2...` in base58-check encoding. 
Tz2(ContractTz2Hash), + /// A hash of a P256 public key, `tz3...` in base58-check encoding. Tz3(ContractTz3Hash), + /// A hash of a BLS public key, `tz4...` in base58-check encoding. Tz4(ContractTz4Hash), } @@ -80,9 +88,13 @@ const TAG_TZ3: u8 = 2; const TAG_TZ4: u8 = 3; impl KeyHash { - // all hashes are blake2b 160-bit hashes + /// Size of the hash in bytes. + /// All hashes are blake2b 160-bit hashes. pub const HASH_SIZE: usize = 20; + /// Size of the byte representation, in bytes. + /// Corresponds to [Self::HASH_SIZE] + 1, where 1 byte is used for a tag. pub const BYTE_SIZE: usize = Self::HASH_SIZE + 1; // hash size + tag size + /// Byte size of a base58-check encoded `key_hash`. pub const BASE58_SIZE: usize = 36; } diff --git a/contrib/mir/src/ast/michelson_lambda.rs b/contrib/mir/src/ast/michelson_lambda.rs index c1dfd2a760d6..6d1cff9fae6a 100644 --- a/contrib/mir/src/ast/michelson_lambda.rs +++ b/contrib/mir/src/ast/michelson_lambda.rs @@ -5,24 +5,35 @@ /* */ /******************************************************************************/ +//! Representation for typed Michelson `lambda 'a 'b` values. + use std::rc::Rc; use crate::lexer::Prim; use super::{annotations::NO_ANNS, Instruction, IntoMicheline, Micheline, Type, TypedValue}; +/// Michelson lambda. Can be either non-recursive or recursive. Michelson +/// lambdas carry their own raw [Micheline] representation to ensure consistent +/// roundtripping through `PACK`/`UNPACK`. #[derive(Debug, Clone, Eq, PartialEq)] pub enum Lambda<'a> { + /// Non-recursive lambda. Lambda { + /// Raw [Micheline] representation. micheline_code: Micheline<'a>, + /// Typechecked code. code: Rc<[Instruction<'a>]>, // see Note: Rc in lambdas }, + /// Recursive lambda. LambdaRec { /// Lambda argument type in_ty: Type, /// Lambda result type out_ty: Type, + /// Raw [Micheline] representation. micheline_code: Micheline<'a>, + /// Typechecked code.
code: Rc<[Instruction<'a>]>, // see Note: Rc in lambdas }, } @@ -38,9 +49,13 @@ independent of the length of the code (there's a Lambda::clone call in the implementation) */ +/// Either a simple [Lambda], or a partially-applied one, the result of the +/// `APPLY` instruction. #[derive(Debug, Clone, Eq, PartialEq)] pub enum Closure<'a> { + /// Simple [Lambda]. Lambda(Lambda<'a>), + /// Partially-applied [Lambda]. Apply { /// Captured argument type arg_ty: Type, diff --git a/contrib/mir/src/ast/michelson_list.rs b/contrib/mir/src/ast/michelson_list.rs index 5a88a65c87ce..2c78795d223b 100644 --- a/contrib/mir/src/ast/michelson_list.rs +++ b/contrib/mir/src/ast/michelson_list.rs @@ -5,33 +5,41 @@ /* */ /******************************************************************************/ +//! Representation for typed Michelson `list 'a` values. + /// A representation of a Michelson list. #[derive(Debug, Clone, Eq, PartialEq)] pub struct MichelsonList(Vec); impl MichelsonList { + /// Construct a new empty list. pub fn new() -> Self { MichelsonList(Vec::new()) } + /// Add an element to the start of the list. pub fn cons(&mut self, x: T) { self.0.push(x) } + /// Remove an element from the start of the list. pub fn uncons(&mut self) -> Option { self.0.pop() } + /// Get the list length, i.e. the number of elements. #[allow(clippy::len_without_is_empty)] pub fn len(&self) -> usize { self.0.len() } + /// Construct an iterator over references to the list elements. pub fn iter(&self) -> Iter<'_, T> { // delegate to `impl IntoIterator for &MichelsonList` self.into_iter() } + /// Construct an iterator over mutable references to the list elements. pub fn iter_mut(&mut self) -> impl Iterator { self.0.iter_mut().rev() } @@ -43,6 +51,7 @@ impl Default for MichelsonList { } } +/// Owning iterator for [MichelsonList]. 
pub struct IntoIter(std::iter::Rev>); impl Iterator for IntoIter { @@ -59,6 +68,7 @@ impl Iterator for IntoIter { impl ExactSizeIterator for IntoIter {} +/// Non-owning iterator for [MichelsonList]. pub struct Iter<'a, T>(std::iter::Rev>); impl<'a, T> Iterator for Iter<'a, T> { diff --git a/contrib/mir/src/ast/michelson_signature.rs b/contrib/mir/src/ast/michelson_signature.rs index aba42cd25ef4..74e5ccf40b83 100644 --- a/contrib/mir/src/ast/michelson_signature.rs +++ b/contrib/mir/src/ast/michelson_signature.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Representation for typed Michelson `signature` values. + use tezos_crypto_rs::{ base58::*, blake2b, @@ -37,12 +39,20 @@ needed. -- @lierdakil */ +/// Common behaviors for Tezos signatures. pub trait SignatureTrait: Sized + AsRef<[u8]> { + /// Raw size of the byte representation (_not_ base58-check encoded). const BYTE_SIZE: usize; + /// Magic prefix used for the base58-check representation. const BASE58_PREFIX: &'static [u8]; + /// Construct signature from raw bytes. This function is potentially unsafe, + /// as in it can break invariants. Use [Self::try_from_bytes] instead. fn from_bytes(bs: &[u8]) -> Self; + /// Try to construct signature from raw bytes. Returns + /// [FromBytesError::InvalidSize] if the input slice length doesn't match + /// [Self::BYTE_SIZE]. fn try_from_bytes(bs: &[u8]) -> Result { if bs.len() == Self::BYTE_SIZE { Ok(Self::from_bytes(bs)) @@ -51,6 +61,7 @@ pub trait SignatureTrait: Sized + AsRef<[u8]> { } } + /// Construct base58-check representation of the signature. fn to_base58_check(&self) -> String { let data = self.as_ref(); let mut hash = Vec::with_capacity(Self::BASE58_PREFIX.len() + data.len()); @@ -60,6 +71,11 @@ pub trait SignatureTrait: Sized + AsRef<[u8]> { .expect("should always be convertible to base58") } + /// Try to construct a signature from its base58-check representation. 
+ /// Returns [FromBase58CheckError] on error, specifically if payload length + /// or prefix doesn't match [Self::BYTE_SIZE] and [Self::BASE58_PREFIX] + /// respectively. Naturally, also if base58 encoding is invalid or if + /// checksum doesn't match. fn from_b58check(s: &str) -> Result { let bytes = s.from_base58check()?; let expected_len = Self::BASE58_PREFIX.len() + Self::BYTE_SIZE; @@ -78,7 +94,8 @@ pub trait SignatureTrait: Sized + AsRef<[u8]> { } macro_rules! defsignature { - ($name:ident, $size:literal, $prefix:expr) => { + ($(#[$meta:meta])* $name:ident, $size:literal, $prefix:expr) => { + $(#[$meta])* #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq)] pub struct $name(hash::Signature); @@ -98,16 +115,41 @@ macro_rules! defsignature { }; } -defsignature!(Ed25519Signature, 64, [9, 245, 205, 134, 18]); // edsig(99) -defsignature!(Secp256k1Signature, 64, [13, 115, 101, 19, 63]); // spsig1(99) -defsignature!(P256Signature, 64, [54, 240, 44, 52]); // p2sig(98) -defsignature!(GenericSignature, 64, [4, 130, 43]); // sig(96) +defsignature!( + /// Ed25519 signature, `edsig...`. + Ed25519Signature, + 64, + [9, 245, 205, 134, 18] +); // edsig(99) +defsignature!( + /// Secp256k1 signature, `spsig...`. + Secp256k1Signature, + 64, + [13, 115, 101, 19, 63] +); // spsig1(99) +defsignature!( + /// P256 signature, `p2sig...`. + P256Signature, + 64, + [54, 240, 44, 52] +); // p2sig(98) +defsignature!( + /// Generic signature. Since raw byte representations of signatures are + /// untagged, and Ed25519, Secp256k1 and P256 signatures have the same + /// length, the type of the signature isn't known when constructing it from + /// raw bytes. This signature of yet unknown type is represented by the + /// generic signature, `sig...`. + GenericSignature, + 64, + [4, 130, 43] +); // sig(96) macro_rules! key_type_and_impls { - ($($con:ident($ty:path)),* $(,)*) => { + ($($(#[$meta:meta])* $con:ident($ty:path)),* $(,)*) => { + /// Enum representing an arbitrary signature.
#[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq)] pub enum Signature { - $($con($ty)),* + $($(#[$meta])* $con($ty)),* } $(impl From<$ty> for Signature { @@ -135,10 +177,16 @@ macro_rules! key_type_and_impls { } key_type_and_impls! { + /// Ed25519 signature, `edsig...`. Ed25519(Ed25519Signature), + /// Secp256k1 signature, `spsig...`. Secp256k1(Secp256k1Signature), + /// P256 signature, `p2sig...`. P256(P256Signature), + /// BLS signature, `BLsig...`. Bls(BlsSignature), + /// Signature of yet-unknown type. Can be either Ed25519, Secp256k1 or P256, + /// but not BLS. See [GenericSignature] for more information. Generic(GenericSignature), // See Note: [Generic signatures] } @@ -263,6 +311,8 @@ impl Signature { /// This is byte-length of `edsig`, `spsig1`, `p2sig` and `sig` variants. pub const GENERIC_BYTE_LENGTH: usize = 64; + /// Check the signature against a given message and public key. Returns + /// `true` if the signature is correct, `false` otherwise. pub fn check(&self, key: &super::michelson_key::Key, msg: &[u8]) -> bool { use super::michelson_key::Key; use Signature::*; diff --git a/contrib/mir/src/ast/or.rs b/contrib/mir/src/ast/or.rs index 6e55775abf26..d97c281beb66 100644 --- a/contrib/mir/src/ast/or.rs +++ b/contrib/mir/src/ast/or.rs @@ -5,13 +5,22 @@ /* */ /******************************************************************************/ +//! General-purpose sum type and some utilities. Used to represent Michelson `or +//! 'a 'b` type. + +/// A simple binary sum type, corresponding to the Michelson `Or` type. It can +/// also be used as a general-purpose sum type where [Result] doesn't quite fit. #[derive(Debug, Clone, Eq, PartialOrd, Ord, PartialEq)] pub enum Or { + /// "Left" variant of the sum. Left(L), + /// "Right" variant of the sum. Right(R), } impl Or { + /// For an [Or] that has the same type in both branches, transform a value + /// inside the [Or] using a supplied function. 
pub fn map(self, f: impl FnOnce(T) -> U) -> Or { match self { Self::Left(x) => Or::Left(f(x)), @@ -21,6 +30,8 @@ impl Or { } impl Or { + /// Depending on whether the [Or] contains [Or::Left] or [Or::Right], use + /// correspondingly either `f` or `g` to transform the value inside. pub fn bimap(self, f: impl FnOnce(T) -> V, g: impl FnOnce(U) -> W) -> Or { match self { Self::Left(x) => Or::Left(f(x)), diff --git a/contrib/mir/src/ast/overloads.rs b/contrib/mir/src/ast/overloads.rs index 63ffced8a7f0..e296ffbd067c 100644 --- a/contrib/mir/src/ast/overloads.rs +++ b/contrib/mir/src/ast/overloads.rs @@ -5,6 +5,12 @@ /* */ /******************************************************************************/ +//! Various overloads for different instructions. The name of the enum +//! corresponds to the name of the instruction, while the variant name +//! corresponds to the stack types used for the particular overload. + +#![allow(missing_docs)] + #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum Add { IntInt, diff --git a/contrib/mir/src/lib.rs b/contrib/mir/src/lib.rs index 2e25ce9bc988..e38fbb779377 100644 --- a/contrib/mir/src/lib.rs +++ b/contrib/mir/src/lib.rs @@ -5,19 +5,25 @@ /* */ /******************************************************************************/ #![warn(clippy::redundant_clone)] +#![warn(missing_docs)] #![deny(clippy::disallowed_methods)] +//! # M.I.R. -- Michelson in Rust +//! +//! Rust implementation of the typechecker and interpreter for the Michelson +//! smart contract language. 
+ pub mod ast; pub mod bls; pub mod context; pub mod gas; pub mod interpreter; -pub mod irrefutable_match; +mod irrefutable_match; pub mod lexer; pub mod parser; pub mod serializer; pub mod stack; -pub mod syntax; +mod syntax; pub mod typechecker; pub mod tzt; -- GitLab From 5a95e875359de47e0c902494ef08d775a4c83721 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 21:35:48 +0300 Subject: [PATCH 05/20] MIR: stack module documentation --- contrib/mir/src/stack.rs | 53 +++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/contrib/mir/src/stack.rs b/contrib/mir/src/stack.rs index 2c025e534876..608c00710902 100644 --- a/contrib/mir/src/stack.rs +++ b/contrib/mir/src/stack.rs @@ -5,12 +5,17 @@ /* */ /******************************************************************************/ +//! Utilities and types for representing a stack. + use std::ops::{Index, IndexMut}; use std::slice::SliceIndex; use crate::ast::*; +/// Stack of [Type]s. pub type TypeStack = Stack; + +/// Stack of [TypedValue]s. Named `IStack` for "interpreter stack". pub type IStack<'a> = Stack>; /// Possibly failed type stack. Stacks are considered failed after @@ -18,7 +23,9 @@ pub type IStack<'a> = Stack>; /// typechecking) with any other stack. #[derive(Debug, PartialEq, Eq, Clone)] pub enum FailingTypeStack { + /// The stack isn't failed. Ok(TypeStack), + /// The stack is failed. Failed, } @@ -76,12 +83,27 @@ impl Stack { len.checked_sub(i + 1).expect("out of bounds stack access") } + /// Removes and returns the element at position `i` within the stack, where + /// 0 corresponds to the top, shifting all elements below it up. This has + /// worst-case complexity of O(n). + /// + /// # Panics + /// + /// When `i` is greater than or equal to the length of the stack.
pub fn remove(&mut self, i: usize) -> T { self.0.remove(self.vec_index(i)) } - /// Insert an element at i'th stack index, such that after the call there are `i` number of - /// elements before the newly inserted element. + /// Insert an element at i'th stack index, such that after the call there + /// are `i` number of elements before the newly inserted element. `insert(0, + /// x)` is equivalent to `push(x)`. + /// + /// This has to move elements of the stack after insertion, so worst-case + /// complexity is O(n). + /// + /// # Panics + /// + /// If `i` is larger than the length of the stack. pub fn insert(&mut self, i: usize, e: T) { if i > 0 { // We subtract one from the index because since our stack is inverted, the insertion @@ -135,6 +157,12 @@ impl Stack { .rev() } + /// Reserve additional space on the stack for at least `additional` + /// elements. Similar to [Vec::reserve]. + /// + /// # Panics + /// + /// If the new capacity exceeds [isize::MAX] bytes. pub fn reserve(&mut self, additional: usize) { self.0.reserve(additional) } @@ -147,6 +175,8 @@ impl Stack { /// Split off the top `size` elements of the stack into a new `Stack`. /// + /// # Panics + /// /// Panics if the `size` is larger than length of the stack. pub fn split_off(&mut self, size: usize) -> Stack { let len = self.len(); @@ -176,21 +206,21 @@ impl Stack { } } -/// Newtype for specifying the order of elements in a `Stack` vs elements in -/// a `Vec`/slice. Used in the `From` trait for `Stack`. _First_ element of -/// the `Vec` will end up at the _top_ of the stack. `from()` conversion has -/// O(n) complexity. See also `TopIsLast`. +/// Newtype for specifying the order of elements in a [Stack] vs elements in +/// a [Vec]/slice. Used in the [From] trait for [Stack]. _First_ element of +/// the [Vec] will end up at the _top_ of the stack. `from()` conversion has +/// O(n) complexity. See also [TopIsLast]. 
/// /// `from_iter()` implementation is slightly inefficient, still O(n), but /// the constant is a bit higher than necessary. If you're worried about -/// efficiency, consider using `TopIsLast` with an explicit `rev()`. +/// efficiency, consider using [TopIsLast] with an explicit `rev()`. pub struct TopIsFirst(pub Stack); -/// Newtype for specifying the order of elements in a `Stack` vs elements in -/// a `Vec`/slice. Used in the `From` trait for `Stack`. _First_ element of -/// the `Vec` will end up at the _bottom_ of the stack. `from()` conversion +/// Newtype for specifying the order of elements in a [Stack] vs elements in +/// a [Vec]/slice. Used in the [From] trait for [Stack]. _First_ element of +/// the [Vec] will end up at the _bottom_ of the stack. `from()` conversion /// has O(1) complexity for vectors, O(n) for slices since those have to be -/// cloned. See also `TopIsFirst` +/// cloned. See also [TopIsFirst]. pub struct TopIsLast(pub Stack); impl From> for TopIsFirst { @@ -256,6 +286,7 @@ impl IndexMut for Stack { } } +/// Owning [Stack] iterator. pub struct IntoIter(std::iter::Rev>); impl Iterator for IntoIter { -- GitLab From 435b00d811f7348a2a32731bd69bf19df595fc90 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 21:51:55 +0300 Subject: [PATCH 06/20] MIR: documentation for the parser module --- contrib/mir/src/parser.rs | 19 +++++++++++++++++-- contrib/mir/src/parser/macros.rs | 7 +++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/contrib/mir/src/parser.rs b/contrib/mir/src/parser.rs index 95449255cb68..5e5c583949d6 100644 --- a/contrib/mir/src/parser.rs +++ b/contrib/mir/src/parser.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Michelson parser. + pub mod macros; use crate::ast::*; @@ -15,15 +17,21 @@ use logos::Logos; use macros::MacroError; use typed_arena::Arena; +/// Errors that can happen during parsing, aside from parser-specific ones. 
#[derive(Debug, PartialEq, thiserror::Error)] pub enum ParserError { + /// An error happened at the lexer stage. #[error(transparent)] LexerError(#[from] LexerError), + /// An error happened during macro expansion. #[error(transparent)] MacroError(#[from] MacroError), } +/// A parser for Michelson. Carries an [Arena] for placing [Micheline] nodes +/// into. pub struct Parser<'a> { + /// The [Arena] to place [Micheline] nodes into. pub arena: Arena>, } @@ -34,16 +42,21 @@ impl Default for Parser<'_> { } impl<'a> Parser<'a> { + /// Construct a new parser. pub fn new() -> Self { Parser { arena: Arena::new(), } } + /// Parse Michelson code or value into [Micheline]. pub fn parse(&'a self, src: &'a str) -> Result> { syntax::MichelineNakedParser::new().parse(&self.arena, spanned_lexer(src)) } + /// Parse Michelson script into [Micheline]. Top-level refers to a full + /// Michelson script, i.e. something that contains `parameter`, `storage` + /// and `code` fields. pub fn parse_top_level( &'a self, src: &'a str, @@ -52,8 +65,10 @@ impl<'a> Parser<'a> { } } -pub fn spanned_lexer( - src: &'_ str, +/// Given a Michelson string, create an iterator over lexemes in that string, +/// with location information attached. +pub(crate) fn spanned_lexer( + src: &str, ) -> impl Iterator> + '_ { Tok::lexer(src) .spanned() diff --git a/contrib/mir/src/parser/macros.rs b/contrib/mir/src/parser/macros.rs index 35e290f90913..75594f0f5d49 100644 --- a/contrib/mir/src/parser/macros.rs +++ b/contrib/mir/src/parser/macros.rs @@ -5,17 +5,24 @@ /* */ /******************************************************************************/ +//! Macro substitutions. + use super::*; use crate::lexer::macros::*; use crate::lexer::Prim; +/// Errors possible during macro expansion. #[derive(Debug, PartialEq, Eq, thiserror::Error)] pub enum MacroError { + /// Macro is applied to an unexpected (i.e. invalid) number of arguments, + /// e.g. `FAIL {}`, or `IF_SOME` without arguments. 
#[error("unexpected number of arguments for macro: {0}")] UnexpectedArgumentCount(Macro), } +/// Expand a macro in raw [Micheline]. Requires access to an [Arena] in order to +/// allocate the new instructions the macro was expanded to. pub fn expand_macro<'a>( arena: &'a Arena>, m: &Macro, -- GitLab From 6f16eedaded40271561e8126742a7daf3dbe6910 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Tue, 26 Dec 2023 22:17:34 +0300 Subject: [PATCH 07/20] MIR: documentation for the lexer module --- contrib/mir/src/lexer.rs | 44 ++++++++++++++++++++++++++++++--- contrib/mir/src/lexer/errors.rs | 11 +++++++++ contrib/mir/src/lexer/macros.rs | 13 +++++++++- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/contrib/mir/src/lexer.rs b/contrib/mir/src/lexer.rs index cc87f28329ff..68412ae5b3de 100644 --- a/contrib/mir/src/lexer.rs +++ b/contrib/mir/src/lexer.rs @@ -5,6 +5,10 @@ /* */ /******************************************************************************/ +//! Michelson lexer. The main lexer entrypoint is defined on the [Tok] type, +//! specifically, `Tok::lexer`. See [Logos::lexer]. Generally, you don't need to +//! call the lexer explicitly, [crate::parser::Parser] will do that for you. + use std::borrow::Cow; use logos::Logos; @@ -30,10 +34,11 @@ macro_rules! coalesce { /// provided, and defines `FromStr` implementation using stringified /// representation of the identifiers. macro_rules! defprim { - ($ty:ident; $($(#[token($str:expr)])? $prim:ident),* $(,)*) => { + ($(#[$meta:meta])* $ty:ident; $($(#[token($str:expr)])? $prim:ident),* $(,)*) => { #[derive(Debug, Clone, Copy, PartialEq, Eq, EnumCount)] - #[allow(non_camel_case_types, clippy::upper_case_acronyms)] + #[allow(non_camel_case_types, clippy::upper_case_acronyms, missing_docs)] #[repr(u8)] + $(#[$meta])* pub enum $ty { $($prim),* } @@ -70,6 +75,7 @@ macro_rules! defprim { // TODO: https://gitlab.com/tezos/tezos/-/issues/6632 // Add a test on ordering defprim! { + /// Micheline primitives. 
Prim; parameter, storage, code, False, Elt, Left, None, Pair, Right, Some, True, Unit, @@ -109,14 +115,17 @@ defprim! { } impl Prim { - // Our [Prim] enum has its variants in the right order, so its - // discriminant should match the ID. + /// Write the primitive identifier (as per Micheline binary encoding) into + /// the output vector. pub fn encode(&self, out: &mut Vec) { + // Our [Prim] enum has its variants in the right order, so its + // discriminant should match the ID. out.push(*self as u8) } } defprim! { + /// Additional TZT primitives. TztPrim; Stack_elt, input, @@ -135,22 +144,37 @@ defprim! { Contract, } +/// Either a Micheline primitive, TZT primitive, or a macro lexeme. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Noun { + /// Micheline primitive. Prim(Prim), + /// TZT primitive. TztPrim(TztPrim), + /// Macro lexeme. MacroPrim(Macro), } +/// A single Micheline annotation. Annotations are optionally-owned, meaning +/// they should use references when feasible, but can use owned heap-allocated +/// values when necessary. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Annotation<'a> { + /// Special annotation, i.e. `@%`, `@%%` or `%@` verbatim. Special(Cow<'a, str>), + /// Field annotation, e.g. `%foo`. The inner value does not contain the + /// leading `%`. Field(Cow<'a, str>), + /// Variable annotation, e.g. `@foo`. The inner value does not contain the + /// leading `@`. Variable(Cow<'a, str>), + /// Type annotation, e.g. `:foo`. The inner value does not contain the + /// leading `:`. Type(Cow<'a, str>), } impl Annotation<'_> { + /// Convert the inner value of [Annotation] to an owned [String]. pub fn into_owned(self) -> Annotation<'static> { match self { Annotation::Special(s) => Annotation::Special(Cow::Owned(s.into_owned())), @@ -184,33 +208,45 @@ pub(crate) fn try_ann_from_str(value: &str) -> Option { } } +/// Tokens representing Michelson lexemes. 
#[derive(Debug, Clone, PartialEq, Eq, Logos)] #[logos(error = LexerError, skip r"[ \t\r\n\v\f]+|#[^\n]*\n")] pub enum Tok<'a> { + /// A primitive token: a Micheline primitive, TZT primitive, or a macro + /// token. #[regex(r"[A-Za-z_][A-Za-z_0-9]*", lex_noun)] Noun(Noun), + /// Number literal. #[regex("([+-]?)[0-9]+", lex_number)] Number(BigInt), + /// String literal. #[regex(r#""(\\.|[^\\"])*""#, lex_string)] String(String), + /// Bytes literal. #[regex(r#"0x[0-9a-fA-F]*"#, lex_bytes)] Bytes(Vec), + /// An annotation, see [Annotation]. // regex as per https://tezos.gitlab.io/active/michelson.html#syntax #[regex(r"@%|@%%|%@|[@:%][_0-9a-zA-Z][_0-9a-zA-Z\.%@]*", lex_annotation)] Annotation(Annotation<'a>), + /// Left parenthesis `(`. #[token("(")] LParen, + /// Right parenthesis `)`. #[token(")")] RParen, + /// Left brace `{`. #[token("{")] LBrace, + /// Right brace `}`. #[token("}")] RBrace, + /// Semicolon. #[token(";")] Semi, } diff --git a/contrib/mir/src/lexer/errors.rs b/contrib/mir/src/lexer/errors.rs index 04d8ae70c3cb..8792f0346578 100644 --- a/contrib/mir/src/lexer/errors.rs +++ b/contrib/mir/src/lexer/errors.rs @@ -5,23 +5,34 @@ /* */ /******************************************************************************/ +//! Errors possible during the lexing stage. + +/// Unknown primitive error #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] #[error("unknown primitive: {0}")] pub struct PrimError(pub String); +/// Errors possible during the lexing stage. #[derive(Debug, PartialEq, Clone, thiserror::Error, Default)] pub enum LexerError { + /// Unrecognized token. #[error("unknown token")] #[default] UnknownToken, + /// Parsing of a numeric literal failed. This shouldn't generally happen + /// except when parsing tzt. #[error("parsing of numeric literal {0} failed")] NumericLiteral(String), + /// Found a forbidden character in a string literal. 
#[error("forbidden character found in string literal \"{0}\"")] ForbiddenCharacterIn(String), + /// Found an undefined escape sequence in a string literal. #[error("undefined escape sequence: \"\\{0}\"")] UndefinedEscape(char), + /// Unknown primitive. #[error(transparent)] PrimError(#[from] PrimError), + /// Invalid hexadecimal sequence in a byte literal. #[error("invalid hex sequence: {0}")] InvalidHex(#[from] hex::FromHexError), } diff --git a/contrib/mir/src/lexer/macros.rs b/contrib/mir/src/lexer/macros.rs index 15ff9f5ace98..1ec109f568eb 100644 --- a/contrib/mir/src/lexer/macros.rs +++ b/contrib/mir/src/lexer/macros.rs @@ -5,20 +5,27 @@ /* */ /******************************************************************************/ +//! Utilities for lexing macros. + use logos::*; use super::super::ast::Micheline; use super::errors::*; +/// Arguments, to which a macro is applied. #[derive(Debug)] pub enum MacroArgs<'a> { + /// A macro was applied to no arguments. NoArgs, + /// A macro is applied to one argument. OneArg(Micheline<'a>), + /// A macro is applied to two arguments. TwoArgs(Micheline<'a>, Micheline<'a>), } +/// Enum representing macro names. #[derive(Debug, Clone, PartialEq, Eq, Logos)] -#[allow(non_camel_case_types, clippy::upper_case_acronyms)] +#[allow(non_camel_case_types, clippy::upper_case_acronyms, missing_docs)] #[logos(error = LexerError)] pub enum Macro { #[token("CMPEQ")] @@ -39,8 +46,12 @@ pub enum Macro { ASSERT_CMPLE, #[token("FAIL")] FAIL, + /// Corresponds to `DI..IP` macro. The value carried by the variant + /// corresponds to the number of `I`s. #[regex("DII+P", lex_diip)] DIIP(u16), + /// Corresponds to `DU..UP` macro. The value carried by the variant + /// corresponds to the number of `U`s. 
#[regex("DUU+P", lex_duup)] DUUP(u16), } -- GitLab From 282e0cb886316894cfff2e3bd0d7238de2e78aeb Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 14:49:12 +0300 Subject: [PATCH 08/20] MIR: documentation for the bls module --- contrib/mir/src/bls.rs | 2 ++ contrib/mir/src/bls/fr.rs | 13 +++++++++++++ contrib/mir/src/bls/g1.rs | 21 ++++++++++++++++++++- contrib/mir/src/bls/g2.rs | 21 ++++++++++++++++++++- contrib/mir/src/bls/pairing.rs | 2 ++ 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/contrib/mir/src/bls.rs b/contrib/mir/src/bls.rs index 16880ba6d526..b28765469e83 100644 --- a/contrib/mir/src/bls.rs +++ b/contrib/mir/src/bls.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! BLS12-381 data types and operations. + pub mod fr; pub mod g1; pub mod g2; diff --git a/contrib/mir/src/bls/fr.rs b/contrib/mir/src/bls/fr.rs index cb60674aefd7..2a929ac30695 100644 --- a/contrib/mir/src/bls/fr.rs +++ b/contrib/mir/src/bls/fr.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Definitions for [Fr], an element of the BLS12-381 scalar field Fr + use std::mem::MaybeUninit; use blst::*; @@ -12,12 +14,17 @@ use num_bigint::{BigInt, Sign}; use num_traits::One; use std::ops::{Add, Mul, Neg}; +/// An element of the BLS12-381 scalar field Fr #[derive(PartialEq, Eq, Debug, Clone)] pub struct Fr(blst_fr); impl Fr { + /// Size of serialized data in bytes. pub const BYTE_SIZE: usize = 32; + /// Construct [Fr] from a scalar represented as raw bytes. If the scalar + /// isn't part of the field, or if its length exceeds [Self::BYTE_SIZE], the + /// result is [None]. pub fn from_bytes(bs: &[u8]) -> Option { if bs.len() > Self::BYTE_SIZE { return None; @@ -39,6 +46,7 @@ impl Fr { mbfr.map(Self) } + /// Construct a scalar represented as raw bytes from [Fr]. 
pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] { let mut out = [0; Self::BYTE_SIZE]; let mut scalar = MaybeUninit::uninit(); @@ -64,6 +72,8 @@ impl Fr { }) } + /// Construct [Fr] from a [BigInt] scalar. The scalar is taken modulo group + /// order. pub fn from_big_int(i: &BigInt) -> Self { // this would be likely slightly more efficient with rem_euclid, but // it's only added in num-bigint 0.4, and we're stuck with 0.3 for now @@ -75,14 +85,17 @@ impl Fr { Self::from_bytes(&buf).unwrap() } + /// Construct a [BigInt] scalar from [Fr]. pub fn to_big_int(&self) -> BigInt { BigInt::from_bytes_le(Sign::Plus, &self.to_bytes()) } + /// Additive identity in the Fr field. pub fn zero() -> Self { Self::from_bytes(&[]).unwrap() } + /// Multiplicative identity in the Fr field. pub fn one() -> Self { Self::from_bytes(&[1]).unwrap() } diff --git a/contrib/mir/src/bls/g1.rs b/contrib/mir/src/bls/g1.rs index 2abc9cbdce62..e075e1a87cc8 100644 --- a/contrib/mir/src/bls/g1.rs +++ b/contrib/mir/src/bls/g1.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Definitions for [G1], a point on the BLS12-381 curve G1. + use blst::*; use std::{ mem::MaybeUninit, @@ -13,12 +15,17 @@ use std::{ use super::fr::Fr; +/// A point on the BLS12-381 curve G1. #[derive(PartialEq, Eq, Debug, Clone)] pub struct G1(blst_p1); impl G1 { - const BYTE_SIZE: usize = 96; + /// Byte size of serialized data. + pub const BYTE_SIZE: usize = 96; + /// Construct [G1] from raw bytes representing a point on G1 in + /// affine coordinates. If data size is not [Self::BYTE_SIZE] or the point + /// is not on G1, the result is [None]. pub fn from_bytes(bs: &[u8]) -> Option { if bs.len() != Self::BYTE_SIZE { None @@ -58,6 +65,8 @@ impl G1 { } } + /// Serialize [G1] to a byte array, representing a point on G1 in + /// affine coordinates. 
pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] { let mut out = [0; Self::BYTE_SIZE]; unsafe { @@ -66,6 +75,7 @@ impl G1 { out } + /// Additive identity on G1. pub fn zero() -> Self { G1::from_bytes(&[ 0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -76,10 +86,19 @@ impl G1 { .unwrap() } + /// The fixed generator of G1 (`blst::BLS12_381_G1`). G1 is an additive group, so this is not an identity element. pub fn one() -> Self { Self::from_affine(unsafe { blst::BLS12_381_G1 }).unwrap() } + /// Additive inverse of [`G1::one()`], such that + /// + /// ``` + /// # use mir::bls::g1::G1; + /// # assert!( + /// G1::one() + G1::neg_one() == G1::zero() + /// # ); + /// ``` pub fn neg_one() -> Self { Self::from_affine(unsafe { blst::BLS12_381_NEG_G1 }).unwrap() } diff --git a/contrib/mir/src/bls/g2.rs b/contrib/mir/src/bls/g2.rs index ce803474a09c..971408f3f048 100644 --- a/contrib/mir/src/bls/g2.rs +++ b/contrib/mir/src/bls/g2.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Definitions for [G2], a point on the BLS12-381 curve G2. + use blst::*; use std::{ mem::MaybeUninit, @@ -13,12 +15,17 @@ use std::{ use super::fr::Fr; +/// A point on the BLS12-381 curve G2. #[derive(PartialEq, Eq, Debug, Clone)] pub struct G2(blst_p2); impl G2 { - const BYTE_SIZE: usize = 192; + /// Byte size of serialized data. + pub const BYTE_SIZE: usize = 192; + /// Construct [G2] from raw bytes representing a point on G2 in + /// affine coordinates. If data size is not [Self::BYTE_SIZE] or the point + /// is not on G2, the result is [None]. pub fn from_bytes(bs: &[u8]) -> Option { if bs.len() != Self::BYTE_SIZE { None @@ -58,6 +65,8 @@ impl G2 { } } + /// Serialize [G2] to a byte array, representing a point on G2 in + /// affine coordinates. pub fn to_bytes(&self) -> [u8; Self::BYTE_SIZE] { let mut out = [0; Self::BYTE_SIZE]; unsafe { @@ -66,6 +75,7 @@ impl G2 { out } + /// Additive identity on G2.
pub fn zero() -> Self { G2::from_bytes(&[ 0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -79,10 +89,19 @@ impl G2 { .unwrap() } + /// Multiplicative identity on G2. pub fn one() -> Self { Self::from_affine(unsafe { blst::BLS12_381_G2 }).unwrap() } + /// Additive inverse of [`G2::one()`], such that + /// + /// ``` + /// # use mir::bls::g2::G2; + /// # assert!( + /// G2::one() + G2::neg_one() == G2::zero() + /// # ); + /// ``` pub fn neg_one() -> Self { Self::from_affine(unsafe { blst::BLS12_381_NEG_G2 }).unwrap() } diff --git a/contrib/mir/src/bls/pairing.rs b/contrib/mir/src/bls/pairing.rs index 11f199561a9d..d5f99c0f269e 100644 --- a/contrib/mir/src/bls/pairing.rs +++ b/contrib/mir/src/bls/pairing.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Check a BLS12-381 pairing. + use std::borrow::Borrow; use blst::*; -- GitLab From 12f59fe0b2bdc0481ebdabba5bf69796c3cd6e91 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 15:07:31 +0300 Subject: [PATCH 09/20] MIR: documentation for the serializer module --- contrib/mir/src/serializer.rs | 22 ++++++++++++++--- contrib/mir/src/serializer/decode.rs | 8 +++++++ contrib/mir/src/serializer/encode.rs | 35 ++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/contrib/mir/src/serializer.rs b/contrib/mir/src/serializer.rs index ed2f8dec8b4c..0c1c9e3e0204 100644 --- a/contrib/mir/src/serializer.rs +++ b/contrib/mir/src/serializer.rs @@ -1,4 +1,20 @@ -pub(self) mod constants; -pub mod decode; -pub mod encode; +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +//! Serialization to and deserialization from bytes. Used for `PACK` and +//! 
`UNPACK` instructions respectively, but can be used for general-purpose +//! Michelson data serialization as well. +//! +//! Functions are defined as associated functions on [crate::ast::Micheline], +//! see it for more. + +mod constants; +mod decode; +mod encode; mod integration_tests; + +pub use {decode::*, encode::*}; diff --git a/contrib/mir/src/serializer/decode.rs b/contrib/mir/src/serializer/decode.rs index 38d8960ed050..986d5f69447b 100644 --- a/contrib/mir/src/serializer/decode.rs +++ b/contrib/mir/src/serializer/decode.rs @@ -23,20 +23,28 @@ use crate::{ lexer::{try_ann_from_str, Annotation, Prim}, }; +/// Errors that can happen during deserialization. #[derive(PartialEq, Debug, Clone, Copy, thiserror::Error)] pub enum DecodeError { + /// Trailing bytes present after decoding a value. #[error("trailing bytes after decoding the value")] TrailingBytes, + /// Expected PACK format, but no leading 0x05 byte found. #[error("PACK tag 0x05 not found")] NoPackTag, + /// Expected more data, but found EOF. #[error("expected more data, but got EOF")] UnexpectedEOF, + /// Unknown data tag. #[error("unknown tag: {0}")] UnknownTag(u8), + /// Forbidden character found during string deserialization. #[error("forbidden character in string")] ForbiddenStringCharacter, + /// Expected a primitive, but could not interpret the byte as a primitive. #[error("unknown primitive tag: {0}")] UnknownPrim(u8), + /// Failed to deserialize an annotation. #[error("could not decode annotation")] BadAnnotation, } diff --git a/contrib/mir/src/serializer/encode.rs b/contrib/mir/src/serializer/encode.rs index dc415e538372..e4cc3faad805 100644 --- a/contrib/mir/src/serializer/encode.rs +++ b/contrib/mir/src/serializer/encode.rs @@ -73,6 +73,25 @@ impl<'a> AppEncoder<'a> for &'a [Micheline<'a>] { } impl Annotation<'_> { + /// Serialize annotation to the output byte [Vec], using the `PACK` format. 
+ /// Essentially this means write the annotation with the corresponding tag + /// character verbatim to the output, so, for example, + /// + /// ``` + /// use mir::lexer::Annotation; + /// let mut out = vec![]; + /// Annotation::Field("field".into()).encode_bytes(&mut out); + /// assert_eq!(&out, b"%field"); + /// ``` + /// + /// Note that [Annotation::Special] are written to the output verbatim: + /// + /// ``` + /// use mir::lexer::Annotation; + /// let mut out = vec![]; + /// Annotation::Special("@%".into()).encode_bytes(&mut out); + /// assert_eq!(&out, b"@%"); + /// ``` pub fn encode_bytes(&self, out: &mut Vec) { match self { Annotation::Special(s) => out.extend_from_slice(s.as_bytes()), @@ -93,6 +112,22 @@ impl Annotation<'_> { } impl Annotations<'_> { + /// Serialize a collection of annotations to the output byte [Vec], using + /// the `PACK` format. Essentially this means write 4 bytes of length, + /// followed by annotations with the corresponding tag character verbatim to + /// the output, separated by a space character `0x20`. 
So, for example, + /// + /// ``` + /// use mir::ast::annotations::Annotations; + /// use mir::lexer::Annotation; + /// let mut out = vec![]; + /// Annotations::from([ + /// Annotation::Field("field".into()), + /// Annotation::Variable("var".into()), + /// ]) + /// .encode_bytes(&mut out); + /// assert_eq!(&out, b"\x00\x00\x00\x0B%field @var"); + /// ``` pub fn encode_bytes(&self, out: &mut Vec) { with_patchback_len(out, |out| { // Add them space-separated -- GitLab From 69c219d0283e620d22962c8cd7a8be9d175e1b55 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 15:28:42 +0300 Subject: [PATCH 10/20] MIR: documentation for the gas module --- contrib/mir/src/gas.rs | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/contrib/mir/src/gas.rs b/contrib/mir/src/gas.rs index fd887450e481..50ce8e5ef528 100644 --- a/contrib/mir/src/gas.rs +++ b/contrib/mir/src/gas.rs @@ -5,34 +5,47 @@ /* */ /******************************************************************************/ +//! Gas accounting and costs. + use num_bigint::{BigInt, BigUint}; +/// Structure carrying the remaining gas amount. #[derive(Debug)] pub struct Gas { milligas_amount: Option, } +/// Out of gas error. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] #[error("out of gas")] pub struct OutOfGas; -// Default gas limit per transaction, according to -// https://opentezos.com/tezos-basics/economics-and-rewards/#transaction-cost -const DEFAULT_GAS_AMOUNT: u32 = 1_040_000; +/// Default gas limit per transaction, according to +/// +pub const DEFAULT_GAS_AMOUNT: u32 = 1_040_000; impl Default for Gas { + /// Constructs [Gas] with [DEFAULT_GAS_AMOUNT] gas remaining. fn default() -> Self { Gas::new(DEFAULT_GAS_AMOUNT * 1000) } } impl Gas { + /// Construct a new [Gas] with the specified `milligas_amount` milligas + /// remaining. 
pub fn new(milligas_amount: u32) -> Gas { Gas { milligas_amount: Some(milligas_amount), } } + /// Try to consume the specified milligas `cost`. If not enough gas left, + /// return [OutOfGas], and mark gas as exhausted. + /// + /// # Panics + /// + /// If gas was previously exhausted. pub fn consume(&mut self, cost: u32) -> Result<(), OutOfGas> { self.milligas_amount = self.milligas().checked_sub(cost); if self.milligas_amount.is_none() { @@ -42,6 +55,11 @@ impl Gas { } } + /// Get the remaining milligas amount. + /// + /// # Panics + /// + /// If gas was previously exhausted. pub fn milligas(&self) -> u32 { self.milligas_amount .expect("Access to gas after exhaustion") @@ -102,6 +120,8 @@ impl Log2i for u64 { } } +/// Typechecking gas costs. +#[allow(missing_docs)] pub mod tc_cost { use checked::Checked; @@ -232,6 +252,7 @@ pub mod tc_cost { } } +/// Get byte size of [BigInt] or [BigUint]. pub trait BigIntByteSize { /// Minimal size in bytes a given bigint is representable in. fn byte_size(&self) -> u64; @@ -254,6 +275,8 @@ impl BigIntByteSize for BigUint { } } +/// Interpretation gas costs +#[allow(missing_docs)] pub mod interpret_cost { use checked::Checked; use num_bigint::{BigInt, BigUint}; -- GitLab From a4bea5158affb732a5223e232eb6125d254e5c55 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 16:17:59 +0300 Subject: [PATCH 11/20] MIR: documentation for the typechecker module --- contrib/mir/src/typechecker.rs | 78 ++++++++++++++++++++++- contrib/mir/src/typechecker/type_props.rs | 11 ++++ 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/contrib/mir/src/typechecker.rs b/contrib/mir/src/typechecker.rs index 71afb483b40c..fe27357fa229 100644 --- a/contrib/mir/src/typechecker.rs +++ b/contrib/mir/src/typechecker.rs @@ -5,6 +5,9 @@ /* */ /******************************************************************************/ +//! Michelson typechecker definitions. Most functions defined as associated +//! 
functions on [Micheline], see there for more. + use crate::ast::michelson_address::entrypoint::{check_ep_name_len, Entrypoints}; use chrono::prelude::DateTime; use num_bigint::{BigInt, BigUint, TryFromBigIntError}; @@ -36,84 +39,137 @@ use crate::{ast::*, bls}; /// Typechecker error type. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum TcError { + /// Two stacks didn't compare equal when they should have. #[error("type stacks not equal: {0:?} != {1:?}")] StacksNotEqual(TypeStack, TypeStack, StacksNotEqualReason), + /// Ran out of gas during typechecking. #[error(transparent)] OutOfGas(#[from] OutOfGas), + /// The type didn't satisfy a given [TypeProperty]. #[error("type is not {0}: {1:?}")] InvalidTypeProperty(TypeProperty, Type), + /// Encountered FAIL instruction not in tail position. #[error("FAIL instruction is not in tail position")] FailNotInTail, + /// Failed to interpret a number as a value of some type due to a numeric + /// conversion error. #[error("numeric conversion failed: {0}")] NumericConversion(#[from] TryFromBigIntError<()>), + /// Types are not equal when they should be. #[error(transparent)] TypesNotEqual(#[from] TypesNotEqual), + /// Encountered the forbidden `DUP 0` instruction. #[error("DUP 0 is forbidden")] Dup0, + /// Encountered a forbidden `PAIR 0`, `PAIR 1`, `UNPAIR 0` or `UNPAIR 1` + /// instruction. #[error("{0} {1} is forbidden")] PairN01(Prim, u16), + /// Failed typechecking the value as the given type. #[error("value {0} is invalid for type {1:?}")] InvalidValueForType(String, Type), + /// When typechecking a `map` or `big_map`, encountered a non-`Elt` element + /// in a sequence. #[error("value {0:?} is invalid element for container type {1:?}")] InvalidEltForMap(String, Type), + /// Elements of a `map`, `set` or a `big_map` were not sorted in the + /// ascending order. 
#[error("sequence elements must be in strictly ascending order for type {0:?}")] ElementsNotSorted(Type), + /// Duplicate keys/elements when typechecking a `map`, `set` or a `big_map`. #[error("sequence elements must contain no duplicate keys for type {0:?}")] DuplicateElements(Type), + /// The given instruction can not be used with its input stack. #[error("no matching overload for {instr} on stack {stack:?}{}", .reason.as_ref().map_or("".to_owned(), |x| format!(", reason: {}", x)))] NoMatchingOverload { + /// The instruction being typechecked. instr: Prim, + /// The offending input stack. stack: TypeStack, + /// Optional details. reason: Option, }, + /// Encountered an error when typechecking a value represented as raw bytes + /// or a base58-check string. #[error("invalid value for type {0:?}: {1}")] ByteReprError(Type, ByteReprError), + /// Failed to typecheck an annotation as an entrypoint. #[error("invalid entrypoint: {0}")] EntrypointError(ByteReprError), + /// Failed to typecheck value of type `chain_id`. #[error("invalid value for chain_id: {0}")] ChainIdError(#[from] ChainIdError), + /// Encountered a SELF instruction in a forbidden context. #[error("SELF instruction is forbidden in this context")] SelfForbidden, + /// Entrypoint not found. #[error("no such entrypoint: {0}")] NoSuchEntrypoint(Entrypoint), + /// Contract with the given address not found. #[error("no such contract")] NoSuchContract, + /// Implicit account typechecked as a `contract 'ty` where `'ty` is neither + /// `unit` nor `ticket 'a` #[error("unexpected implicit account parameter type: {0:?}")] UnexpectedImplicitAccountType(Type), - #[error("Entrypoint specified from two different sources")] + /// In `CONTRACT` instruction, entrypoint was specified both in the address + /// and as an annotation to the instruction. + #[error("entrypoint specified from two different sources")] EntrypointAmbiguity, + /// Encountered unexpected Micheline syntax. 
#[error("unexpected syntax: {0}")] UnexpectedMicheline(String), + /// When typechecking a complete script, encountered duplicate top-level + /// field, viz. `code`, `parameter`, or `storage`. #[error("duplicate top-level element: {0}")] DuplicateTopLevelElt(Prim), + /// When typechecking a complete script, didn't find a required top-level + /// field, viz. `code`, `parameter`, or `storage`. #[error("missing top-level element: {0}")] MissingTopLevelElt(Prim), + /// Instructions like `DUP n` and `PAIR n` accept an argument that must be a + /// natural between 0 and 1023 inclusive. Found an integer outside these + /// bounds instead. #[error("expected a natural between 0 and 1023, but got {0}")] ExpectedU10(BigInt), + /// Encountered an error when working with annotations. #[error(transparent)] AnnotationError(#[from] AnnotationError), + /// Found a duplicate entrypoint when parsing a type. #[error("duplicate entrypoint: {0}")] DuplicateEntrypoint(Entrypoint), + /// Encountered an explicit default entrypoint annotation where it is + /// forbidden, e.g. with `CONTRACT` instruction. #[error("explicit default entrypoint is forbidden in: {0}")] ExplicitDefaultEntrypointError(Prim), + /// Instruction is not yet implemented. #[error("Unhandled instruction: {0}")] TodoInstr(Prim), + /// Type is not yet implemented. #[error("Unhandled type: {0}")] TodoType(Prim), + /// `big_map` with the supplied identifier not found in the storage. #[error("big map with ID {0} not found in the lazy storage")] BigMapNotFound(BigInt), + /// An error occurred when working with `big_map` storage. #[error("lazy storage error: {0:?}")] LazyStorageError(LazyStorageError), + /// Output stack after `MAP` instruction's code block is empty. #[error("MAP block returned an empty stack")] MapBlockEmptyStack, + /// All branches of a `MAP` instruction's code block are failing.
#[error("all branches of a MAP block use FAILWITH, its type cannot be inferred")] MapBlockFail, } +/// Errors happening when typechecking a value of type `chain_id`. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum ChainIdError { + /// Error happened when typechecking a (supposedly) base58-check encoded + /// string as `chain_id`. #[error("{0}")] FromBase58CheckError(String), + /// Error happened when typechecking raw bytes as `chain_id`. #[error("{0}")] FromBytesError(String), } @@ -130,32 +186,48 @@ impl From for ChainIdError { } } +/// More detailed, optional explanation for [TcError::NoMatchingOverload]. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum NoMatchingOverloadReason { + /// Input stack is too short. #[error("stack too short, expected at least {expected}")] - StackTooShort { expected: usize }, + StackTooShort { + /// Expected minimal stack size + expected: usize, + }, + /// Types don't match. #[error(transparent)] TypesNotEqual(#[from] TypesNotEqual), + /// Expected a type `pair 'a 'b` in the input stack, but did not find it. #[error("expected pair 'a 'b, but got {0:?}")] ExpectedPair(Type), + /// Expected a type `option 'a` in the input stack, but did not find it. #[error("expected option 'a, but got {0:?}")] ExpectedOption(Type), + /// Expected a type `list 'a` in the input stack, but did not find it. #[error("expected list 'a, but got {0:?}")] ExpectedList(Type), + /// Expected a type `or 'a 'b` in the input stack, but did not find it. #[error("expected or 'a 'b, but got {0:?}")] ExpectedOr(Type), + /// Expected a comparable type in the input stack, but it was not + /// comparable. #[error("type not comparable: {0:?}")] TypeNotComparable(Type), } +/// More detailed explanation for [TcError::StacksNotEqual] #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum StacksNotEqualReason { + /// The given types in the stacks do not match. 
#[error(transparent)] TypesNotEqual(#[from] TypesNotEqual), + /// Stack lengths differ. #[error("lengths are different: {0} != {1}")] LengthsDiffer(usize, usize), } +/// Generic type mismatch error. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] #[error("types not equal: {0:?} != {1:?}")] pub struct TypesNotEqual(Type, Type); @@ -206,6 +278,8 @@ impl<'a> Micheline<'a> { parse_ty(ctx, self) } + /// Interpreting `Micheline` as a contract parameter type, collect its + /// entrypoints into [Entrypoints]. pub fn get_entrypoints(&self, ctx: &mut Ctx) -> Result { let (entrypoints, _) = parse_parameter_ty_with_entrypoints(ctx, self)?; Ok(entrypoints) diff --git a/contrib/mir/src/typechecker/type_props.rs b/contrib/mir/src/typechecker/type_props.rs index 4160a992ea32..02004ea7cf92 100644 --- a/contrib/mir/src/typechecker/type_props.rs +++ b/contrib/mir/src/typechecker/type_props.rs @@ -5,11 +5,16 @@ /* */ /******************************************************************************/ +//! Ensure [TypeProperty] holds for a given [Type]. + use super::TcError; use crate::ast::Type; use crate::gas::{tc_cost, Gas}; +/// Type properties, as described in +/// #[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[allow(missing_docs)] pub enum TypeProperty { Comparable, Passable, @@ -36,6 +41,12 @@ impl std::fmt::Display for TypeProperty { } impl Type { + /// Ensure a given property `prop` holds for `self`. This function consumes + /// gas, hence a mutable reference to [Gas] must be provided. The function + /// traverses the type, so worst-case complexity is O(n). + /// + /// If a property doesn't hold, returns [TcError::InvalidTypeProperty]. Can + /// run out of gas, in which case it will return [TcError::OutOfGas]. 
pub fn ensure_prop(&self, gas: &mut Gas, prop: TypeProperty) -> Result<(), TcError> { use Type::*; gas.consume(tc_cost::TYPE_PROP_STEP)?; -- GitLab From d84f8339009bc2d3ad967cbc540f8c77490be440 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 16:21:31 +0300 Subject: [PATCH 12/20] MIR: documentation for the interpreter module --- contrib/mir/src/interpreter.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/contrib/mir/src/interpreter.rs b/contrib/mir/src/interpreter.rs index 8d12a00fc4c0..203a02934746 100644 --- a/contrib/mir/src/interpreter.rs +++ b/contrib/mir/src/interpreter.rs @@ -5,6 +5,9 @@ /* */ /******************************************************************************/ +//! Michelson interpreter definitions. Most functions are defined on +//! [Instruction] and [ContractScript], see there for more. + use checked::Checked; use cryptoxide::hashing::{blake2b_256, keccak256, sha256, sha3_256, sha512}; use num_bigint::{BigInt, BigUint, Sign}; @@ -22,23 +25,32 @@ use crate::irrefutable_match::irrefutable_match; use crate::stack::*; use crate::typechecker::{typecheck_contract_address, typecheck_value}; +/// Errors possible during interpretation. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum InterpretError<'a> { + /// Interpreter ran out of gas. #[error(transparent)] OutOfGas(#[from] OutOfGas), + /// When performing mutez arithmetic, an overflow occurred. #[error("mutez overflow")] MutezOverflow, + /// Interpreter reached a `FAILWITH` instruction. #[error("failed with: {1:?} of type {0:?}")] FailedWith(Type, TypedValue<'a>), + /// An error occurred when working with `big_map` storage. #[error("lazy storage error: {0}")] LazyStorageError(#[from] LazyStorageError), } +/// Errors possible when interpreting a full contract script. 
#[derive(Debug, PartialEq, Eq, thiserror::Error)] pub enum ContractInterpretError<'a> { + /// Failed to typecheck the provided input as the type expected by the + /// script. #[error("failed typechecking input: {0}")] TcError(#[from] crate::typechecker::TcError), - #[error("runtime failure while running the contract: {0}")] + /// Failed during the interpretation of the script code. + #[error("runtime failure while running the script: {0}")] InterpretError(InterpretError<'a>), } -- GitLab From 11fcfcdeff2c200ef2727811b0901b6b7d458f01 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 16:49:59 +0300 Subject: [PATCH 13/20] MIR: documentation for the context module --- contrib/mir/src/context.rs | 67 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/contrib/mir/src/context.rs b/contrib/mir/src/context.rs index 2f37882f74d4..9536b371cb07 100644 --- a/contrib/mir/src/context.rs +++ b/contrib/mir/src/context.rs @@ -1,3 +1,12 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +//! The "outer context" required for typechecking and interpreting Michelson. + #![allow(clippy::type_complexity)] use crate::ast::big_map::{InMemoryLazyStorage, LazyStorage}; use crate::ast::michelson_address::entrypoint::Entrypoints; @@ -8,55 +17,113 @@ use num_bigint::{BigInt, BigUint}; use std::collections::HashMap; use tezos_crypto_rs::hash::OperationListHash; +/// [Ctx] includes "outer context" required for typechecking and interpreting +/// Michelson. pub struct Ctx<'a> { + /// [Gas] counter. Defaults to [`Gas::default()`] pub gas: Gas, + /// Transfer amount that initiated this execution. Defaults to `0` pub amount: i64, + /// Contract balance. Defaults to `0`. pub balance: i64, + /// Current blockchain level. Defaults to `0`. 
pub level: BigUint, + /// Transfer sender, i.e. the contract that initiated the current internal + /// transaction. The result of the `SENDER` instruction. Defaults to + /// `KT1BEqzn5Wx8uJrZNvuS9DVHmLvG9td3fDLi`. pub sender: AddressHash, + /// Transfer source, i.e. the contract that initiated and signed the current + /// transaction. The result of the `SOURCE` instruction. Note that in a + /// regular blockchain, this is necessarily an implicit account. Defaults to + /// `tz1TSbthBCECxmnABv73icw7yyyvUWFLAoSP`. pub source: AddressHash, + /// Minimal block time in seconds. The result of the `MIN_BLOCK_TIME` + /// instruction. Defaults to `1`. pub min_block_time: BigUint, + /// Identifier of the chain where the script is being executed. The result + /// of the `CHAIN_ID` instruction. Defaults to `NetXynUjJNZm7wi`. pub chain_id: tezos_crypto_rs::hash::ChainId, + /// Address of the contract being executed. The result of the `SELF_ADDRESS` + /// instruction. Defaults to `KT1BEqzn5Wx8uJrZNvuS9DVHmLvG9td3fDLi`. pub self_address: AddressHash, + /// A function that maps contract addresses to their entrypoints. It only + /// needs to work with smart contract and smart rollup addresses, as + /// implicit accounts don't really have entrypoints. For a given address, + /// the function must return either [None], meaning the contract doesn't + /// exist, or [`Some(entrypoints)`] with the map of its entrypoints. See + /// also [Self::set_known_contracts]. Defaults to returning [None] for any + /// address. pub lookup_contract: Box Option>, + /// A function that maps public key hashes (i.e. effectively implicit + /// account addresses) to their corresponding voting powers. Note that if + /// you provide a custom function here, you also must define + /// [Self::total_voting_power] to be consistent with your function! See also + /// [Self::set_voting_powers]. Defaults to returning `0` for any address.
pub voting_powers: Box BigUint>, + /// The minimal injection time for the current block, as a unix timestamp + /// (in seconds). Defaults to `0`. pub now: BigInt, + /// Total voting power. Note that if you are setting this manually, you must + /// also provide a consistent implementation for [Self::voting_powers]. See + /// also [Self::set_voting_powers]. Defaults to `0`. pub total_voting_power: BigUint, + /// Hash for the current operation group. This will be used to generate + /// contract addresses for newly-created contracts (via `CREATE_CONTRACT` + /// instruction). Defaults to + /// `onvsLP3JFZia2mzZKWaFuFkWg2L5p3BDUhzh5Kr6CiDDN3rtQ1D`. pub operation_group_hash: [u8; 32], // NB: lifetime is mandatory if we want to use types implementing with // references inside for LazyStorage, and we do due to how Runtime is passed // as &mut + /// Storage for `big_map`s. By default uses [InMemoryLazyStorage], but can + /// admit a custom implementation of [LazyStorage] trait. Defaults to a new, + /// empty, [InMemoryLazyStorage]. pub big_map_storage: Box + 'a>, origination_counter: u32, operation_counter: u128, } impl Ctx<'_> { + /// Increment the internal operation counter and return it. Used as a nonce + /// for operations. pub fn operation_counter(&mut self) -> u128 { self.operation_counter += 1; self.operation_counter } + /// Forcibly set the operation counter. This is mostly useful for testing purposes. pub fn set_operation_counter(&mut self, v: u128) { self.operation_counter = v; } + /// Set a reasonable implementation for [Self::lookup_contract] by providing + /// something that can convert to [`HashMap`]. 
pub fn set_known_contracts(&mut self, v: impl Into>) { let map = v.into(); self.lookup_contract = Box::new(move |ah| map.get(ah).cloned()); } + /// Set a reasonable implementation for [Self::voting_powers] and a + /// consistent value for [Self::total_voting_power] by providing something + /// that converts into [`HashMap`], mapping key hashes to + /// voting powers. If a given key hash is unspecified, its voting power is + /// assumed to be `0`. [Self::total_voting_power] is set to the sum of all + /// values. pub fn set_voting_powers(&mut self, v: impl Into>) { let map: HashMap = v.into(); self.total_voting_power = map.values().sum(); self.voting_powers = Box::new(move |x| map.get(x).unwrap_or(&0u32.into()).clone()); } + /// Increment origination counter and return its new value. Used as a nonce + /// to generate unique contract addresses for the `CREATE_CONTRACT` + /// instruction. pub fn origination_counter(&mut self) -> u32 { self.origination_counter += 1; self.origination_counter } + /// Forcibly set an origination counter. Mostly useful in tests. 
pub fn set_origination_counter(&mut self, v: u32) { self.origination_counter = v; } -- GitLab From 42fc9cedf01eafbd3a66fec2ae658f49e170afae Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 17:12:06 +0300 Subject: [PATCH 14/20] MIR: documentation for the tzt module --- contrib/mir/src/syntax.lalrpop | 2 +- contrib/mir/src/tzt.rs | 47 +++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/contrib/mir/src/syntax.lalrpop b/contrib/mir/src/syntax.lalrpop index 669f0d4272ca..ce2c8f868131 100644 --- a/contrib/mir/src/syntax.lalrpop +++ b/contrib/mir/src/syntax.lalrpop @@ -170,4 +170,4 @@ tztEntity : TztEntity<'a> = { "other_contracts" => TztEntity::OtherContracts(<>), } -pub tztTestEntities : Vec> = semicolonSepSeq; +pub(crate) tztTestEntities : Vec> = semicolonSepSeq; diff --git a/contrib/mir/src/tzt.rs b/contrib/mir/src/tzt.rs index 4937450204dd..62dd87206bf8 100644 --- a/contrib/mir/src/tzt.rs +++ b/contrib/mir/src/tzt.rs @@ -5,6 +5,8 @@ /* */ /******************************************************************************/ +//! Definitions for the TZT runner. + mod expectation; use num_bigint::BigInt; @@ -25,16 +27,23 @@ use crate::syntax::tztTestEntitiesParser; use crate::typechecker::*; use crate::tzt::expectation::*; +/// Test's input stack represented as a [Vec] of pairs of type and typechecked +/// value. The top of the stack is the _leftmost_ element. pub type TestStack<'a> = Vec<(Type, TypedValue<'a>)>; +/// The TZT execution didn't succeed, the expectation is not fulfilled. #[derive(PartialEq, Eq, Clone, Debug)] pub enum TztTestError<'a> { + /// Expected and actual output stacks don't match. StackMismatch( (FailingTypeStack, IStack<'a>), (FailingTypeStack, IStack<'a>), ), + /// An error happened, when the test expected a success. UnexpectedError(TestError<'a>), + /// Execution completed successfully, when the test expected an error.
UnexpectedSuccess(ErrorExpectation<'a>, IStack<'a>), + /// Expected one error, but got another. ExpectedDifferentError(ErrorExpectation<'a>, TestError<'a>), } @@ -69,14 +78,24 @@ impl fmt::Display for TztTestError<'_> { /// Represent one Tzt test. #[derive(Debug, PartialEq, Eq, Clone)] pub struct TztTest<'a> { + /// Test code, the content of the `code` field. pub code: Micheline<'a>, + /// Test input, as defined by the `input` field. pub input: TestStack<'a>, + /// Expected output, as defined by the `output` field. pub output: TestExpectation<'a>, + /// Transfer amount, as defined by the `amount` field. pub amount: Option, + /// Contract balance, as defined by the `balance` field. pub balance: Option, + /// Current chain identifier, as defined by the `chain_id` field. pub chain_id: Option, + /// Self parameter entrypoints, as defined by the type in the `parameter` + /// field. pub parameter: Option, + /// Self address, as defined by the `self` field. pub self_addr: Option, + /// Other known contracts, as defined by `other_contracts` field. pub other_contracts: Option>, } @@ -123,6 +142,7 @@ fn typecheck_stack<'a>( } impl<'a> Parser<'a> { + /// Parse top-level definition of a TZT test. pub fn parse_tzt_test(&'a self, src: &'a str) -> Result> { tztTestEntitiesParser::new() .parse(&self.arena, spanned_lexer(src))? @@ -262,8 +282,10 @@ impl<'a> TryFrom>> for TztTest<'a> { /// the code in a test can fail. #[derive(Debug, PartialEq, Eq, Clone, thiserror::Error)] pub enum TestError<'a> { + /// Error happened during typechecking. #[error(transparent)] TypecheckerError(#[from] TcError), + /// Error happened during interpretation. #[error(transparent)] InterpreterError(InterpretError<'a>), } @@ -278,13 +300,18 @@ impl<'a> From> for TestError<'a> { /// the code in a test. #[derive(Debug, PartialEq, Eq, Clone)] pub enum TestExpectation<'a> { - ExpectSuccess(Vec<(Type, TypedValue<'a>)>), + /// Expecting the test code to finish with the given output stack. 
+ ExpectSuccess(TestStack<'a>), + /// Expecting the test code to fail with the given error. ExpectError(ErrorExpectation<'a>), } +/// Expected test error. #[derive(Debug, PartialEq, Eq, Clone)] pub enum ErrorExpectation<'a> { + /// Typechecker error, with an optional string. TypecheckerError(Option), + /// Interpreter error. InterpreterError(InterpreterErrorExpectation<'a>), } @@ -299,10 +326,15 @@ impl fmt::Display for ErrorExpectation<'_> { } } +/// Interpreter errors we can expect. #[derive(Debug, PartialEq, Eq, Clone)] pub enum InterpreterErrorExpectation<'a> { + /// GeneralOverflow error, which can happen with bit-shift arithmetic. GeneralOverflow(BigInt, BigInt), + /// MutezOverflow error, which can happen with mutez arithmetic. MutezOverflow(i64, i64), + /// FailedWith error, which happens when execution reaches `FAILWITH` + /// instruction. FailedWith(Micheline<'a>), } @@ -319,7 +351,7 @@ impl fmt::Display for InterpreterErrorExpectation<'_> { /// Helper type for use during parsing, represent a single /// line from the test file. -pub enum TztEntity<'a> { +pub(crate) enum TztEntity<'a> { Code(Micheline<'a>), Input(Vec<(Micheline<'a>, Micheline<'a>)>), Output(TztOutput<'a>), @@ -331,9 +363,12 @@ pub enum TztEntity<'a> { OtherContracts(Vec<(Micheline<'a>, Micheline<'a>)>), } -/// Possible values for the "output" expectation field in a Tzt test -pub enum TztOutput<'a> { +/// Possible values for the "output" expectation field in a Tzt test. This is a +/// [Micheline] ("untyped") version of [TestExpectation]. +pub(crate) enum TztOutput<'a> { + /// Expecting the test code to finish with the given output stack. TztSuccess(Vec<(Micheline<'a>, Micheline<'a>)>), + /// Expecting the test code to fail with the given error. TztError(ErrorExpectation<'a>), } @@ -364,6 +399,10 @@ fn execute_tzt_test_code<'a>( Ok((t_stack, i_stack)) } +/// Run a [TztTest]. If the test is successful, the result is `Ok(())`. +/// Otherwise, it returns [TztTestError].
An [Arena] must be supplied, it will +/// be used for storing the results of `UNPACK`, which may end up as part of the +/// error. pub fn run_tzt_test<'a>( test: TztTest<'a>, arena: &'a Arena>, -- GitLab From 817a320c2cb3c2c3aea2773157fd15d392f2a5a9 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 17:22:33 +0300 Subject: [PATCH 15/20] MIR: List currently unsupported instructions in docs --- contrib/mir/src/lib.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/contrib/mir/src/lib.rs b/contrib/mir/src/lib.rs index e38fbb779377..baf7f33f5ea5 100644 --- a/contrib/mir/src/lib.rs +++ b/contrib/mir/src/lib.rs @@ -12,6 +12,29 @@ //! //! Rust implementation of the typechecker and interpreter for the Michelson //! smart contract language. +//! +//! The library is currently incomplete. The following instructions are not +//! supported: +//! +//! - `ADD: timestamp : int` +//! - `ADD: int : timestamp` +//! - `SUB` +//! - `EDIV` +//! - `LSL` +//! - `LSR` +//! - `EMPTY_MAP` +//! - `SAPLING_EMPTY_STATE` +//! - `SAPLING_VERIFY_UPDATE` +//! - `OPEN_CHEST` +//! - `VIEW` +//! +//! The following types are currently not supported: +//! +//! - `chest` +//! - `chest_key` +//! - `tx_rollup_l2_address` +//! - `sapling_state` +//! 
- `sapling_transaction` pub mod ast; pub mod bls; -- GitLab From be2c966bbe46691d7edbc824b1320d556e37848c Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 18:09:03 +0300 Subject: [PATCH 16/20] MIR: move Annotation type to ast::annotations module --- contrib/mir/src/ast.rs | 7 ++--- contrib/mir/src/ast/annotations.rs | 41 +++++++++++++++++++++++++- contrib/mir/src/gas.rs | 2 +- contrib/mir/src/lexer.rs | 43 ++-------------------------- contrib/mir/src/parser.rs | 4 +-- contrib/mir/src/serializer/decode.rs | 4 +-- contrib/mir/src/serializer/encode.rs | 11 ++++--- contrib/mir/src/syntax.lalrpop | 2 +- 8 files changed, 55 insertions(+), 59 deletions(-) diff --git a/contrib/mir/src/ast.rs b/contrib/mir/src/ast.rs index 916e80bf6b9b..8d4b7faff7be 100644 --- a/contrib/mir/src/ast.rs +++ b/contrib/mir/src/ast.rs @@ -34,12 +34,9 @@ use std::{ pub use tezos_crypto_rs::hash::ChainId; use typed_arena::Arena; -use crate::{ - bls, - lexer::{Annotation, Prim}, -}; +use crate::{bls, lexer::Prim}; -pub use annotations::{FieldAnnotation, NO_ANNS}; +pub use annotations::{Annotation, Annotations, FieldAnnotation, NO_ANNS}; pub use big_map::BigMap; pub use byte_repr_trait::{ByteReprError, ByteReprTrait}; pub use micheline::IntoMicheline; diff --git a/contrib/mir/src/ast/annotations.rs b/contrib/mir/src/ast/annotations.rs index 1f97beb48ba2..3df8a1f2e0c2 100644 --- a/contrib/mir/src/ast/annotations.rs +++ b/contrib/mir/src/ast/annotations.rs @@ -10,7 +10,46 @@ use std::borrow::Cow; -use crate::lexer::Annotation; +/// A single Micheline annotation. Annotations are optionally-owned, meaning +/// they should use references when feasible, but can use owned heap-allocated +/// values when necessary. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Annotation<'a> { + /// Special annotation, i.e. `@%`, `@%%` or `%@` verbatim. + Special(Cow<'a, str>), + /// Field annotation, e.g. `%foo`. The inner value does not contain the + /// leading `%`. 
+ Field(Cow<'a, str>), + /// Variable annotation, e.g. `@foo`. The inner value does not contain the + /// leading `@`. + Variable(Cow<'a, str>), + /// Type annotation, e.g. `:foo`. The inner value does not contain the + /// leading `:`. + Type(Cow<'a, str>), +} + +impl Annotation<'_> { + /// Convert the inner value of [Annotation] to an owned [String]. + pub fn into_owned(self) -> Annotation<'static> { + match self { + Annotation::Special(s) => Annotation::Special(Cow::Owned(s.into_owned())), + Annotation::Field(s) => Annotation::Field(Cow::Owned(s.into_owned())), + Annotation::Variable(s) => Annotation::Variable(Cow::Owned(s.into_owned())), + Annotation::Type(s) => Annotation::Type(Cow::Owned(s.into_owned())), + } + } +} + +impl std::fmt::Display for Annotation<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Annotation::Special(s) => write!(f, "{s}"), + Annotation::Field(s) => write!(f, "%{s}"), + Annotation::Variable(s) => write!(f, "@{s}"), + Annotation::Type(s) => write!(f, ":{s}"), + } + } +} /// A structure holding all Tezos annotations on a [Micheline][crate::ast::Micheline] node. 
#[derive(Clone, Eq, PartialEq)] diff --git a/contrib/mir/src/gas.rs b/contrib/mir/src/gas.rs index 50ce8e5ef528..6d240d3b52eb 100644 --- a/contrib/mir/src/gas.rs +++ b/contrib/mir/src/gas.rs @@ -753,7 +753,7 @@ pub mod interpret_cost { } for annot in annots { // Annotations are accounted as simple string literals - use crate::lexer::Annotation as Ann; + use crate::ast::Annotation as Ann; size.str_byte += match annot { // Including annotation prefix into the size too Ann::Field(a) => a.len() + 1, diff --git a/contrib/mir/src/lexer.rs b/contrib/mir/src/lexer.rs index 68412ae5b3de..ffda8ded7cc3 100644 --- a/contrib/mir/src/lexer.rs +++ b/contrib/mir/src/lexer.rs @@ -20,6 +20,8 @@ use macros::*; use num_bigint::BigInt; use strum_macros::EnumCount; +use crate::ast::Annotation; + /// Expand to the first argument if not empty; otherwise, the second argument. macro_rules! coalesce { (, $r:expr) => { @@ -155,47 +157,6 @@ pub enum Noun { MacroPrim(Macro), } -/// A single Micheline annotation. Annotations are optionally-owned, meaning -/// they should use references when feasible, but can use owned heap-allocated -/// values when necessary. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Annotation<'a> { - /// Special annotation, i.e. `@%`, `@%%` or `%@` verbatim. - Special(Cow<'a, str>), - /// Field annotation, e.g. `%foo`. The inner value does not contain the - /// leading `%`. - Field(Cow<'a, str>), - /// Variable annotation, e.g. `@foo`. The inner value does not contain the - /// leading `@`. - Variable(Cow<'a, str>), - /// Type annotation, e.g. `:foo`. The inner value does not contain the - /// leading `:`. - Type(Cow<'a, str>), -} - -impl Annotation<'_> { - /// Convert the inner value of [Annotation] to an owned [String]. 
- pub fn into_owned(self) -> Annotation<'static> { - match self { - Annotation::Special(s) => Annotation::Special(Cow::Owned(s.into_owned())), - Annotation::Field(s) => Annotation::Field(Cow::Owned(s.into_owned())), - Annotation::Variable(s) => Annotation::Variable(Cow::Owned(s.into_owned())), - Annotation::Type(s) => Annotation::Type(Cow::Owned(s.into_owned())), - } - } -} - -impl std::fmt::Display for Annotation<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Annotation::Special(s) => write!(f, "{s}"), - Annotation::Field(s) => write!(f, "%{s}"), - Annotation::Variable(s) => write!(f, "@{s}"), - Annotation::Type(s) => write!(f, ":{s}"), - } - } -} - pub(crate) fn try_ann_from_str(value: &str) -> Option { match value { s @ ("@%" | "@%%" | "%@") => Some(Annotation::Special(Cow::Borrowed(s))), diff --git a/contrib/mir/src/parser.rs b/contrib/mir/src/parser.rs index 5e5c583949d6..b419ea3a55d7 100644 --- a/contrib/mir/src/parser.rs +++ b/contrib/mir/src/parser.rs @@ -99,8 +99,8 @@ pub mod test_helpers { mod tests { use super::test_helpers::*; use crate::ast::micheline::test_helpers::{app, seq}; - use crate::ast::Micheline; - use crate::lexer::{Annotation, Prim}; + use crate::ast::{Annotation, Micheline}; + use crate::lexer::Prim; #[test] fn instructions() { diff --git a/contrib/mir/src/serializer/decode.rs b/contrib/mir/src/serializer/decode.rs index 986d5f69447b..72fe2b369fca 100644 --- a/contrib/mir/src/serializer/decode.rs +++ b/contrib/mir/src/serializer/decode.rs @@ -18,9 +18,9 @@ use typed_arena::Arena; use crate::{ ast::{ annotations::{Annotations, NO_ANNS}, - Micheline, + Annotation, Micheline, }, - lexer::{try_ann_from_str, Annotation, Prim}, + lexer::{try_ann_from_str, Prim}, }; /// Errors that can happen during deserialization. 
diff --git a/contrib/mir/src/serializer/encode.rs b/contrib/mir/src/serializer/encode.rs index e4cc3faad805..b80adc0712c7 100644 --- a/contrib/mir/src/serializer/encode.rs +++ b/contrib/mir/src/serializer/encode.rs @@ -12,8 +12,8 @@ use tezos_data_encoding::{enc::BinWriter, types::Zarith}; use super::constants::*; use crate::{ - ast::{annotations::Annotations, Micheline}, - lexer::{Annotation, Prim}, + ast::{Annotation, Annotations, Micheline}, + lexer::Prim, }; trait AppEncoder<'a>: IntoIterator> + Sized { @@ -78,7 +78,7 @@ impl Annotation<'_> { /// character verbatim to the output, so, for example, /// /// ``` - /// use mir::lexer::Annotation; + /// use mir::ast::Annotation; /// let mut out = vec![]; /// Annotation::Field("field".into()).encode_bytes(&mut out); /// assert_eq!(&out, b"%field"); @@ -87,7 +87,7 @@ impl Annotation<'_> { /// Note that [Annotation::Special] are written to the output verbatim: /// /// ``` - /// use mir::lexer::Annotation; + /// use mir::ast::Annotation; /// let mut out = vec![]; /// Annotation::Special("@%".into()).encode_bytes(&mut out); /// assert_eq!(&out, b"@%"); @@ -118,8 +118,7 @@ impl Annotations<'_> { /// the output, separated by a space character `0x20`. 
So, for example, /// /// ``` - /// use mir::ast::annotations::Annotations; - /// use mir::lexer::Annotation; + /// use mir::ast::{Annotations, Annotation}; /// let mut out = vec![]; /// Annotations::from([ /// Annotation::Field("field".into()), diff --git a/contrib/mir/src/syntax.lalrpop b/contrib/mir/src/syntax.lalrpop index ce2c8f868131..53a7d9001acc 100644 --- a/contrib/mir/src/syntax.lalrpop +++ b/contrib/mir/src/syntax.lalrpop @@ -13,7 +13,7 @@ use crate::ast::*; use crate::ast::annotations::*; use crate::parser::ParserError; use crate::parser::macros::expand_macro; -use crate::lexer::{LexerError, Annotation, Prim, Noun, TztPrim as TzP, Tok}; +use crate::lexer::{LexerError, Prim, Noun, TztPrim as TzP, Tok}; use crate::lexer::macros::{MacroArgs, Macro}; use crate::typechecker as TC; use crate::tzt::*; -- GitLab From 2a1f12faedd937f65777ee2e1cc5af71ef0a787a Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 19:49:37 +0300 Subject: [PATCH 17/20] MIR: add an example to lib docs --- contrib/mir/src/lib.rs | 99 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/contrib/mir/src/lib.rs b/contrib/mir/src/lib.rs index baf7f33f5ea5..e228ac8a0db6 100644 --- a/contrib/mir/src/lib.rs +++ b/contrib/mir/src/lib.rs @@ -35,6 +35,105 @@ //! - `tx_rollup_l2_address` //! - `sapling_state` //! - `sapling_transaction` +//! +//! # Usage +//! +//! The general pipeline is as follows: parse → typecheck → interpret → +//! serialize result. +//! +//! There are essentially two parsers available, one, in [parser::Parser], which +//! can be used to parse Michelson source code from strings. Another one is +//! implemented as [ast::Micheline::decode_raw], which can be used to +//! deserialize Michelson from bytes. +//! +//! Whether parsed from string or bytes, the result of a parse is +//! [ast::Micheline]. Since Micheline can represent any part of a Michelson +//! script, several associated functions exist for typechecking: +//! +//! 
- [ast::Micheline::typecheck_value] can be used to typecheck a Michelson +//! value, e.g. `1` or `Some "string"`. +//! - [ast::Micheline::typecheck_instruction] can be used to typecheck a Michelson +//! instruction or a sequence of instructions. +//! - [ast::Micheline::typecheck_script] can be used to typecheck a full +//! Michelson script, i.e. something that defines `parameter`, `storage` and +//! `code` fields. +//! +//! Any of these functions requires a reference to the external context, +//! [context::Ctx]. Context keeps track of the used gas, and also carries +//! information about the world outside of the interpreter. You can construct a +//! context with reasonable defaults using [`context::Ctx::default()`]. After that, you +//! may want to adjust some things. Refer to [context::Ctx] documentation. +//! +//! Once `Micheline` is typechecked, it will result in either [ast::TypedValue], +//! [ast::Instruction], or [ast::ContractScript]. The latter two have +//! [ast::Instruction::interpret] and [ast::ContractScript::interpret] +//! associated functions that serve as main entry-points for the interpreter. +//! +//! The result of interpretation is either a [ast::TypedValue] or a stack of +//! them. [ast::IntoMicheline::into_micheline_optimized_legacy] can be used to +//! convert [ast::TypedValue] into [ast::Micheline], at which point, +//! [ast::Micheline::encode] can be employed to serialize the data. +//! +//! Some functions require access to a [typed_arena::Arena]. [parser::Parser] +//! already has one, so that one can be reused. If memory consumption is a +//! concern, and depending on the workload, it may be slightly more economical +//! to create a new `Arena` for different stages. +//! +//! Here's a simple example, running a Fibonacci contract: +//! +//! ``` +//! use mir::ast::*; +//! use mir::context::Ctx; +//! use mir::parser::Parser; +//! use typed_arena::Arena; +//! let script = r#" +//! parameter nat; +//! storage int; +//! 
code { CAR ; INT ; PUSH int 0 ; DUP 2 ; GT ; +//! IF { DIP { PUSH int -1 ; ADD } ; +//! PUSH int 1 ; +//! DUP 3 ; +//! GT ; +//! LOOP { SWAP ; DUP 2 ; ADD ; DIP 2 { PUSH int -1 ; ADD } ; DUP 3 ; GT } ; +//! DIP { DROP 2 } } +//! { DIP { DROP } }; +//! NIL operation; +//! PAIR } +//! "#; +//! let parser = Parser::new(); +//! let contract_micheline = parser.parse_top_level(script).unwrap(); +//! let mut ctx = Ctx::default(); +//! // You can change various things about the context here, see [Ctx] +//! // documentation. +//! let contract_typechecked = contract_micheline.typecheck_script(&mut ctx).unwrap(); +//! // We construct parameter and storage manually, but you'd probably +//! // parse or deserialize them from some sort of input/storage, so we use +//! // parser and decoder respectively. +//! // Note that you can opt to use a new parser and/or a new arena for +//! // parameter and storage. However, they _must_ outlive `ctx`. +//! let parameter = parser.parse("123").unwrap(); +//! let storage = Micheline::decode_raw(&parser.arena, &[0x00, 0x00]).unwrap(); // integer 0 +//! // Note: the arena passed in here _must_ outlive `ctx`. We reuse the one +//! // from `parser` for simplicity, you may also opt to create a new one to +//! // potentially save a bit of memory (depends on the workload). +//! let (operations_iter, new_storage) = contract_typechecked +//! .interpret(&mut ctx, &parser.arena, parameter, storage) +//! .unwrap(); +//! let TypedValue::Int(new_storage_int) = &new_storage else { unreachable!() }; +//! assert_eq!(new_storage_int, &22698374052006863956975682u128.into()); +//! assert_eq!(operations_iter.collect::>(), vec![]); +//! // Arena passed in here does not need to outlive `ctx`. Could reuse the one +//! // from `parser` again, but we create a new one to mix things up. If you're +//! // not concerned about memory consumption, it may be faster to reuse the +//! // same arena everywhere. +//! let packed_new_storage = new_storage +//! 
.into_micheline_optimized_legacy(&Arena::new()) +//! .encode(); +//! assert_eq!( +//! packed_new_storage, +//! vec![0x00, 0x82, 0x81, 0x8d, 0xe6, 0xdf, 0x96, 0x8c, 0xad, 0xa5, 0xc5, 0xb4, 0xac, 0x02] +//! ); +//! ``` pub mod ast; pub mod bls; -- GitLab From 8c888ed9b77a2eacfc4045ae8937a63ee84e92cd Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Wed, 27 Dec 2023 22:43:39 +0300 Subject: [PATCH 18/20] MIR: add a simple example --- contrib/mir/examples/thread_local_storage.rs | 54 ++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 contrib/mir/examples/thread_local_storage.rs diff --git a/contrib/mir/examples/thread_local_storage.rs b/contrib/mir/examples/thread_local_storage.rs new file mode 100644 index 000000000000..ba51bf598e33 --- /dev/null +++ b/contrib/mir/examples/thread_local_storage.rs @@ -0,0 +1,54 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +//! Emulate persistent storage using `thread_local!`. + +use mir::ast::*; +use mir::context::Ctx; +use mir::parser::Parser; +use std::cell::RefCell; + +use typed_arena::Arena; + +/// A simple counter contract +static SCRIPT: &str = r#" + parameter unit; + storage nat; + code { CDR; PUSH nat 1; ADD; NIL operation; PAIR } + "#; + +fn run_contract(parameter: Micheline) { + thread_local! { + // you could define a global static, too, but you would use RwLock + // instead of RefCell then. 
+ static STORAGE: RefCell> = RefCell::new(vec![0, 0]); + } + let parser = Parser::new(); + let contract_micheline = parser.parse_top_level(SCRIPT).unwrap(); + let mut ctx = Ctx::default(); + let contract_typechecked = contract_micheline.typecheck_script(&mut ctx).unwrap(); + STORAGE.with(|storage| { + storage.replace_with(|storage| { + let storage = Micheline::decode_raw(&parser.arena, storage).unwrap(); + let (_, new_storage) = contract_typechecked + .interpret(&mut ctx, &parser.arena, parameter, storage) + .unwrap(); + let TypedValue::Nat(storage_nat) = &new_storage else { unreachable!() }; + println!("{storage_nat}"); + new_storage + .into_micheline_optimized_legacy(&Arena::new()) + .encode() + }); + }); +} + +fn main() { + run_contract(().into()); // prints "1" + run_contract(().into()); // prints "2" + run_contract(().into()); // prints "3" + run_contract(().into()); // prints "4" +} -- GitLab From 0fea58f4150ecb94e8dae8c179a7a34a0bffae50 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Fri, 29 Dec 2023 16:18:15 +0300 Subject: [PATCH 19/20] MIR: add an example of lazily parsing the script --- contrib/mir/examples/lazy_parse.rs | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 contrib/mir/examples/lazy_parse.rs diff --git a/contrib/mir/examples/lazy_parse.rs b/contrib/mir/examples/lazy_parse.rs new file mode 100644 index 000000000000..f294cbdc7816 --- /dev/null +++ b/contrib/mir/examples/lazy_parse.rs @@ -0,0 +1,61 @@ +/******************************************************************************/ +/* */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) [2023] Serokell */ +/* */ +/******************************************************************************/ + +//! Only parse the script once, save a few cycles on subsequent runs. 
+ +use mir::ast::*; +use mir::context::Ctx; +use mir::parser::Parser; +use std::sync::OnceLock; + +/// A simple contract that sums parameter and storage +static SCRIPT: &str = r#" + parameter nat; + storage nat; + code { UNPAIR; ADD; NIL operation; PAIR } + "#; + +static CONTRACT: OnceLock> = OnceLock::new(); + +/// Lazily parse the contract into [Micheline]. This unavoidably leaks a +/// [Parser] on initialization, but since it's supposed to be static, that +/// [Parser] would have to live for the duration of the program anyway, and it +/// only happens once, so memory-wise this shouldn't make a difference. +fn contract() -> &'static Micheline<'static> { + CONTRACT.get_or_init(|| { + let parser = Box::leak(Box::new(Parser::new())); + parser.parse_top_level(SCRIPT).unwrap() + }) +} + +/// We pass storage as a parameter, generally it would be stored somewhere. +fn run_contract(parameter: Micheline, storage: Micheline) { + let parser = Parser::new(); + // The contract is only lazily parsed once. + let contract_micheline = contract(); + let mut ctx = Ctx::default(); + let contract_typechecked = contract_micheline.typecheck_script(&mut ctx).unwrap(); + + let (_, new_storage) = contract_typechecked + .interpret(&mut ctx, &parser.arena, parameter, storage) + .unwrap(); + let TypedValue::Nat(storage_nat) = &new_storage else { unreachable!() }; + println!("{storage_nat}"); +} + +fn time(f: impl Fn()) { + let time = std::time::Instant::now(); + f(); + dbg!(time.elapsed()); +} + +fn main() { + time(|| run_contract(1.into(), 2.into())); // prints "3", takes notably longer than subsequent runs.
+ time(|| run_contract(2.into(), 3.into())); // prints "5" + time(|| run_contract(3.into(), 4.into())); // prints "7" + time(|| run_contract(4.into(), 5.into())); // prints "9" +} -- GitLab From 934ad7620e556e2cf54cbc648afacc3415dd0ccd Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sat, 30 Dec 2023 20:04:58 +0300 Subject: [PATCH 20/20] MIR: add information about examples to README and library docs --- contrib/mir/README.md | 15 +++++++++++++++ contrib/mir/src/lib.rs | 3 +++ 2 files changed, 18 insertions(+) diff --git a/contrib/mir/README.md b/contrib/mir/README.md index 5e61a72d9853..1d33d43f5831 100644 --- a/contrib/mir/README.md +++ b/contrib/mir/README.md @@ -25,3 +25,18 @@ You can run the included tests by the following command. Some tests print gas consumption information (in addition to testing it), but `cargo test` omits output from successful tests by default. To see it, run `cargo test -- --show-output` + +#### Running examples + +The repository includes some simple examples in the `examples/` directory. To +run them, you can use + +`cargo run --example example_name` + +Add the `--release` flag to build with optimization. + +For example: + +`cargo run --example lazy_parse --release` + +Note examples are automatically built (but not run) by `cargo test`. diff --git a/contrib/mir/src/lib.rs b/contrib/mir/src/lib.rs index e228ac8a0db6..7aea26299cb3 100644 --- a/contrib/mir/src/lib.rs +++ b/contrib/mir/src/lib.rs @@ -134,6 +134,9 @@ //! vec![0x00, 0x82, 0x81, 0x8d, 0xe6, 0xdf, 0x96, 0x8c, 0xad, 0xa5, 0xc5, 0xb4, 0xac, 0x02] //! ); //! ``` +//! +//! You can find more examples in +//! pub mod ast; pub mod bls; -- GitLab