From 8373ce0284f59a4cbe830d739a2d5c1dbc0c355d Mon Sep 17 00:00:00 2001
From: Yann Regis-Gianas <yann@nomadic-labs.com>
Date: Thu, 23 Feb 2023 17:12:21 +0100
Subject: [PATCH 1/6] Proto/Michelson: Allow the elaborator to use the unparser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To be able to normalize the code embedded in each lambda value, the
elaborator needs to reuses the unparser.

The unparser was already indirectly dependent on the elaborator,
through a module functor.

Hence, this change introduces an (indirect) mutual dependency between
the elaborator and the unparser.

This is achieved thanks to a stratified definition of two functionals
finally applied together on the elaborator side.

Signed-off-by: Yann Regis-Gianas <yann@nomadic-labs.com>
Co-Authored-by: Raphaël Cauderlier <raphael.cauderlier@nomadic-labs.com>
---
 .../lib_protocol/script_ir_translator.ml      | 125 +++++++++++++++---
 .../lib_protocol/script_ir_unparser.ml        |  10 +-
 .../lib_protocol/script_ir_unparser.mli       |  29 +++-
 3 files changed, 143 insertions(+), 21 deletions(-)

diff --git a/src/proto_alpha/lib_protocol/script_ir_translator.ml b/src/proto_alpha/lib_protocol/script_ir_translator.ml
index f71eda44dd51..41eee00ad96f 100644
--- a/src/proto_alpha/lib_protocol/script_ir_translator.ml
+++ b/src/proto_alpha/lib_protocol/script_ir_translator.ml
@@ -1882,6 +1882,7 @@ let parse_toplevel :
 
 let rec parse_data :
     type a ac.
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     stack_depth:int ->
     context ->
@@ -1889,13 +1890,14 @@ let rec parse_data :
     (a, ac) ty ->
     Script.node ->
     (a * context) tzresult Lwt.t =
- fun ~elab_conf ~stack_depth ctxt ~allow_forged ty script_data ->
+ fun ~unparse_code_rec ~elab_conf ~stack_depth ctxt ~allow_forged ty script_data ->
   Gas.consume ctxt Typecheck_costs.parse_data_cycle >>?= fun ctxt ->
   let non_terminal_recursion ctxt ty script_data =
     if Compare.Int.(stack_depth > 10_000) then
       tzfail Typechecking_too_many_recursive_calls
     else
       parse_data
+        ~unparse_code_rec
         ~elab_conf
         ~stack_depth:(stack_depth + 1)
         ctxt
@@ -2071,6 +2073,7 @@ let rec parse_data :
   | Lambda_t (ta, tr, _ty_name), (Seq (_loc, _) as script_instr) ->
       traced
       @@ parse_kdescr
+           ~unparse_code_rec
            Tc_context.data
            ~elab_conf
            ~stack_depth:(stack_depth + 1)
@@ -2084,6 +2087,7 @@ let rec parse_data :
       traced
       @@ ( lambda_t loc ta tr >>?= fun lambda_rec_ty ->
            parse_lam_rec
+             ~unparse_code_rec
              Tc_context.(add_lambda data)
              ~elab_conf
              ~stack_depth:(stack_depth + 1)
@@ -2325,12 +2329,17 @@ let rec parse_data :
 
 and parse_view :
     type storage storagec.
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     context ->
     (storage, storagec) ty ->
     view ->
     (storage typed_view * context) tzresult Lwt.t =
- fun ~elab_conf ctxt storage_type {input_ty; output_ty; view_code} ->
+ fun ~unparse_code_rec
+     ~elab_conf
+     ctxt
+     storage_type
+     {input_ty; output_ty; view_code} ->
   let legacy = elab_conf.legacy in
   let input_ty_loc = location input_ty in
   record_trace_eval
@@ -2348,6 +2357,7 @@ and parse_view :
   >>?= fun (Ex_ty output_ty, ctxt) ->
   pair_t input_ty_loc input_ty storage_type >>?= fun (Ty_ex_c pair_ty) ->
   parse_instr
+    ~unparse_code_rec
     ~elab_conf
     ~stack_depth:0
     Tc_context.view
@@ -2388,22 +2398,25 @@ and parse_view :
 
 and parse_views :
     type storage storagec.
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     context ->
     (storage, storagec) ty ->
     view_map ->
     (storage typed_view_map * context) tzresult Lwt.t =
- fun ~elab_conf ctxt storage_type views ->
+ fun ~unparse_code_rec ~elab_conf ctxt storage_type views ->
   let aux ctxt name cur_view =
     Gas.consume
       ctxt
       (Michelson_v1_gas.Cost_of.Interpreter.view_update name views)
-    >>?= fun ctxt -> parse_view ~elab_conf ctxt storage_type cur_view
+    >>?= fun ctxt ->
+    parse_view ~unparse_code_rec ~elab_conf ctxt storage_type cur_view
   in
   Script_map.map_es_in_context aux ctxt views
 
 and parse_kdescr :
     type arg argc ret retc.
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     stack_depth:int ->
     tc_context ->
@@ -2412,8 +2425,16 @@ and parse_kdescr :
     (ret, retc) ty ->
     Script.node ->
     ((arg, end_of_stack, ret, end_of_stack) kdescr * context) tzresult Lwt.t =
- fun ~elab_conf ~stack_depth tc_context ctxt arg ret script_instr ->
+ fun ~unparse_code_rec
+     ~elab_conf
+     ~stack_depth
+     tc_context
+     ctxt
+     arg
+     ret
+     script_instr ->
   parse_instr
+    ~unparse_code_rec
     ~elab_conf
     tc_context
     ctxt
@@ -2446,6 +2467,7 @@ and parse_kdescr :
 
 and parse_lam_rec :
     type arg argc ret retc.
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     stack_depth:int ->
     tc_context ->
@@ -2455,8 +2477,17 @@ and parse_lam_rec :
     ((arg, ret) lambda, _) ty ->
     Script.node ->
     ((arg, ret) lambda * context) tzresult Lwt.t =
- fun ~elab_conf ~stack_depth tc_context ctxt arg ret lambda_rec_ty script_instr ->
+ fun ~unparse_code_rec
+     ~elab_conf
+     ~stack_depth
+     tc_context
+     ctxt
+     arg
+     ret
+     lambda_rec_ty
+     script_instr ->
   parse_instr
+    ~unparse_code_rec
     ~elab_conf
     tc_context
     ctxt
@@ -2488,6 +2519,7 @@ and parse_lam_rec :
 
 and parse_instr :
     type a s.
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     stack_depth:int ->
     tc_context ->
@@ -2495,7 +2527,13 @@ and parse_instr :
     Script.node ->
     (a, s) stack_ty ->
     ((a, s) judgement * context) tzresult Lwt.t =
- fun ~elab_conf ~stack_depth tc_context ctxt script_instr stack_ty ->
+ fun ~unparse_code_rec
+     ~elab_conf
+     ~stack_depth
+     tc_context
+     ctxt
+     script_instr
+     stack_ty ->
   let for_logging_only x =
     if elab_conf.keep_extra_types_for_interpreter_logging then Some x else None
   in
@@ -2534,6 +2572,7 @@ and parse_instr :
       tzfail Typechecking_too_many_recursive_calls
     else
       parse_instr
+        ~unparse_code_rec
         ~elab_conf
         tc_context
         ctxt
@@ -2668,6 +2707,7 @@ and parse_instr :
       parse_packable_ty ctxt ~stack_depth:(stack_depth + 1) ~legacy t
       >>?= fun (Ex_ty t, ctxt) ->
       parse_data
+        ~unparse_code_rec
         ~elab_conf
         ~stack_depth:(stack_depth + 1)
         ctxt
@@ -3365,6 +3405,7 @@ and parse_instr :
       check_kind [Seq_kind] code >>?= fun () ->
       check_var_annot loc annot >>?= fun () ->
       parse_kdescr
+        ~unparse_code_rec
         (Tc_context.add_lambda tc_context)
         ~elab_conf
         ~stack_depth:(stack_depth + 1)
@@ -3387,6 +3428,7 @@ and parse_instr :
       check_var_annot loc annot >>?= fun () ->
       lambda_t loc arg ret >>?= fun lambda_rec_ty ->
       parse_lam_rec
+        ~unparse_code_rec
         Tc_context.(add_lambda tc_context)
         ~elab_conf
         ~stack_depth:(stack_depth + 1)
@@ -3982,6 +4024,7 @@ and parse_instr :
       trace
         (Ill_typed_contract (canonical_code, []))
         (parse_kdescr
+           ~unparse_code_rec
            (Tc_context.toplevel ~storage_type ~param_type:arg_type ~entrypoints)
            ctxt
            ~elab_conf
@@ -3991,7 +4034,9 @@ and parse_instr :
            code_field)
       >>=? function
       | {kbef = Item_t (arg, Bot_t); kaft = Item_t (ret, Bot_t); _}, ctxt ->
-          let views_result = parse_views ctxt ~elab_conf storage_type views in
+          let views_result =
+            parse_views ~unparse_code_rec ctxt ~elab_conf storage_type views
+          in
           trace (Ill_typed_contract (canonical_code, [])) views_result
           >>=? fun (_typed_views, ctxt) ->
           (let error_details = Informative loc in
@@ -4726,11 +4771,12 @@ let code_size ctxt code views =
   >|? fun ctxt -> (code_size, ctxt)
 
 let parse_code :
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     context ->
     code:lazy_expr ->
     (ex_code * context) tzresult Lwt.t =
- fun ~elab_conf ctxt ~code ->
+ fun ~unparse_code_rec ~elab_conf ctxt ~code ->
   Script.force_decode_in_context
     ~consume_deserialization_gas:When_needed
     ctxt
@@ -4757,6 +4803,7 @@ let parse_code :
   trace
     (Ill_typed_contract (code, []))
     (parse_kdescr
+       ~unparse_code_rec
        Tc_context.(toplevel ~storage_type ~param_type:arg_type ~entrypoints)
        ~elab_conf
        ctxt
@@ -4774,13 +4821,14 @@ let parse_code :
           ctxt ) )
 
 let parse_storage :
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     context ->
     allow_forged:bool ->
     ('storage, _) ty ->
     storage:lazy_expr ->
     ('storage * context) tzresult Lwt.t =
- fun ~elab_conf ctxt ~allow_forged storage_type ~storage ->
+ fun ~unparse_code_rec ~elab_conf ctxt ~allow_forged storage_type ~storage ->
   Script.force_decode_in_context
     ~consume_deserialization_gas:When_needed
     ctxt
@@ -4791,6 +4839,7 @@ let parse_storage :
       let storage_type = serialize_ty_for_error storage_type in
       Ill_typed_data (None, storage, storage_type))
     (parse_data
+       ~unparse_code_rec
        ~elab_conf
        ~stack_depth:0
        ctxt
@@ -4799,18 +4848,20 @@ let parse_storage :
        (root storage))
 
 let parse_script :
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     elab_conf:elab_conf ->
     context ->
     allow_forged_in_storage:bool ->
     Script.t ->
     (ex_script * context) tzresult Lwt.t =
- fun ~elab_conf ctxt ~allow_forged_in_storage {code; storage} ->
-  parse_code ~elab_conf ctxt ~code
+ fun ~unparse_code_rec ~elab_conf ctxt ~allow_forged_in_storage {code; storage} ->
+  parse_code ~unparse_code_rec ~elab_conf ctxt ~code
   >>=? fun ( Ex_code
                (Code
                  {code; arg_type; storage_type; views; entrypoints; code_size}),
              ctxt ) ->
   parse_storage
+    ~unparse_code_rec
     ~elab_conf
     ctxt
     ~allow_forged:allow_forged_in_storage
@@ -4834,12 +4885,13 @@ type typechecked_code_internal =
       -> typechecked_code_internal
 
 let typecheck_code :
+    unparse_code_rec:Script_ir_unparser.unparse_code_rec ->
     legacy:bool ->
     show_types:bool ->
     context ->
     Script.expr ->
     (typechecked_code_internal * context) tzresult Lwt.t =
- fun ~legacy ~show_types ctxt code ->
+ fun ~unparse_code_rec ~legacy ~show_types ctxt code ->
   (* Constants need to be expanded or [parse_toplevel] may fail. *)
   Global_constants_storage.expand ctxt code >>=? fun (ctxt, code) ->
   parse_toplevel ctxt ~legacy code >>?= fun (toplevel, ctxt) ->
@@ -4867,6 +4919,7 @@ let typecheck_code :
   let elab_conf = Script_ir_translator_config.make ~legacy ?type_logger () in
   let result =
     parse_kdescr
+      ~unparse_code_rec
       (Tc_context.toplevel ~storage_type ~param_type:arg_type ~entrypoints)
       ctxt
       ~elab_conf
@@ -4877,7 +4930,9 @@ let typecheck_code :
   in
   trace (Ill_typed_contract (code, !type_map)) result
   >>=? fun ((_ : (_, _, _, _) kdescr), ctxt) ->
-  let views_result = parse_views ctxt ~elab_conf storage_type views in
+  let views_result =
+    parse_views ~unparse_code_rec ctxt ~elab_conf storage_type views
+  in
   trace (Ill_typed_contract (code, !type_map)) views_result
   >|=? fun (typed_views, ctxt) ->
   ( Typechecked_code_internal
@@ -4946,6 +5001,11 @@ include Data_unparser (struct
   let parse_data = parse_data
 end)
 
+let unparse_code_rec : unparse_code_rec =
+ fun ctxt ~stack_depth mode node ->
+  unparse_code ctxt ~stack_depth mode node >>=? fun (code, ctxt) ->
+  return (Micheline.root code, ctxt)
+
 let parse_and_unparse_script_unaccounted ctxt ~legacy ~allow_forged_in_storage
     mode ~normalize_types {code; storage} =
   Script.force_decode_in_context
@@ -4953,7 +5013,7 @@ let parse_and_unparse_script_unaccounted ctxt ~legacy ~allow_forged_in_storage
     ctxt
     code
   >>?= fun (code, ctxt) ->
-  typecheck_code ~legacy ~show_types:false ctxt code
+  typecheck_code ~unparse_code_rec ~legacy ~show_types:false ctxt code
   >>=? fun ( Typechecked_code_internal
                {
                  toplevel =
@@ -4971,6 +5031,7 @@ let parse_and_unparse_script_unaccounted ctxt ~legacy ~allow_forged_in_storage
                },
              ctxt ) ->
   parse_storage
+    ~unparse_code_rec
     ~elab_conf:(Script_ir_translator_config.make ~legacy ())
     ctxt
     ~allow_forged:allow_forged_in_storage
@@ -5414,12 +5475,31 @@ let extract_lazy_storage_diff ctxt mode ~temporary ~to_duplicate ~to_update ty v
 let list_of_big_map_ids ids =
   Lazy_storage.IdSet.fold Big_map (fun id acc -> id :: acc) ids []
 
-let parse_data = parse_data ~stack_depth:0
+let parse_data ~elab_conf ctxt ~allow_forged ty t =
+  parse_data ~unparse_code_rec ~elab_conf ~allow_forged ~stack_depth:0 ctxt ty t
+
+let parse_view ~elab_conf ctxt ty view =
+  parse_view ~unparse_code_rec ~elab_conf ctxt ty view
 
-let parse_comparable_data ?type_logger =
+let parse_views ~elab_conf ctxt ty views =
+  parse_views ~unparse_code_rec ~elab_conf ctxt ty views
+
+let parse_code ~elab_conf ctxt ~code =
+  parse_code ~unparse_code_rec ~elab_conf ctxt ~code
+
+let parse_storage ~elab_conf ctxt ~allow_forged ty ~storage =
+  parse_storage ~unparse_code_rec ~elab_conf ctxt ~allow_forged ty ~storage
+
+let parse_script ~elab_conf ctxt ~allow_forged_in_storage script =
+  parse_script ~unparse_code_rec ~elab_conf ctxt ~allow_forged_in_storage script
+
+let parse_comparable_data ?type_logger ctxt ty t =
   parse_data
     ~elab_conf:Script_ir_translator_config.(make ~legacy:false ?type_logger ())
     ~allow_forged:false
+    ctxt
+    ty
+    t
 
 let parse_instr :
     type a s.
@@ -5430,7 +5510,14 @@ let parse_instr :
     (a, s) stack_ty ->
     ((a, s) judgement * context) tzresult Lwt.t =
  fun ~elab_conf tc_context ctxt script_instr stack_ty ->
-  parse_instr ~elab_conf ~stack_depth:0 tc_context ctxt script_instr stack_ty
+  parse_instr
+    ~unparse_code_rec
+    ~elab_conf
+    ~stack_depth:0
+    tc_context
+    ctxt
+    script_instr
+    stack_ty
 
 let unparse_data = unparse_data ~stack_depth:0
 
@@ -5526,5 +5613,5 @@ let script_size
   (Saturation_repr.(add code_size storage_size |> to_int), cost)
 
 let typecheck_code ~legacy ~show_types ctxt code =
-  typecheck_code ~legacy ~show_types ctxt code
+  typecheck_code ~unparse_code_rec ~legacy ~show_types ctxt code
   >|=? fun (Typechecked_code_internal {type_map; _}, ctxt) -> (type_map, ctxt)
diff --git a/src/proto_alpha/lib_protocol/script_ir_unparser.ml b/src/proto_alpha/lib_protocol/script_ir_unparser.ml
index 0c17a6f566cb..5e61073b8b28 100644
--- a/src/proto_alpha/lib_protocol/script_ir_unparser.ml
+++ b/src/proto_alpha/lib_protocol/script_ir_unparser.ml
@@ -451,7 +451,12 @@ let account_for_future_serialization_cost unparsed_data ctxt =
   Gas.consume ctxt (Script.micheline_serialization_cost unparsed_data)
   >|? fun ctxt -> (unparsed_data, ctxt)
 
-(* -- Unparsing data of any type -- *)
+type unparse_code_rec =
+  t ->
+  stack_depth:int ->
+  unparsing_mode ->
+  Script.node ->
+  ((canonical_location, prim) node * t, error trace) result Lwt.t
 
 module type MICHELSON_PARSER = sig
   val opened_ticket_type :
@@ -468,6 +473,7 @@ module type MICHELSON_PARSER = sig
     (ex_ty * context) tzresult
 
   val parse_data :
+    unparse_code_rec:unparse_code_rec ->
     elab_conf:Script_ir_translator_config.elab_config ->
     stack_depth:int ->
     context ->
@@ -480,6 +486,7 @@ end
 module Data_unparser (P : MICHELSON_PARSER) = struct
   open Script_tc_errors
 
+  (* -- Unparsing data of any type -- *)
   let rec unparse_data_rec :
       type a ac.
       context ->
@@ -714,6 +721,7 @@ module Data_unparser (P : MICHELSON_PARSER) = struct
              as all packable values are also forgeable. *)
         in
         P.parse_data
+          ~unparse_code_rec
           ~elab_conf
           ctxt
           ~stack_depth:(stack_depth + 1)
diff --git a/src/proto_alpha/lib_protocol/script_ir_unparser.mli b/src/proto_alpha/lib_protocol/script_ir_unparser.mli
index 07f0926b7184..82747ff5c6d9 100644
--- a/src/proto_alpha/lib_protocol/script_ir_unparser.mli
+++ b/src/proto_alpha/lib_protocol/script_ir_unparser.mli
@@ -142,11 +142,37 @@ val unparse_contract :
   'b typed_contract ->
   ('loc Script.michelson_node * context, error trace) result
 
-(** [MICHESLON_PARSER] signature describes a set of dependencies required to
+(** Lambdas are normalized at parsing and also at unparsing. These
+    normalizations require to parse and unparse data appearing inside
+    PUSH and introduces a mutual dependency between this module and
+    [Script_ir_translator] which is resolved as follows:
+    - [Script_ir_translator.parse_data] takes the normalization function
+      [unparse_code_rec] as argument,
+    - the unparsing function [unparse_data_rec] and the normalization
+      function [unparse_code_rec] are mutually defined in a functor
+      parameterized by the missing bits from [Script_ir_translator]; see the
+      module signature [MICHELSON_PARSER] below.
+ *)
+
+(** The [unparse_code_rec] function is not exported (except through
+    the [Internal_for_benchmarking] module), but needs to be called by
+    [parse_data] to normalize lambdas so it is passed as argument to
+    the [parse_data] of the [MICHELSON_PARSER] signature below and to
+    several functions of [Script_ir_translator]. To avoid repeating the
+    signature of this function we define a type alias for it. *)
+type unparse_code_rec =
+  context ->
+  stack_depth:int ->
+  unparsing_mode ->
+  Script.node ->
+  (Script.node * context) tzresult Lwt.t
+
+(** [MICHELSON_PARSER] signature describes a set of dependencies required to
     unparse arbitrary values in the IR. Because some of those values contain
     just a Michelson code that does not need to be parsed immediately,
     unparsing them requires extracting information from that code – that's
     why we depend on the parser here. *)
+
 module type MICHELSON_PARSER = sig
   val opened_ticket_type :
     Script.location ->
@@ -162,6 +188,7 @@ module type MICHELSON_PARSER = sig
     (ex_ty * context) tzresult
 
   val parse_data :
+    unparse_code_rec:unparse_code_rec ->
     elab_conf:Script_ir_translator_config.elab_config ->
     stack_depth:int ->
     context ->
-- 
GitLab


From 9b59a49f6c14293a98765a85ab7d3e125a14c411 Mon Sep 17 00:00:00 2001
From: Yann Regis-Gianas <yann@nomadic-labs.com>
Date: Thu, 23 Feb 2023 17:15:43 +0100
Subject: [PATCH 2/6] Proto/Michelson: Normalize lambda abstraction code
 produced by the elaborator

Signed-off-by: Yann Regis-Gianas <yann@nomadic-labs.com>
---
 .../lib_protocol/script_ir_translator.ml         | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/proto_alpha/lib_protocol/script_ir_translator.ml b/src/proto_alpha/lib_protocol/script_ir_translator.ml
index 41eee00ad96f..62863764a38c 100644
--- a/src/proto_alpha/lib_protocol/script_ir_translator.ml
+++ b/src/proto_alpha/lib_protocol/script_ir_translator.ml
@@ -1865,6 +1865,12 @@ let parse_toplevel :
           Script_ir_annot.error_unexpected_annot sloc sannot >|? fun () ->
           ({code_field = c; arg_type; views; storage_type = s}, ctxt))
 
+(* Normalize lambdas during parsing *)
+
+let normalized_lam ~unparse_code_rec ~stack_depth ctxt kdescr code_field =
+  unparse_code_rec ctxt ~stack_depth:(stack_depth + 1) Optimized code_field
+  >|=? fun (code_field, ctxt) -> (Lam (kdescr, code_field), ctxt)
+
 (* -- parse data of any type -- *)
 
 (*
@@ -2081,7 +2087,13 @@ let rec parse_data :
            ta
            tr
            script_instr
-      >|=? fun (kdescr, ctxt) -> (Lam (kdescr, script_instr), ctxt)
+      >>=? fun (kdescr, ctxt) ->
+      (normalized_lam [@ocaml.tailcall])
+        ~unparse_code_rec
+        ctxt
+        ~stack_depth
+        kdescr
+        script_instr
   | ( Lambda_t (ta, tr, _ty_name),
       Prim (loc, D_Lambda_rec, [(Seq (_loc, _) as script_instr)], []) ) ->
       traced
@@ -3414,6 +3426,8 @@ and parse_instr :
         ret
         code
       >>=? fun (kdescr, ctxt) ->
+      (* No need to normalize the unparsed component to Optimized mode here
+         because the script is already normalized in Optimized mode. *)
       let instr = {apply = (fun k -> ILambda (loc, Lam (kdescr, code), k))} in
       lambda_t loc arg ret >>?= fun ty ->
       let stack = Item_t (ty, stack) in
-- 
GitLab


From c5deeb4a37ab002bde26a32710d7679bb9892fd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Cauderlier?=
 <raphael.cauderlier@nomadic-labs.com>
Date: Fri, 24 Feb 2023 17:07:18 +0100
Subject: [PATCH 3/6] Proto/Michelson: also normalize recursive lambdas

---
 .../lib_protocol/script_ir_translator.ml      | 30 +++++++++++++++----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/proto_alpha/lib_protocol/script_ir_translator.ml b/src/proto_alpha/lib_protocol/script_ir_translator.ml
index 62863764a38c..1dad55842a66 100644
--- a/src/proto_alpha/lib_protocol/script_ir_translator.ml
+++ b/src/proto_alpha/lib_protocol/script_ir_translator.ml
@@ -1871,6 +1871,10 @@ let normalized_lam ~unparse_code_rec ~stack_depth ctxt kdescr code_field =
   unparse_code_rec ctxt ~stack_depth:(stack_depth + 1) Optimized code_field
   >|=? fun (code_field, ctxt) -> (Lam (kdescr, code_field), ctxt)
 
+let normalized_lam_rec ~unparse_code_rec ~stack_depth ctxt kdescr code_field =
+  unparse_code_rec ctxt ~stack_depth:(stack_depth + 1) Optimized code_field
+  >|=? fun (code_field, ctxt) -> (LamRec (kdescr, code_field), ctxt)
+
 (* -- parse data of any type -- *)
 
 (*
@@ -2518,16 +2522,27 @@ and parse_lam_rec :
          @@ ty_eq ~error_details ty ret
          >>? fun (eq, ctxt) ->
          eq >|? fun Eq ->
-         ((LamRec (close_descr descr, script_instr) : (arg, ret) lambda), ctxt))
+         ( (close_descr descr
+             : (arg, (arg, ret) lambda * end_of_stack, ret, end_of_stack) kdescr),
+           ctxt ))
+      >>=? fun (closed_descr, ctxt) ->
+      (normalized_lam_rec [@ocaml.tailcall])
+        ~unparse_code_rec
+        ~stack_depth
+        ctxt
+        closed_descr
+        script_instr
   | Typed {loc; aft = stack_ty; _}, ctxt ->
       let ret = serialize_ty_for_error ret in
       let stack_ty = serialize_stack_for_error ctxt stack_ty in
       tzfail @@ Bad_return (loc, stack_ty, ret)
   | Failed {descr}, ctxt ->
-      return
-        ( (LamRec (close_descr (descr (Item_t (ret, Bot_t))), script_instr)
-            : (arg, ret) lambda),
-          ctxt )
+      (normalized_lam_rec [@ocaml.tailcall])
+        ~unparse_code_rec
+        ~stack_depth
+        ctxt
+        (close_descr (descr (Item_t (ret, Bot_t))))
+        script_instr
 
 and parse_instr :
     type a s.
@@ -3442,7 +3457,10 @@ and parse_instr :
       check_var_annot loc annot >>?= fun () ->
       lambda_t loc arg ret >>?= fun lambda_rec_ty ->
       parse_lam_rec
-        ~unparse_code_rec
+        ~unparse_code_rec:(fun ctxt ~stack_depth:_ _unparsing_mode node ->
+          return (node, ctxt))
+        (* No need to normalize the unparsed component to Optimized mode here
+           because the script is already normalized in Optimized mode. *)
         Tc_context.(add_lambda tc_context)
         ~elab_conf
         ~stack_depth:(stack_depth + 1)
-- 
GitLab


From 472b07f92d0ebce4463abd2a8316cd3221389f27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Cauderlier?=
 <raphael.cauderlier@nomadic-labs.com>
Date: Tue, 28 Feb 2023 11:56:57 +0100
Subject: [PATCH 4/6] Tests: reset regression traces

A few gas regressions caused by the additional normalization of
lambdas in contract arguments.
---
 ...packunpack_rev_cty--storage125992234--input1028781121-.out | 4 ++--
 ...-packunpack_rev_cty--storage125992234--input802670583-.out | 4 ++--
 .../hash_data.ml/Alpha- hash data ... of type ... (good).out  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input1028781121-.out b/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input1028781121-.out
index 5e192dc5df3f..101b4253219e 100644
--- a/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input1028781121-.out	
+++ b/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input1028781121-.out	
@@ -7,7 +7,7 @@ emitted operations
 big_map diff
   
 trace
-  - location: 28 (just consumed gas: 502.607)
+  - location: 28 (just consumed gas: 503.139)
     [ (Pair (Pair "edpkuBknW28nW72KG6RoHtYW7p12T6GKc7nAbwYX5m8Wd9sDVC9yav"
                   Unit
                   "edsigthTzJ8X7MPmNeEwybRAvdxS1pupqcM5Mk4uCuyZAe7uEk68YpuGDeViW8wSXMrCi5CwoNgqs8V2w8ayB5dMJzrYCHhD8C7"
@@ -1165,7 +1165,7 @@ trace
     [ { PACK } ]
   - location: 303 (just consumed gas: 0.597)
     [ 0x050200000002030c ]
-  - location: 304 (just consumed gas: 1.024)
+  - location: 304 (just consumed gas: 1.556)
     [ (Some { PACK }) ]
   - location: 309 (just consumed gas: 0)
     [ { PACK } ]
diff --git a/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input802670583-.out b/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input802670583-.out
index a1e2971d3a6d..78f57e647eca 100644
--- a/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input802670583-.out	
+++ b/tezt/tests/expected/contract_opcodes.ml/Alpha- opcodes -packunpack_rev_cty--storage125992234--input802670583-.out	
@@ -7,7 +7,7 @@ emitted operations
 big_map diff
   
 trace
-  - location: 28 (just consumed gas: 492.580)
+  - location: 28 (just consumed gas: 493.644)
     [ (Pair (Pair "edpkuBknW28nW72KG6RoHtYW7p12T6GKc7nAbwYX5m8Wd9sDVC9yav"
                   Unit
                   "edsigthTzJ8X7MPmNeEwybRAvdxS1pupqcM5Mk4uCuyZAe7uEk68YpuGDeViW8wSXMrCi5CwoNgqs8V2w8ayB5dMJzrYCHhD8C7"
@@ -1003,7 +1003,7 @@ trace
     [ { DUP ; DROP ; PACK } ]
   - location: 303 (just consumed gas: 1.129)
     [ 0x05020000000603210320030c ]
-  - location: 304 (just consumed gas: 2.086)
+  - location: 304 (just consumed gas: 3.150)
     [ (Some { DUP ; DROP ; PACK }) ]
   - location: 309 (just consumed gas: 0)
     [ { DUP ; DROP ; PACK } ]
diff --git a/tezt/tests/expected/hash_data.ml/Alpha- hash data ... of type ... (good).out b/tezt/tests/expected/hash_data.ml/Alpha- hash data ... of type ... (good).out
index b8f040cbd9b0..6ca993cff8ec 100644
--- a/tezt/tests/expected/hash_data.ml/Alpha- hash data ... of type ... (good).out	
+++ b/tezt/tests/expected/hash_data.ml/Alpha- hash data ... of type ... (good).out	
@@ -330,7 +330,7 @@ Raw Script-expression-ID-Hash: 0xf0147b0fcd9ed958297e3e663f25d0427f7f70576f22047
 Ledger Blake2b hash: HAAxipLZ7LdX9GLgN7p1bX9KneQ82FcCsss3YicsKaAS
 Raw Sha256 hash: 0x077e8b5d0b528d84284d773f8cc36da052248bb96c341d68d51b86e0ce098262
 Raw Sha512 hash: 0x7553bc601c56a361eb537e2bcba685ae6fbf3b68003bd9426db318352db433445aee770bab94631ce5bdf29b948caf1665e208c9be33d8b70fa67d2839d4a16a
-Gas remaining: 1039999.109 units remaining
+Gas remaining: 1039998.843 units remaining
 
 ./octez-client --mode mockup hash data '{ PUSH nat 1; ADD }' of type 'lambda nat nat'
 Raw packed data: 0x0502000000080743036200010312
@@ -339,7 +339,7 @@ Raw Script-expression-ID-Hash: 0x42a0dcc26c782dcc094ba725e4bcc4f8710c72203a895b5
 Ledger Blake2b hash: 5V6BBTaoBmSdwu8pv5xNMNBwm1mQGdPJ8e8N3s22Gcwp
 Raw Sha256 hash: 0x113616e40fd9fcf4cb73a9cf9c57fc0ab1853fa4432b9ca975b472d0173bc007
 Raw Sha512 hash: 0x2256250f59e3c1c708eae9aa4d6f7d2dafdf82cb12b872db8320048c0dab3e225e96e846e284382fcfc832b83f0bf3ee3945fc6936357b2c29d96f29ba90315a
-Gas remaining: 1039997.205 units remaining
+Gas remaining: 1039995.855 units remaining
 
 ./octez-client --mode mockup hash data '{}' of type 'list unit'
 Raw packed data: 0x050200000000
-- 
GitLab


From 85852d990610bb7f85110ca6bb6b5d47b7689854 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Cauderlier?=
 <raphael.cauderlier@nomadic-labs.com>
Date: Tue, 28 Feb 2023 14:05:36 +0100
Subject: [PATCH 5/6] Tests: fix the script_manager_contracts test

---
 tezt/tests/script_manager_contracts.ml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tezt/tests/script_manager_contracts.ml b/tezt/tests/script_manager_contracts.ml
index a04b00046c16..c8f21195107b 100644
--- a/tezt/tests/script_manager_contracts.ml
+++ b/tezt/tests/script_manager_contracts.ml
@@ -203,15 +203,16 @@ let test_manager_contracts =
     Client.get_balance_for ~account:"bootstrap2" client
   in
   let amount = Tez.of_mutez_int 10_100_000 in
+  let fee = Tez.of_mutez_int 0_000_636 in
   let* () =
     Client.transfer
       ~amount
       ~giver:"manager"
       ~receiver:"bootstrap2"
       ~gas_limit:((128 * 26350) + 12)
+      ~fee
       client
   in
-  let fee = Tez.of_mutez_int 0_000_635 in
   let* () = check_balance ~__LOC__ "manager" Tez.(balance - amount) in
   let* () =
     check_balance ~__LOC__ "bootstrap2" Tez.(balance_bootstrap + amount - fee)
-- 
GitLab


From 8649a27d03717eb27592cde23c77aa6098161c74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Cauderlier?=
 <raphael.cauderlier@nomadic-labs.com>
Date: Tue, 28 Feb 2023 16:44:58 +0100
Subject: [PATCH 6/6] Tests/Michelson: add a few unit tests for normalization
 of lambdas

---
 .../test/integration/michelson/main.ml        |   1 +
 .../michelson/test_lambda_normalization.ml    | 235 ++++++++++++++++++
 2 files changed, 236 insertions(+)
 create mode 100644 src/proto_alpha/lib_protocol/test/integration/michelson/test_lambda_normalization.ml

diff --git a/src/proto_alpha/lib_protocol/test/integration/michelson/main.ml b/src/proto_alpha/lib_protocol/test/integration/michelson/main.ml
index dd1a9f76fe0f..f93799c4d484 100644
--- a/src/proto_alpha/lib_protocol/test/integration/michelson/main.ml
+++ b/src/proto_alpha/lib_protocol/test/integration/michelson/main.ml
@@ -55,5 +55,6 @@ let () =
       ("annotations", Test_annotations.tests);
       ("event logging", Test_contract_event.tests);
       ("patched contracts", Test_patched_contracts.tests);
+      ("lambda normalization", Test_lambda_normalization.tests);
     ]
   |> Lwt_main.run
diff --git a/src/proto_alpha/lib_protocol/test/integration/michelson/test_lambda_normalization.ml b/src/proto_alpha/lib_protocol/test/integration/michelson/test_lambda_normalization.ml
new file mode 100644
index 000000000000..ab940088a9cf
--- /dev/null
+++ b/src/proto_alpha/lib_protocol/test/integration/michelson/test_lambda_normalization.ml
@@ -0,0 +1,235 @@
+(*****************************************************************************)
+(*                                                                           *)
+(* Open Source License                                                       *)
+(* Copyright (c) 2023 Nomadic Labs, <contact@nomadic-labs.com>               *)
+(*                                                                           *)
+(* Permission is hereby granted, free of charge, to any person obtaining a   *)
+(* copy of this software and associated documentation files (the "Software"),*)
+(* to deal in the Software without restriction, including without limitation *)
+(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
+(* and/or sell copies of the Software, and to permit persons to whom the     *)
+(* Software is furnished to do so, subject to the following conditions:      *)
+(*                                                                           *)
+(* The above copyright notice and this permission notice shall be included   *)
+(* in all copies or substantial portions of the Software.                    *)
+(*                                                                           *)
+(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
+(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
+(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
+(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
+(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
+(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
+(* DEALINGS IN THE SOFTWARE.                                                 *)
+(*                                                                           *)
+(*****************************************************************************)
+
+(** Testing
+    -------
+    Component:  Protocol (Michelson)
+    Invocation: dune exec \
+                src/proto_alpha/lib_protocol/test/integration/michelson/main.exe \
+                -- test "^lambda normalization"
+    Subject:    Test that lambdas are normalized to optimized format at elaboration
+*)
+
+open Protocol
+open Alpha_context
+open Script_typed_ir
+
+let new_ctxt () =
+  let open Lwt_result_wrap_syntax in
+  let* block, _contract = Context.init1 () in
+  let+ incr = Incremental.begin_construction block in
+  Incremental.alpha_ctxt incr
+
+let parse_and_project (ty : ((_, _) lambda, _) ty) (node : Script.node) =
+  let open Lwt_result_wrap_syntax in
+  let* ctxt = new_ctxt () in
+  let elab_conf = Script_ir_translator_config.make ~legacy:false () in
+  let*@ lam, _ctxt =
+    Script_ir_translator.parse_data ~elab_conf ctxt ~allow_forged:false ty node
+  in
+  match lam with
+  | Lam (_kdescr, node) -> return node
+  | LamRec (_kdescr, node) ->
+      return
+        Micheline.(
+          Prim (dummy_location, Michelson_v1_primitives.D_Lambda_rec, [node], []))
+
+let node_of_string str =
+  let open Lwt_result_wrap_syntax in
+  let*? parsed =
+    Micheline_parser.no_parsing_error
+    @@ Michelson_v1_parser.parse_expression ~check:false str
+  in
+  return @@ Micheline.root parsed.expanded
+
+let node_to_string node =
+  Format.asprintf
+    "%a"
+    Micheline_printer.print_expr
+    ((Micheline_printer.printable Michelson_v1_primitives.string_of_prim)
+       (Micheline.strip_locations node))
+
+let assert_lambda_normalizes_to ~loc ty str expected =
+  let open Lwt_result_wrap_syntax in
+  let* node = node_of_string str in
+  let* node_normalized = parse_and_project ty node in
+  let str_normalized = node_to_string node_normalized in
+  let* expected_node = node_of_string expected in
+  let expected = node_to_string expected_node in
+  Assert.equal_string ~loc expected str_normalized
+
+let assert_normalizes_to ~loc ty str expected =
+  let open Lwt_result_wrap_syntax in
+  let* () = assert_lambda_normalizes_to ~loc ty str expected in
+  let* () =
+    assert_lambda_normalizes_to
+      ~loc
+      ty
+      ("Lambda_rec " ^ str)
+      ("Lambda_rec " ^ expected)
+  in
+  return_unit
+
+let test_lambda_normalization () =
+  let open Lwt_result_wrap_syntax in
+  let*?@ ty =
+    Script_typed_ir.(lambda_t Micheline.dummy_location unit_t never_t)
+  in
+  let*?@ lam_unit_unit =
+    Script_typed_ir.(lambda_t Micheline.dummy_location unit_t unit_t)
+  in
+  let* () =
+    (* Empty sequence normalizes to itself. *)
+    assert_lambda_normalizes_to ~loc:__LOC__ lam_unit_unit "{}" "{}"
+  in
+  let* () =
+    (* Another example normalizing to itself. *)
+    assert_normalizes_to ~loc:__LOC__ ty "{FAILWITH}" "{FAILWITH}"
+  in
+  let* () =
+    (* Readable address normalizes to optimized. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH address "tz1KqTpEZ7Yob7QbPE4Hy4Wo8fHG8LhKxZSx"; FAILWITH}|}
+      {|{PUSH address 0x000002298c03ed7d454a101eb7022bc95f7e5f41ac78; FAILWITH}|}
+  in
+  let* () =
+    (* Binary pair normalizes to itself. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH (pair nat nat) (Pair 0 0); FAILWITH}|}
+      {|{PUSH (pair nat nat) (Pair 0 0); FAILWITH}|}
+  in
+  let* () =
+    (* Ternary pair normalizes to nested binary pairs. Type is unchanged. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH (pair nat nat nat) (Pair 0 0 0); FAILWITH}|}
+      {|{PUSH (pair nat nat nat) (Pair 0 (Pair 0 0)); FAILWITH}|}
+  in
+  let* () =
+    (* Same with nested pairs in type. Type is still unchanged. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH (pair nat (pair nat nat)) (Pair 0 0 0); FAILWITH}|}
+      {|{PUSH (pair nat (pair nat nat)) (Pair 0 (Pair 0 0)); FAILWITH}|}
+  in
+  let* () =
+    (* Quadrary pair normalizes to sequence. Type is unchanged. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH (pair nat nat nat nat) (Pair 0 0 0 0); FAILWITH}|}
+      {|{PUSH (pair nat nat nat nat) {0; 0; 0; 0}; FAILWITH}|}
+  in
+  let* () =
+    (* Code inside LAMBDA is normalized too. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{LAMBDA unit never
+           {PUSH address "tz1KqTpEZ7Yob7QbPE4Hy4Wo8fHG8LhKxZSx"; FAILWITH};
+         FAILWITH}|}
+      {|{LAMBDA unit never
+           {PUSH address 0x000002298c03ed7d454a101eb7022bc95f7e5f41ac78; FAILWITH};
+         FAILWITH}|}
+  in
+  let* () =
+    (* Same with LAMBDA replaced by PUSH. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH (lambda unit never)
+           {PUSH address "tz1KqTpEZ7Yob7QbPE4Hy4Wo8fHG8LhKxZSx"; FAILWITH};
+         FAILWITH}|}
+      {|{PUSH (lambda unit never)
+           {PUSH address 0x000002298c03ed7d454a101eb7022bc95f7e5f41ac78; FAILWITH};
+         FAILWITH}|}
+  in
+  let* () =
+    (* Code inside LAMBDA_REC is normalized too. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{LAMBDA_REC unit never
+           {PUSH address "tz1KqTpEZ7Yob7QbPE4Hy4Wo8fHG8LhKxZSx";
+            FAILWITH};
+         FAILWITH}|}
+      {|{LAMBDA_REC unit never
+           {PUSH address 0x000002298c03ed7d454a101eb7022bc95f7e5f41ac78;
+            FAILWITH};
+         FAILWITH}|}
+  in
+  let* () =
+    (* Same with LAMBDA_REC replaced by PUSH. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH (lambda unit never)
+           (Lambda_rec
+              {PUSH address "tz1KqTpEZ7Yob7QbPE4Hy4Wo8fHG8LhKxZSx";
+               FAILWITH});
+         FAILWITH}|}
+      {|{PUSH (lambda unit never)
+           (Lambda_rec
+              {PUSH address 0x000002298c03ed7d454a101eb7022bc95f7e5f41ac78;
+               FAILWITH});
+         FAILWITH}|}
+  in
+  let* () =
+    (* Code inside CREATE_CONTRACT is normalized too. *)
+    assert_normalizes_to
+      ~loc:__LOC__
+      ty
+      {|{PUSH mutez 0;
+         NONE key_hash;
+         CREATE_CONTRACT
+           {parameter unit;
+            storage unit;
+            code { PUSH address "tz1KqTpEZ7Yob7QbPE4Hy4Wo8fHG8LhKxZSx"; FAILWITH}};
+         DROP;
+         FAILWITH}|}
+      {|{PUSH mutez 0;
+         NONE key_hash;
+         CREATE_CONTRACT
+           {parameter unit;
+            storage unit;
+            code { PUSH address 0x000002298c03ed7d454a101eb7022bc95f7e5f41ac78; FAILWITH}};
+         DROP;
+         FAILWITH}|}
+  in
+  return_unit
+
+let tests =
+  [
+    Tztest.tztest
+      "Test that lambdas are normalized to optimized format during elaboration"
+      `Quick
+      test_lambda_normalization;
+  ]
-- 
GitLab