From ed2498b81e3334e7c83f0c4f4bb333add7737189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Proust?= Date: Wed, 16 Nov 2022 09:06:25 +0100 Subject: [PATCH 1/3] Build: update data-encoding to 0.7.1 --- .gitlab-ci.yml | 2 +- manifest/main.ml | 2 +- opam/octez-codec.opam | 2 +- opam/octez-sc-rollup-node-alpha.opam | 2 +- opam/tezos-baking-014-PtKathma.opam | 2 +- opam/tezos-baking-015-PtLimaPt.opam | 2 +- opam/tezos-baking-alpha.opam | 2 +- opam/tezos-base.opam | 2 +- opam/tezos-benchmark.opam | 2 +- opam/tezos-client-commands.opam | 2 +- opam/tezos-crypto-dal.opam | 2 +- opam/tezos-crypto.opam | 2 +- opam/tezos-error-monad.opam | 2 +- opam/tezos-event-logging-test-helpers.opam | 2 +- opam/tezos-event-logging.opam | 2 +- opam/tezos-hacl.opam | 2 +- opam/tezos-micheline.opam | 2 +- opam/tezos-protocol-environment.opam | 2 +- opam/tezos-rpc.opam | 2 +- opam/tezos-sapling.opam | 2 +- opam/tezos-scoru-wasm.opam | 2 +- opam/tezos-stdlib-unix.opam | 2 +- opam/tezos-test-helpers.opam | 2 +- opam/tezos-tps-evaluation.opam | 2 +- opam/tezos-tree-encoding.opam | 2 +- opam/virtual/octez-deps.opam | 2 +- scripts/version.sh | 4 ++-- 27 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 246c3d355087..1d7dbe25d4a4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -33,7 +33,7 @@ variables: # /!\ CI_REGISTRY is overriden to use a private Docker registry mirror in AWS ECR # in GitLab namespaces `nomadic-labs` and `tezos` ## This value MUST be the same as `opam_repository_tag` in `scripts/version.sh` - build_deps_image_version: 07ec20d5412eea534311dc1f3e887048eb468586 + build_deps_image_version: 6799b94e7be89877c4d35423fa0122d239bcc2e9 build_deps_image_name: "${CI_REGISTRY}/tezos/opam-repository" GIT_STRATEGY: fetch GIT_DEPTH: "1" diff --git a/manifest/main.ml b/manifest/main.ml index 52d4ab3f6aed..24c5bd6c76be 100644 --- a/manifest/main.ml +++ b/manifest/main.ml @@ -138,7 +138,7 @@ let data_encoding = ~js_compatible:true ~main_module:"Data_encoding" "data-encoding" - V.(at_least "0.6" && less_than "0.7") + V.(at_least "0.7.1" && less_than "1.0.0") let digestif = external_lib ~js_compatible:true "digestif" V.(at_least "0.7.3") diff --git a/opam/octez-codec.opam b/opam/octez-codec.opam index eb4c917b3a52..756bd8801540 100644 --- a/opam/octez-codec.opam +++ b/opam/octez-codec.opam @@ -9,7 +9,7 @@ dev-repo: "git+https://gitlab.com/tezos/tezos.git" license: "MIT" depends: [ "dune" { >= "3.0" } - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-base" "tezos-client-base-unix" "tezos-client-base" diff --git a/opam/octez-sc-rollup-node-alpha.opam b/opam/octez-sc-rollup-node-alpha.opam index f9117717610c..5df60c8c13e6 100644 --- a/opam/octez-sc-rollup-node-alpha.opam +++ b/opam/octez-sc-rollup-node-alpha.opam @@ -29,7 +29,7 @@ depends: [ "tezos-sc-rollup-alpha" "tezos-layer2-utils-alpha" "tezos-layer2-store" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "irmin-pack" { >= "3.4.3" & < "3.5.0" } "irmin" { >= "3.4.3" & < "3.5.0" } "ringo" { >= "0.9" & < "1.0.0" } diff --git a/opam/tezos-baking-014-PtKathma.opam b/opam/tezos-baking-014-PtKathma.opam index 6ab6522a8927..ba0568f45f37 100644 --- a/opam/tezos-baking-014-PtKathma.opam +++ b/opam/tezos-baking-014-PtKathma.opam @@ -30,7 +30,7 @@ depends: [ "lwt-canceler" { >= "0.3" & < "0.4" } "lwt-exit" "uri" { >= "2.2.0" } - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-client-base-unix" 
"tezos-mockup" "tezos-mockup-proxy" diff --git a/opam/tezos-baking-015-PtLimaPt.opam b/opam/tezos-baking-015-PtLimaPt.opam index 8d837ab733c9..c2610d43f7e0 100644 --- a/opam/tezos-baking-015-PtLimaPt.opam +++ b/opam/tezos-baking-015-PtLimaPt.opam @@ -30,7 +30,7 @@ depends: [ "lwt-canceler" { >= "0.3" & < "0.4" } "lwt-exit" "uri" { >= "2.2.0" } - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-client-base-unix" "tezos-mockup" "tezos-mockup-proxy" diff --git a/opam/tezos-baking-alpha.opam b/opam/tezos-baking-alpha.opam index bf9db9226833..3ec92de1372e 100644 --- a/opam/tezos-baking-alpha.opam +++ b/opam/tezos-baking-alpha.opam @@ -30,7 +30,7 @@ depends: [ "lwt-canceler" { >= "0.3" & < "0.4" } "lwt-exit" "uri" { >= "2.2.0" } - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-client-base-unix" "tezos-mockup" "tezos-mockup-proxy" diff --git a/opam/tezos-base.opam b/opam/tezos-base.opam index 790a4f629598..200497c893b1 100644 --- a/opam/tezos-base.opam +++ b/opam/tezos-base.opam @@ -11,7 +11,7 @@ depends: [ "dune" { >= "3.0" } "tezos-stdlib" "tezos-crypto" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-error-monad" "tezos-rpc" "tezos-micheline" diff --git a/opam/tezos-benchmark.opam b/opam/tezos-benchmark.opam index c5c4acb531f2..dcc203a4f8f6 100644 --- a/opam/tezos-benchmark.opam +++ b/opam/tezos-benchmark.opam @@ -15,7 +15,7 @@ depends: [ "tezos-crypto" "tezos-micheline" "tezos-clic" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "prbnmcn-cgrph" { = "0.0.2" } "prbnmcn-dagger" { = "0.0.2" } "prbnmcn-dagger-stats" { = "0.0.2" } diff --git a/opam/tezos-client-commands.opam b/opam/tezos-client-commands.opam index e80809fa1909..a9c84d864b4c 100644 --- a/opam/tezos-client-commands.opam +++ b/opam/tezos-client-commands.opam @@ -17,7 +17,7 @@ depends: [ "tezos-p2p-services" "tezos-stdlib-unix" "tezos-signer-backends" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "uri" { >= "2.2.0" } ] build: [ diff --git a/opam/tezos-crypto-dal.opam b/opam/tezos-crypto-dal.opam index f38fa4e776e8..63bcca466c01 100644 --- a/opam/tezos-crypto-dal.opam +++ b/opam/tezos-crypto-dal.opam @@ -11,7 +11,7 @@ depends: [ "dune" { >= "3.0" } "tezos-stdlib" "tezos-error-monad" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-crypto" "tezos-bls12-381-polynomial" { >= "0.1.3" } "lwt" { >= "5.6.0" } diff --git a/opam/tezos-crypto.opam b/opam/tezos-crypto.opam index 5417531a157c..10798949d92d 100644 --- a/opam/tezos-crypto.opam +++ b/opam/tezos-crypto.opam @@ -10,7 +10,7 @@ license: "MIT" depends: [ "dune" { >= "3.0" } "tezos-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-lwt-result-stdlib" "lwt" { >= "5.6.0" } "tezos-hacl" diff --git a/opam/tezos-error-monad.opam b/opam/tezos-error-monad.opam index 577e0b88954f..d7ad9b03ac39 100644 --- a/opam/tezos-error-monad.opam +++ b/opam/tezos-error-monad.opam @@ -11,7 +11,7 @@ depends: [ "dune" { >= "3.0" } "ocaml" { >= "4.07" } "tezos-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "lwt-canceler" { >= "0.3" & < "0.4" } "lwt" { >= "5.6.0" } "tezos-lwt-result-stdlib" diff --git a/opam/tezos-event-logging-test-helpers.opam b/opam/tezos-event-logging-test-helpers.opam index 5fcfb7b03f90..831e5b7e8b3c 100644 --- 
a/opam/tezos-event-logging-test-helpers.opam +++ b/opam/tezos-event-logging-test-helpers.opam @@ -11,7 +11,7 @@ depends: [ "dune" { >= "3.0" } "tezos-stdlib" "tezos-lwt-result-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-error-monad" "tezos-event-logging" "tezos-test-helpers" diff --git a/opam/tezos-event-logging.opam b/opam/tezos-event-logging.opam index 75add03ef0fb..49eafebf8df8 100644 --- a/opam/tezos-event-logging.opam +++ b/opam/tezos-event-logging.opam @@ -10,7 +10,7 @@ license: "MIT" depends: [ "dune" { >= "3.0" } "tezos-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-error-monad" "tezos-lwt-result-stdlib" "lwt_log" diff --git a/opam/tezos-hacl.opam b/opam/tezos-hacl.opam index 34005a777c2d..1f1036279cb0 100644 --- a/opam/tezos-hacl.opam +++ b/opam/tezos-hacl.opam @@ -19,7 +19,7 @@ depends: [ "tezos-error-monad" {with-test} "zarith" { with-test & >= "1.12" & < "1.13" } "zarith_stubs_js" {with-test} - "data-encoding" { with-test & >= "0.6" & < "0.7" } + "data-encoding" { with-test & >= "0.7.1" & < "1.0.0" } "qcheck-alcotest" { with-test & >= "0.18" } "tezos-test-helpers" {with-test} ] diff --git a/opam/tezos-micheline.opam b/opam/tezos-micheline.opam index 1a96d2816df3..cc78ef114c85 100644 --- a/opam/tezos-micheline.opam +++ b/opam/tezos-micheline.opam @@ -15,7 +15,7 @@ depends: [ "zarith_stubs_js" "tezos-stdlib" "tezos-error-monad" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } ] build: [ ["rm" "-r" "vendors"] diff --git a/opam/tezos-protocol-environment.opam b/opam/tezos-protocol-environment.opam index b6b5f19af9ca..bd8f9c261e53 100644 --- a/opam/tezos-protocol-environment.opam +++ b/opam/tezos-protocol-environment.opam @@ -14,7 +14,7 @@ depends: [ "tezos-crypto" "tezos-lwt-result-stdlib" "tezos-scoru-wasm" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "bls12-381" { >= "5.0.0" & < "5.1.0" } "tezos-plonk" { >= "0.1.2" } "zarith" { >= "1.12" & < "1.13" } diff --git a/opam/tezos-rpc.opam b/opam/tezos-rpc.opam index bce9eefe83e6..98b564b78e8b 100644 --- a/opam/tezos-rpc.opam +++ b/opam/tezos-rpc.opam @@ -9,7 +9,7 @@ dev-repo: "git+https://gitlab.com/tezos/tezos.git" license: "MIT" depends: [ "dune" { >= "3.0" } - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-error-monad" "resto" { >= "1.0" } "resto-directory" { >= "1.0" } diff --git a/opam/tezos-sapling.opam b/opam/tezos-sapling.opam index 221191ce1841..bfbc8018fe76 100644 --- a/opam/tezos-sapling.opam +++ b/opam/tezos-sapling.opam @@ -14,7 +14,7 @@ depends: [ "integers_stubs_js" "ctypes" { >= "0.18.0" } "ctypes_stubs_js" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "tezos-stdlib" "tezos-crypto" "tezos-error-monad" diff --git a/opam/tezos-scoru-wasm.opam b/opam/tezos-scoru-wasm.opam index 24a4cf6734bf..fe3f882449a3 100644 --- a/opam/tezos-scoru-wasm.opam +++ b/opam/tezos-scoru-wasm.opam @@ -15,7 +15,7 @@ depends: [ "tezos-webassembly-interpreter" "tezos-context" "tezos-lwt-result-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } ] build: [ ["rm" "-r" "vendors"] diff --git a/opam/tezos-stdlib-unix.opam b/opam/tezos-stdlib-unix.opam index d72ad88a1f76..7e861b11a042 100644 --- a/opam/tezos-stdlib-unix.opam +++ b/opam/tezos-stdlib-unix.opam @@ -14,7 +14,7 @@ depends: [ "tezos-lwt-result-stdlib" "tezos-event-logging" 
"tezos-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "lwt" { >= "5.6.0" } "ipaddr" { >= "5.0.0" & < "6.0.0" } "re" { >= "1.7.2" } diff --git a/opam/tezos-test-helpers.opam b/opam/tezos-test-helpers.opam index 2543bce78df2..f5f850579f66 100644 --- a/opam/tezos-test-helpers.opam +++ b/opam/tezos-test-helpers.opam @@ -16,7 +16,7 @@ depends: [ "alcotest" { >= "1.5.0" } "lwt" { >= "5.6.0" } "pure-splitmix" { = "0.3" } - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } ] build: [ ["rm" "-r" "vendors"] diff --git a/opam/tezos-tps-evaluation.opam b/opam/tezos-tps-evaluation.opam index 03caceb53819..0e842d4d9b41 100644 --- a/opam/tezos-tps-evaluation.opam +++ b/opam/tezos-tps-evaluation.opam @@ -13,7 +13,7 @@ depends: [ "caqti" "caqti-dynload" "caqti-lwt" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "lwt" { >= "5.6.0" } "tezos-baking-alpha" "tezos-client-alpha" diff --git a/opam/tezos-tree-encoding.opam b/opam/tezos-tree-encoding.opam index d8bf70c05191..ab2798bf742f 100644 --- a/opam/tezos-tree-encoding.opam +++ b/opam/tezos-tree-encoding.opam @@ -12,7 +12,7 @@ depends: [ "tezos-base" "tezos-lazy-containers" "tezos-lwt-result-stdlib" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } ] build: [ ["rm" "-r" "vendors"] diff --git a/opam/virtual/octez-deps.opam b/opam/virtual/octez-deps.opam index 39b0aaab28a8..879b988addef 100644 --- a/opam/virtual/octez-deps.opam +++ b/opam/virtual/octez-deps.opam @@ -29,7 +29,7 @@ depends: [ "ctypes" { >= "0.18.0" } "ctypes-foreign" { >= "0.18.0" } "ctypes_stubs_js" - "data-encoding" { >= "0.6" & < "0.7" } + "data-encoding" { >= "0.7.1" & < "1.0.0" } "digestif" { >= "0.7.3" } "dune-configurator" "ezjsonm" { >= "1.1.0" } diff --git a/scripts/version.sh b/scripts/version.sh index b82a791b7b30..a0556119d285 100755 --- a/scripts/version.sh +++ b/scripts/version.sh @@ -20,12 +20,12 @@ export recommended_node_version=14.12.0 ## full_opam_repository is a commit hash of the public OPAM repository, i.e. ## https://github.com/ocaml/opam-repository -export full_opam_repository_tag=f0b73342dac7ff49e1746ba37b975462ad3ae91b +export full_opam_repository_tag=a6705450b38ef71796ee47c7029d2c539630fe0d ## opam_repository is an additional, tezos-specific opam repository. 
## This value MUST be the same as `build_deps_image_version` in `.gitlab/ci/templates.yml export opam_repository_url=https://gitlab.com/tezos/opam-repository -export opam_repository_tag=07ec20d5412eea534311dc1f3e887048eb468586 +export opam_repository_tag=6799b94e7be89877c4d35423fa0122d239bcc2e9 export opam_repository_git=$opam_repository_url.git export opam_repository=$opam_repository_git\#$opam_repository_tag -- GitLab From 8d048c31188f6ce0d28618f5a3bf94f50b7ddb21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Proust?= Date: Thu, 22 Sep 2022 11:43:47 +0200 Subject: [PATCH 2/3] Proto-env: backwards compatibility layer --- .../structs/v0_data_encoding.ml | 38 ++++--------------- .../structs/v3_data_encoding.ml | 38 ++++--------------- .../structs/v5_data_encoding.ml | 38 ++++--------------- 3 files changed, 21 insertions(+), 93 deletions(-) diff --git a/src/lib_protocol_environment/structs/v0_data_encoding.ml b/src/lib_protocol_environment/structs/v0_data_encoding.ml index 1988704c5d60..000e7e87d555 100644 --- a/src/lib_protocol_environment/structs/v0_data_encoding.ml +++ b/src/lib_protocol_environment/structs/v0_data_encoding.ml @@ -57,39 +57,15 @@ module Encoding = struct repr_agnostic_custom {write; read} ~schema:Json_schema.any in Data_encoding__Encoding.raw_splitted ~json ~binary + + let dynamic_size : + ?kind:[`Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding = + fun ?kind e -> + let kind = (kind :> [`N | `Uint30 | `Uint16 | `Uint8] option) in + dynamic_size ?kind e end -(* We have to define this twice bc in data-encoding<0.7 the type equality for - [lazy_t] is not propagated. *) -let lazy_encoding encoding = - let binary = lazy_encoding encoding in - let json = - let open Json_encoding in - let write (type value) - (module Repr : Json_repr.Repr with type value = value) le = - match force_decode le with - | Some r -> - Json_repr.convert - (module Json_repr.Ezjsonm) - (module Repr) - (Json.construct encoding r) - | None -> - apply_lazy - ~fun_value:(fun _ -> assert false) - ~fun_bytes:(fun b -> - let (`Hex h) = Hex.of_bytes b in - Repr.repr (`O [("unparsed-binary", Repr.repr (`String h))])) - ~fun_combine:(fun _ _ -> assert false) - le - in - let read (type value) (module Repr : Json_repr.Repr with type value = value) - j = - let j = Json_repr.convert (module Repr) (module Json_repr.Ezjsonm) j in - make_lazy encoding (Json.destruct encoding j) - in - repr_agnostic_custom {write; read} ~schema:Json_schema.any - in - Data_encoding__Encoding.raw_splitted ~json ~binary +include Encoding module Json = struct include Data_encoding.Json diff --git a/src/lib_protocol_environment/structs/v3_data_encoding.ml b/src/lib_protocol_environment/structs/v3_data_encoding.ml index 7b19599ddbca..1702cdc3ae3d 100644 --- a/src/lib_protocol_environment/structs/v3_data_encoding.ml +++ b/src/lib_protocol_environment/structs/v3_data_encoding.ml @@ -57,39 +57,15 @@ module Encoding = struct repr_agnostic_custom {write; read} ~schema:Json_schema.any in Data_encoding__Encoding.raw_splitted ~json ~binary + + let dynamic_size : + ?kind:[`Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding = + fun ?kind e -> + let kind = (kind :> [`N | `Uint30 | `Uint16 | `Uint8] option) in + dynamic_size ?kind e end -(* We have to define this twice bc in data-encoding<0.7 the type equality for - [lazy_t] is not propagated. 
*) -let lazy_encoding encoding = - let binary = lazy_encoding encoding in - let json = - let open Json_encoding in - let write (type value) - (module Repr : Json_repr.Repr with type value = value) le = - match force_decode le with - | Some r -> - Json_repr.convert - (module Json_repr.Ezjsonm) - (module Repr) - (Json.construct encoding r) - | None -> - apply_lazy - ~fun_value:(fun _ -> assert false) - ~fun_bytes:(fun b -> - let (`Hex h) = Hex.of_bytes b in - Repr.repr (`O [("unparsed-binary", Repr.repr (`String h))])) - ~fun_combine:(fun _ _ -> assert false) - le - in - let read (type value) (module Repr : Json_repr.Repr with type value = value) - j = - let j = Json_repr.convert (module Repr) (module Json_repr.Ezjsonm) j in - make_lazy encoding (Json.destruct encoding j) - in - repr_agnostic_custom {write; read} ~schema:Json_schema.any - in - Data_encoding__Encoding.raw_splitted ~json ~binary +include Encoding module Json = struct include Data_encoding.Json diff --git a/src/lib_protocol_environment/structs/v5_data_encoding.ml b/src/lib_protocol_environment/structs/v5_data_encoding.ml index 0dc7fdcb656e..382cc192f019 100644 --- a/src/lib_protocol_environment/structs/v5_data_encoding.ml +++ b/src/lib_protocol_environment/structs/v5_data_encoding.ml @@ -57,36 +57,12 @@ module Encoding = struct repr_agnostic_custom {write; read} ~schema:Json_schema.any in Data_encoding__Encoding.raw_splitted ~json ~binary + + let dynamic_size : + ?kind:[`Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding = + fun ?kind e -> + let kind = (kind :> [`N | `Uint30 | `Uint16 | `Uint8] option) in + dynamic_size ?kind e end -(* We have to define this twice bc in data-encoding<0.7 the type equality for - [lazy_t] is not propagated. *) -let lazy_encoding encoding = - let binary = lazy_encoding encoding in - let json = - let open Json_encoding in - let write (type value) - (module Repr : Json_repr.Repr with type value = value) le = - match force_decode le with - | Some r -> - Json_repr.convert - (module Json_repr.Ezjsonm) - (module Repr) - (Json.construct encoding r) - | None -> - apply_lazy - ~fun_value:(fun _ -> assert false) - ~fun_bytes:(fun b -> - let (`Hex h) = Hex.of_bytes b in - Repr.repr (`O [("unparsed-binary", Repr.repr (`String h))])) - ~fun_combine:(fun _ _ -> assert false) - le - in - let read (type value) (module Repr : Json_repr.Repr with type value = value) - j = - let j = Json_repr.convert (module Repr) (module Json_repr.Ezjsonm) j in - make_lazy encoding (Json.destruct encoding j) - in - repr_agnostic_custom {write; read} ~schema:Json_schema.any - in - Data_encoding__Encoding.raw_splitted ~json ~binary +include Encoding -- GitLab From 6bd7454b3d57ba4094e93fe4262bc169b3159d4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Proust?= Date: Wed, 28 Sep 2022 10:55:22 +0200 Subject: [PATCH 3/3] Proto-env-v8: expose newer data-encoding interface --- src/lib_protocol_environment/sigs/v8.ml | 338 ++++++++++++++++-- .../sigs/v8/data_encoding.mli | 338 ++++++++++++++++-- .../tezos_protocol_environment_structs.ml | 7 +- .../structs/v8_data_encoding.ml | 62 ++++ 4 files changed, 704 insertions(+), 41 deletions(-) create mode 100644 src/lib_protocol_environment/structs/v8_data_encoding.ml diff --git a/src/lib_protocol_environment/sigs/v8.ml b/src/lib_protocol_environment/sigs/v8.ml index 09089087c8d2..7c2669649a3c 100644 --- a/src/lib_protocol_environment/sigs/v8.ml +++ b/src/lib_protocol_environment/sigs/v8.ml @@ -3525,6 +3525,8 @@ type 'a t type 'a encoding = 'a t +type string_json_repr = Hex | Plain + 
val classify : 'a encoding -> [`Fixed of int | `Dynamic | `Variable] (** {3 Ground descriptors} *) @@ -3609,6 +3611,52 @@ val z : Z.t encoding (** Positive big number, see [z]. *) val n : Z.t encoding +(** [uint_like_n ()] is an encoding for [int] which uses the same representation + as {!n}. + + For compatibility with 32-bit machines, this encoding supports the same + range of encodings as [int31], but only the positive ones. I.e., it + supports the inclusive range [0] to [(1 lsl 30) - 1]. + + The optional parameter [?max_value] can be used to further restrict the + range of values. If [max_value] is set and is greater than + [(1 lsl 30) - 1] then the function raises [Invalid_argument]. + + The encoding is partial: attempting to de/serialise values which are + outside of the supported range will fail. In addition, in binary, a + maximum size for the serialised representation is computed based on the + maximum value in the range, and the de/serialisation process fails before + attempting any conversion if the size is exceeded. + + @raise Invalid_argument if [max_value < 0] or + [max_value > (1 lsl 30) - 1] *) +val uint_like_n : ?max_value:int -> unit -> int encoding + +(** [int_like_z ()] is an encoding for [int] which uses the same representation + as {!z}. + + For compatibility with 32-bit machines, this encoding supports the same + range of encodings as [int31]. I.e., it supports the inclusive range + [-(1 lsl 30)] to [(1 lsl 30) - 1]. + + The optional parameters [?min_value] and [?max_value] can be used to + further restrict the + range of values. If [min_value] is set and less than [-(1 lsl 30)] or if + [max_value] is set and is greater than [(1 lsl 30) - 1] then the function + raises [Invalid_argument]. + + The encoding is partial: attempting to de/serialise values which are + outside of the supported range will fail. In addition, in binary, a + maximum size for the serialised representation is computed based on the + encoding's range, and the de/serialisation process fails before attempting + any conversion if the size is exceeded. + + @raise Invalid_argument if [max_value < min_value] + + @raise Invalid_argument if [max_value > (1 lsl 30) - 1] + + @raise Invalid_argument if [min_value < -(1 lsl 30)] *) +val int_like_z : ?min_value:int -> ?max_value:int -> unit -> int encoding (** {4 Other ground type encodings} *) (** Encoding of a boolean @@ -3616,13 +3664,26 @@ val n : Z.t encoding val bool : bool encoding (** Encoding of a string - - encoded as a byte sequence in binary prefixed by the length - of the string - - encoded as a string in JSON. *) + - In binary, encoded as a byte sequence prefixed by the length + of the string. The length is represented as specified by the + [length_kind] parameter (default [`Uint30]). + - in JSON when [string_json_repr = Plain], encoded as a string + - in JSON when [string_json_repr = Hex], encoded via hex. *) +val string' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + string encoding + +(** Encoding of arbitrary bytes. See [string'] *) +val bytes' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + Bytes.t encoding + +(** same as [string' Plain] *) val string : string encoding -(** Encoding of arbitrary bytes - (encoded via hex in JSON and directly as a sequence byte in binary). *) +(** same as [bytes' Hex] *) val bytes : Bytes.t encoding (** {3 Descriptor combinators} *) @@ -3641,19 +3702,19 @@ val bytes : Bytes.t encoding as [null] in JSON. 
This includes an encoding of the form [option _], [conv _ _ (option _)], [dynamic_size (option _)], etc. - @raise Invalid_argument if called within the body of a {!mu}. *) + @raise Invalid_argument if called within the body of a {!mu}. *) val option : 'a encoding -> 'a option encoding (** Combinator to make a {!result} value - represented as a 1-byte tag followed by the data of either type in binary, + (represented as a 1-byte tag followed by the data of either type in binary, and either unwrapped value in JSON (the caller must ensure that both - encodings do not collide). *) + encodings do not collide)). *) val result : 'a encoding -> 'b encoding -> ('a, 'b) result encoding (** List combinator. - encoded as an array in JSON - encoded as the concatenation of all the element in binary - prefixed its length in bytes + prefixed its size in bytes @param [max_length] If [max_length] is passed and the encoding of elements has fixed @@ -3662,6 +3723,27 @@ val result : 'a encoding -> 'b encoding -> ('a, 'b) result encoding @raise Invalid_argument if the inner encoding is variable. *) val list : ?max_length:int -> 'a encoding -> 'a list encoding +(** List combinator. + - encoded as an array in JSON + - encoded as the concatenation of its length (number of elements) and all + the element in binary + + @param kind ([[`N | `Uint8 | `Uint16 | `Uint30]]) controls the + representation of the length: {!uint_like_n}, {!uint8}, {!uint16}, or + {!int31} (but only positive values). + + + @param [max_length] + If [max_length] is passed and the encoding of elements has fixed + size, a {!check_size} is automatically added for earlier rejection. + + @raise Invalid_argument if the inner encoding is variable. *) +val list_with_length : + ?max_length:int -> + [`N | `Uint8 | `Uint16 | `Uint30] -> + 'a encoding -> + 'a list encoding + (** Provide a transformer from one encoding to a different one. Used to simplify nested encodings or to change the generic tuples @@ -3830,6 +3912,104 @@ val obj10 : tuples contains a variable field.. *) val merge_objs : 'o1 encoding -> 'o2 encoding -> ('o1 * 'o2) encoding +(** [With_field_name_duplicate_checks] is a subset of [Encoding] where all the + constructed objects are checked for duplicates. + + Note that the analysis can include false positives: it might fail on + encodings which will never serialise a value with duplicate fields. + Still, these false positives are uncommon and we recommend you use these + combinators when relevant. 
+ + {[ + let e = + let open Data_encoding in + let open Data_encoding.With_field_name_duplicate_checks in + … + ]} + *) +module With_field_name_duplicate_checks : sig + val obj1 : 'f1 field -> 'f1 encoding + + val obj2 : 'f1 field -> 'f2 field -> ('f1 * 'f2) encoding + + val obj3 : 'f1 field -> 'f2 field -> 'f3 field -> ('f1 * 'f2 * 'f3) encoding + + val obj4 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + ('f1 * 'f2 * 'f3 * 'f4) encoding + + val obj5 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5) encoding + + val obj6 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6) encoding + + val obj7 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7) encoding + + val obj8 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + 'f8 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8) encoding + + val obj9 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + 'f8 field -> + 'f9 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9) encoding + + val obj10 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + 'f8 field -> + 'f9 field -> + 'f10 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9 * 'f10) encoding + + (** Create a larger object from the encodings of two smaller ones. + @raise Invalid_argument if both arguments are not objects or if both + tuples contains a variable field.. *) + val merge_objs : 'o1 encoding -> 'o2 encoding -> ('o1 * 'o2) encoding +end + (** {4 Constructors for tuples with N fields} *) (** These are serialized to binary by converting each internal @@ -3941,7 +4121,7 @@ type case_tag = Tag of int | Json_only Note that in general you should use a total function (i.e., one defined over the whole of the ['a] type) for the [matching_function]. However, in the case where you have a good reason to use a partial function, you should - raise {!No_case_matched} in the dead branches. Reasons why you may want to + raise [No_case_matched] in the dead branches. Reasons why you may want to do so include: - ['a] is an open variant and you will complete the matching function later, and @@ -4069,6 +4249,78 @@ val matching : can fit in the [tag_size] *) val union : ?tag_size:tag_size -> 't case list -> 't encoding +(** [With_JSON_discriminant] is a subset of [Encoding] where the + union/matching combinators (and associated functions) add discriminant for + the JSON backend. + + The following restrictions apply: + - The case encodings must be objects. + - The case encoding objects must not include a "kind" field. + - The case encoding objects must not have duplicate field names. + - The JSON discriminants must all be distinct. + + {[ + let e = + let open Data_encoding in + let open Data_encoding.With_JSON_discriminant in + … + ]} *) +module With_JSON_discriminant : sig + (** [case_tag]'s only variant [Tag] includes both a numeric tag for the binary + encoding and a string tag for the JSON encoding. *) + type case_tag = Tag of (int * string) + + type 't case + + (** [case] is similar to [Encoding.case] but it takes a + [SaferEncoding.case_tag] parameter. This includes both a numeric tag and a + string tag. + + In Binary: + This has no impact. 
The [case_tag] argument of [Encoding] already has a + numeric tag. + + In JSON: + The [SaferEncoding] adds a field for discriminating the different cases, + making these encodings less likely to include accidental bugs. More + specifically, when you use [case (Tag (_, s)) e _ _] then the underlying + union uses an encoding based on [e] and [s]. Specifically, if [e] is an + object encoding, then it adds the field [(req "kind" (constant s))] to + [e]. + + @raise Invalid_argument if [e] is not an object. + + @raise Invalid_argument if [e] is an object with a ["kind"] field (this + field name is reserved for the discriminating field added by [case]). *) + val case : + title:string -> + ?description:string -> + case_tag -> + 'a encoding -> + ('t -> 'a option) -> + ('a -> 't) -> + 't case + + (** [union] and [matching] now check that there are no duplicate ["kind"] + discriminating values. If there is, they raises [Invalid_argument]. *) + + (** Similarly to [case_tag], [matched] also takes an additional [string] + parameter. This parameter is used in the same way as [case] (to add a ["kind"] field + to the JSON encoding) and it fails in the same way as [case]. + + @raise Invalid_argument if the encoding is not an object. + + @raise Invalid_argument if the encoding is an object with a ["kind"] + field. *) + val matched : + ?tag_size:tag_size -> int * string -> 'a encoding -> 'a -> match_result + + val matching : + ?tag_size:tag_size -> 't matching_function -> 't case list -> 't encoding + + val union : ?tag_size:tag_size -> 't case list -> 't encoding +end + (** {3 Specialized descriptors} *) (** Encode enumeration via association list @@ -4084,7 +4336,13 @@ module Fixed : sig val string : int -> string encoding (** @raise Invalid_argument if the argument is less or equal to zero. *) - val bytes : int -> bytes encoding + val string' : string_json_repr -> int -> string encoding + + (** @raise Invalid_argument if the argument is less or equal to zero. *) + val bytes : int -> Bytes.t encoding + + (** @raise Invalid_argument if the argument is less or equal to zero. *) + val bytes' : string_json_repr -> int -> Bytes.t encoding (** [add_padding e n] is a padded version of the encoding [e]. In Binary, there are [n] null bytes ([\000]) added after the value encoded by [e]. @@ -4166,7 +4424,11 @@ end module Variable : sig val string : string encoding - val bytes : bytes encoding + val string' : string_json_repr -> string encoding + + val bytes : Bytes.t encoding + + val bytes' : string_json_repr -> Bytes.t encoding (** @raise Invalid_argument if the encoding argument is variable length or may lead to zero-width representation in binary. *) @@ -4179,16 +4441,40 @@ end module Bounded : sig (** Encoding of a string whose length does not exceed the specified length. - The size field uses the smallest integer that can accommodate the + + If [length_kind] is set, then it is used to encode the length of the + string in a header. If [length_kind] is omitted then the length field + uses the smallest fixed-width integer that can accommodate the maximum size - e.g., [`Uint8] for very short strings, [`Uint16] for longer strings, etc. Attempting to construct a string with a length that is too long causes - an [Invalid_argument] exception. *) + an [Invalid_argument] exception. + + @raise Invalid_argument if [length_kind] is set but it cannot accommodate + the specified bound. E.g., + [Bounded.string' ~length_kind:`Uint8 Hex 1000] raises. 
+ + @raise Invalid_argument if [length_kind] is unset and the specified + bound is larger than 2^30. *) + val string' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + int -> + string encoding + + (** Same as [string' Plain] *) val string : int -> string encoding - (** See {!string} above. *) - val bytes : int -> bytes encoding + (** See {!string'} above. *) + val bytes' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + int -> + Bytes.t encoding + + (** Same as [bytes' Hex] *) + val bytes : int -> Bytes.t encoding end (** Mark an encoding as being of dynamic size. @@ -4196,12 +4482,14 @@ end Typically used to combine two variable encodings in a same objects or tuple, or to use a variable encoding in an array or a list. *) val dynamic_size : - ?kind:[`Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding + ?kind:[`N | `Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding (** [check_size size encoding] ensures that the binary encoding of a value will not be allowed to exceed [size] bytes. The reader and the writer fails otherwise. This function do not modify - the JSON encoding. *) + the JSON encoding. + + @raise Invalid_argument if [size < 0] *) val check_size : int -> 'a encoding -> 'a encoding (** Define different encodings for JSON and binary serialization. *) @@ -4380,9 +4668,19 @@ module Compact : sig (** A compact encoding of the singleton value [unit], which has zero memory footprint. - Uses zero (0) bits of tag. *) + Uses zero (0) bits of tag. + + In JSON it is represented as the empty object [{}]. *) val unit : unit t + (** A compact encoding of the singleton value [unit], which has zero + memory footprint. + + Uses zero (0) bits of tag. + + In JSON it is represented as [null]. *) + val null : unit t + (** Efficient encoding of boolean values. It uses one (1) bit in the shared tag, and zero bit in the payload. *) val bool : bool t @@ -4741,7 +5039,7 @@ module Compact : sig (** [or_int32 ~i32_title ~alt_title ?alt_description c] creates a new compact encoding for the disjunction of - any type [a] (see {!case}) with [int32]. It uses the same number + any type [a] (see {!val-case}) with [int32]. It uses the same number of bits as {!int32}, that is 2, and uses the spare tag ([11]) within this size for values of type [a]. diff --git a/src/lib_protocol_environment/sigs/v8/data_encoding.mli b/src/lib_protocol_environment/sigs/v8/data_encoding.mli index 081b42aeb829..9cbc2f07c8d6 100644 --- a/src/lib_protocol_environment/sigs/v8/data_encoding.mli +++ b/src/lib_protocol_environment/sigs/v8/data_encoding.mli @@ -45,6 +45,8 @@ type 'a t type 'a encoding = 'a t +type string_json_repr = Hex | Plain + val classify : 'a encoding -> [`Fixed of int | `Dynamic | `Variable] (** {3 Ground descriptors} *) @@ -129,6 +131,52 @@ val z : Z.t encoding (** Positive big number, see [z]. *) val n : Z.t encoding +(** [uint_like_n ()] is an encoding for [int] which uses the same representation + as {!n}. + + For compatibility with 32-bit machines, this encoding supports the same + range of encodings as [int31], but only the positive ones. I.e., it + supports the inclusive range [0] to [(1 lsl 30) - 1]. + + The optional parameter [?max_value] can be used to further restrict the + range of values. If [max_value] is set and is greater than + [(1 lsl 30) - 1] then the function raises [Invalid_argument]. + + The encoding is partial: attempting to de/serialise values which are + outside of the supported range will fail. 
In addition, in binary, a + maximum size for the serialised representation is computed based on the + maximum value in the range, and the de/serialisation process fails before + attempting any conversion if the size is exceeded. + + @raise Invalid_argument if [max_value < 0] or + [max_value > (1 lsl 30) - 1] *) +val uint_like_n : ?max_value:int -> unit -> int encoding + +(** [int_like_z ()] is an encoding for [int] which uses the same representation + as {!z}. + + For compatibility with 32-bit machines, this encoding supports the same + range of encodings as [int31]. I.e., it supports the inclusive range + [-(1 lsl 30)] to [(1 lsl 30) - 1]. + + The optional parameters [?min_value] and [?max_value] can be used to + further restrict the + range of values. If [min_value] is set and less than [-(1 lsl 30)] or if + [max_value] is set and is greater than [(1 lsl 30) - 1] then the function + raises [Invalid_argument]. + + The encoding is partial: attempting to de/serialise values which are + outside of the supported range will fail. In addition, in binary, a + maximum size for the serialised representation is computed based on the + encoding's range, and the de/serialisation process fails before attempting + any conversion if the size is exceeded. + + @raise Invalid_argument if [max_value < min_value] + + @raise Invalid_argument if [max_value > (1 lsl 30) - 1] + + @raise Invalid_argument if [min_value < -(1 lsl 30)] *) +val int_like_z : ?min_value:int -> ?max_value:int -> unit -> int encoding (** {4 Other ground type encodings} *) (** Encoding of a boolean @@ -136,13 +184,26 @@ val n : Z.t encoding val bool : bool encoding (** Encoding of a string - - encoded as a byte sequence in binary prefixed by the length - of the string - - encoded as a string in JSON. *) + - In binary, encoded as a byte sequence prefixed by the length + of the string. The length is represented as specified by the + [length_kind] parameter (default [`Uint30]). + - in JSON when [string_json_repr = Plain], encoded as a string + - in JSON when [string_json_repr = Hex], encoded via hex. *) +val string' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + string encoding + +(** Encoding of arbitrary bytes. See [string'] *) +val bytes' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + Bytes.t encoding + +(** same as [string' Plain] *) val string : string encoding -(** Encoding of arbitrary bytes - (encoded via hex in JSON and directly as a sequence byte in binary). *) +(** same as [bytes' Hex] *) val bytes : Bytes.t encoding (** {3 Descriptor combinators} *) @@ -161,19 +222,19 @@ val bytes : Bytes.t encoding as [null] in JSON. This includes an encoding of the form [option _], [conv _ _ (option _)], [dynamic_size (option _)], etc. - @raise Invalid_argument if called within the body of a {!mu}. *) + @raise Invalid_argument if called within the body of a {!mu}. *) val option : 'a encoding -> 'a option encoding (** Combinator to make a {!result} value - represented as a 1-byte tag followed by the data of either type in binary, + (represented as a 1-byte tag followed by the data of either type in binary, and either unwrapped value in JSON (the caller must ensure that both - encodings do not collide). *) + encodings do not collide)). *) val result : 'a encoding -> 'b encoding -> ('a, 'b) result encoding (** List combinator. 
- encoded as an array in JSON - encoded as the concatenation of all the element in binary - prefixed its length in bytes + prefixed its size in bytes @param [max_length] If [max_length] is passed and the encoding of elements has fixed @@ -182,6 +243,27 @@ val result : 'a encoding -> 'b encoding -> ('a, 'b) result encoding @raise Invalid_argument if the inner encoding is variable. *) val list : ?max_length:int -> 'a encoding -> 'a list encoding +(** List combinator. + - encoded as an array in JSON + - encoded as the concatenation of its length (number of elements) and all + the element in binary + + @param kind ([[`N | `Uint8 | `Uint16 | `Uint30]]) controls the + representation of the length: {!uint_like_n}, {!uint8}, {!uint16}, or + {!int31} (but only positive values). + + + @param [max_length] + If [max_length] is passed and the encoding of elements has fixed + size, a {!check_size} is automatically added for earlier rejection. + + @raise Invalid_argument if the inner encoding is variable. *) +val list_with_length : + ?max_length:int -> + [`N | `Uint8 | `Uint16 | `Uint30] -> + 'a encoding -> + 'a list encoding + (** Provide a transformer from one encoding to a different one. Used to simplify nested encodings or to change the generic tuples @@ -350,6 +432,104 @@ val obj10 : tuples contains a variable field.. *) val merge_objs : 'o1 encoding -> 'o2 encoding -> ('o1 * 'o2) encoding +(** [With_field_name_duplicate_checks] is a subset of [Encoding] where all the + constructed objects are checked for duplicates. + + Note that the analysis can include false positives: it might fail on + encodings which will never serialise a value with duplicate fields. + Still, these false positives are uncommon and we recommend you use these + combinators when relevant. + + {[ + let e = + let open Data_encoding in + let open Data_encoding.With_field_name_duplicate_checks in + … + ]} + *) +module With_field_name_duplicate_checks : sig + val obj1 : 'f1 field -> 'f1 encoding + + val obj2 : 'f1 field -> 'f2 field -> ('f1 * 'f2) encoding + + val obj3 : 'f1 field -> 'f2 field -> 'f3 field -> ('f1 * 'f2 * 'f3) encoding + + val obj4 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + ('f1 * 'f2 * 'f3 * 'f4) encoding + + val obj5 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5) encoding + + val obj6 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6) encoding + + val obj7 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7) encoding + + val obj8 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + 'f8 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8) encoding + + val obj9 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + 'f8 field -> + 'f9 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9) encoding + + val obj10 : + 'f1 field -> + 'f2 field -> + 'f3 field -> + 'f4 field -> + 'f5 field -> + 'f6 field -> + 'f7 field -> + 'f8 field -> + 'f9 field -> + 'f10 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9 * 'f10) encoding + + (** Create a larger object from the encodings of two smaller ones. + @raise Invalid_argument if both arguments are not objects or if both + tuples contains a variable field.. 
*) + val merge_objs : 'o1 encoding -> 'o2 encoding -> ('o1 * 'o2) encoding +end + (** {4 Constructors for tuples with N fields} *) (** These are serialized to binary by converting each internal @@ -461,7 +641,7 @@ type case_tag = Tag of int | Json_only Note that in general you should use a total function (i.e., one defined over the whole of the ['a] type) for the [matching_function]. However, in the case where you have a good reason to use a partial function, you should - raise {!No_case_matched} in the dead branches. Reasons why you may want to + raise [No_case_matched] in the dead branches. Reasons why you may want to do so include: - ['a] is an open variant and you will complete the matching function later, and @@ -589,6 +769,78 @@ val matching : can fit in the [tag_size] *) val union : ?tag_size:tag_size -> 't case list -> 't encoding +(** [With_JSON_discriminant] is a subset of [Encoding] where the + union/matching combinators (and associated functions) add discriminant for + the JSON backend. + + The following restrictions apply: + - The case encodings must be objects. + - The case encoding objects must not include a "kind" field. + - The case encoding objects must not have duplicate field names. + - The JSON discriminants must all be distinct. + + {[ + let e = + let open Data_encoding in + let open Data_encoding.With_JSON_discriminant in + … + ]} *) +module With_JSON_discriminant : sig + (** [case_tag]'s only variant [Tag] includes both a numeric tag for the binary + encoding and a string tag for the JSON encoding. *) + type case_tag = Tag of (int * string) + + type 't case + + (** [case] is similar to [Encoding.case] but it takes a + [SaferEncoding.case_tag] parameter. This includes both a numeric tag and a + string tag. + + In Binary: + This has no impact. The [case_tag] argument of [Encoding] already has a + numeric tag. + + In JSON: + The [SaferEncoding] adds a field for discriminating the different cases, + making these encodings less likely to include accidental bugs. More + specifically, when you use [case (Tag (_, s)) e _ _] then the underlying + union uses an encoding based on [e] and [s]. Specifically, if [e] is an + object encoding, then it adds the field [(req "kind" (constant s))] to + [e]. + + @raise Invalid_argument if [e] is not an object. + + @raise Invalid_argument if [e] is an object with a ["kind"] field (this + field name is reserved for the discriminating field added by [case]). *) + val case : + title:string -> + ?description:string -> + case_tag -> + 'a encoding -> + ('t -> 'a option) -> + ('a -> 't) -> + 't case + + (** [union] and [matching] now check that there are no duplicate ["kind"] + discriminating values. If there is, they raises [Invalid_argument]. *) + + (** Similarly to [case_tag], [matched] also takes an additional [string] + parameter. This parameter is used in the same way as [case] (to add a ["kind"] field + to the JSON encoding) and it fails in the same way as [case]. + + @raise Invalid_argument if the encoding is not an object. + + @raise Invalid_argument if the encoding is an object with a ["kind"] + field. 
*) + val matched : + ?tag_size:tag_size -> int * string -> 'a encoding -> 'a -> match_result + + val matching : + ?tag_size:tag_size -> 't matching_function -> 't case list -> 't encoding + + val union : ?tag_size:tag_size -> 't case list -> 't encoding +end + (** {3 Specialized descriptors} *) (** Encode enumeration via association list @@ -604,7 +856,13 @@ module Fixed : sig val string : int -> string encoding (** @raise Invalid_argument if the argument is less or equal to zero. *) - val bytes : int -> bytes encoding + val string' : string_json_repr -> int -> string encoding + + (** @raise Invalid_argument if the argument is less or equal to zero. *) + val bytes : int -> Bytes.t encoding + + (** @raise Invalid_argument if the argument is less or equal to zero. *) + val bytes' : string_json_repr -> int -> Bytes.t encoding (** [add_padding e n] is a padded version of the encoding [e]. In Binary, there are [n] null bytes ([\000]) added after the value encoded by [e]. @@ -686,7 +944,11 @@ end module Variable : sig val string : string encoding - val bytes : bytes encoding + val string' : string_json_repr -> string encoding + + val bytes : Bytes.t encoding + + val bytes' : string_json_repr -> Bytes.t encoding (** @raise Invalid_argument if the encoding argument is variable length or may lead to zero-width representation in binary. *) @@ -699,16 +961,40 @@ end module Bounded : sig (** Encoding of a string whose length does not exceed the specified length. - The size field uses the smallest integer that can accommodate the + + If [length_kind] is set, then it is used to encode the length of the + string in a header. If [length_kind] is omitted then the length field + uses the smallest fixed-width integer that can accommodate the maximum size - e.g., [`Uint8] for very short strings, [`Uint16] for longer strings, etc. Attempting to construct a string with a length that is too long causes - an [Invalid_argument] exception. *) + an [Invalid_argument] exception. + + @raise Invalid_argument if [length_kind] is set but it cannot accommodate + the specified bound. E.g., + [Bounded.string' ~length_kind:`Uint8 Hex 1000] raises. + + @raise Invalid_argument if [length_kind] is unset and the specified + bound is larger than 2^30. *) + val string' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + int -> + string encoding + + (** Same as [string' Plain] *) val string : int -> string encoding - (** See {!string} above. *) - val bytes : int -> bytes encoding + (** See {!string'} above. *) + val bytes' : + ?length_kind:[`N | `Uint30 | `Uint16 | `Uint8] -> + string_json_repr -> + int -> + Bytes.t encoding + + (** Same as [bytes' Hex] *) + val bytes : int -> Bytes.t encoding end (** Mark an encoding as being of dynamic size. @@ -716,12 +1002,14 @@ end Typically used to combine two variable encodings in a same objects or tuple, or to use a variable encoding in an array or a list. *) val dynamic_size : - ?kind:[`Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding + ?kind:[`N | `Uint30 | `Uint16 | `Uint8] -> 'a encoding -> 'a encoding (** [check_size size encoding] ensures that the binary encoding of a value will not be allowed to exceed [size] bytes. The reader and the writer fails otherwise. This function do not modify - the JSON encoding. *) + the JSON encoding. + + @raise Invalid_argument if [size < 0] *) val check_size : int -> 'a encoding -> 'a encoding (** Define different encodings for JSON and binary serialization. 
*) @@ -900,9 +1188,19 @@ module Compact : sig (** A compact encoding of the singleton value [unit], which has zero memory footprint. - Uses zero (0) bits of tag. *) + Uses zero (0) bits of tag. + + In JSON it is represented as the empty object [{}]. *) val unit : unit t + (** A compact encoding of the singleton value [unit], which has zero + memory footprint. + + Uses zero (0) bits of tag. + + In JSON it is represented as [null]. *) + val null : unit t + (** Efficient encoding of boolean values. It uses one (1) bit in the shared tag, and zero bit in the payload. *) val bool : bool t @@ -1261,7 +1559,7 @@ module Compact : sig (** [or_int32 ~i32_title ~alt_title ?alt_description c] creates a new compact encoding for the disjunction of - any type [a] (see {!case}) with [int32]. It uses the same number + any type [a] (see {!val-case}) with [int32]. It uses the same number of bits as {!int32}, that is 2, and uses the spare tag ([11]) within this size for values of type [a]. diff --git a/src/lib_protocol_environment/structs/tezos_protocol_environment_structs.ml b/src/lib_protocol_environment/structs/tezos_protocol_environment_structs.ml index b3fb8569fbc5..dee2a27d0959 100644 --- a/src/lib_protocol_environment/structs/tezos_protocol_environment_structs.ml +++ b/src/lib_protocol_environment/structs/tezos_protocol_environment_structs.ml @@ -112,4 +112,9 @@ module V7 = struct module Plonk = V7_plonk end -module V8 = V7 +module V8 = struct + module Data_encoding = V8_data_encoding + module Error_monad_infix_globals = V0_error_monad_infix_globals + module Array = V7_array + module Plonk = V7_plonk +end diff --git a/src/lib_protocol_environment/structs/v8_data_encoding.ml b/src/lib_protocol_environment/structs/v8_data_encoding.ml new file mode 100644 index 000000000000..b41733a0b9a1 --- /dev/null +++ b/src/lib_protocol_environment/structs/v8_data_encoding.ml @@ -0,0 +1,62 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2022 Nomadic Labs. *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be included *) +(* in all copies or substantial portions of the Software. *) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. 
*) +(* *) +(*****************************************************************************) + +include Data_encoding + +module Encoding = struct + include Encoding + + let lazy_encoding encoding = + let binary = lazy_encoding encoding in + let json = + let open Json_encoding in + let write (type value) + (module Repr : Json_repr.Repr with type value = value) le = + match force_decode le with + | Some r -> + Json_repr.convert + (module Json_repr.Ezjsonm) + (module Repr) + (Json.construct encoding r) + | None -> + apply_lazy + ~fun_value:(fun _ -> assert false) + ~fun_bytes:(fun b -> + let (`Hex h) = Hex.of_bytes b in + Repr.repr (`O [("unparsed-binary", Repr.repr (`String h))])) + ~fun_combine:(fun _ _ -> assert false) + le + in + let read (type value) + (module Repr : Json_repr.Repr with type value = value) j = + let j = Json_repr.convert (module Repr) (module Json_repr.Ezjsonm) j in + make_lazy encoding (Json.destruct encoding j) + in + repr_agnostic_custom {write; read} ~schema:Json_schema.any + in + Data_encoding__Encoding.raw_splitted ~json ~binary +end + +include Encoding -- GitLab
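The combinators exposed to protocols by environment V8 above (uint_like_n, string', string_json_repr, With_JSON_discriminant, ...) compose like the rest of data-encoding. As a rough usage sketch only, not part of the patch series, the snippet below shows one way code written against these V8 signatures could define a JSON-discriminated union; the shape type, tag numbers and field names are all hypothetical:

    (* Illustrative sketch, assuming the signatures introduced in PATCH 3/3. *)
    type shape = Circle of int | Label of string

    let shape_encoding : shape Data_encoding.t =
      let open Data_encoding in
      let open Data_encoding.With_JSON_discriminant in
      union
        [
          case
            ~title:"circle"
            (Tag (0, "circle"))
            (* [uint_like_n ()] uses the same compact representation as [n],
               restricted to non-negative [int] values. *)
            (obj1 (req "radius" (uint_like_n ())))
            (function Circle r -> Some r | _ -> None)
            (fun r -> Circle r);
          case
            ~title:"label"
            (Tag (1, "label"))
            (* [string' Plain] keeps the JSON representation as a plain string;
               [Hex] would hex-encode it instead. *)
            (obj1 (req "text" (string' Plain)))
            (function Label s -> Some s | _ -> None)
            (fun s -> Label s);
        ]

In JSON, each case is serialised as an object carrying an extra "kind" field ("circle" or "label" here), which is the discriminant that With_JSON_discriminant adds on top of the plain union combinator; in binary the numeric tag is used exactly as with the unrestricted union.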