From 6490801ce18822db59e219bcc7ef54fa4383196c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Thu, 3 Oct 2024 14:37:37 +0200 Subject: [PATCH] Update dependencies, refactor - Tar.3.0.0 was released! So we can drop the pin and depend on it directly. - Use digestif in favor of the bygone mirage-crypto hash - Move most of the hashing logic into archive_checksum.ml - When checking the checksum we should check the checksum chosen, too. I believe this was a bug that was hidden due to opam packages always using one of md5, sha256 or sha512 for checksums. --- mirage/archive_checksum.ml | 127 +++++++++++++++++++++++++++ mirage/config.ml | 7 +- mirage/unikernel.ml | 171 ++++++------------------------------- 3 files changed, 159 insertions(+), 146 deletions(-) create mode 100644 mirage/archive_checksum.ml diff --git a/mirage/archive_checksum.ml b/mirage/archive_checksum.ml new file mode 100644 index 0000000..89d5514 --- /dev/null +++ b/mirage/archive_checksum.ml @@ -0,0 +1,127 @@ + + +module Hash = struct + type t = [ `MD5 | `SHA1 | `SHA224 | `SHA256 | `SHA384 | `SHA512 ] + + (* Make the compiler check that (t :> Digestif.hash') *) + let _ = fun (h :t) -> (h :> Digestif.hash') + + let compare h h' = + match h, h' with + | `SHA512, `SHA512 -> 0 + | `SHA512, _ -> 1 + | _, `SHA512 -> -1 + | `SHA384, `SHA384 -> 0 + | `SHA384, _ -> 1 + | _, `SHA384 -> -1 + | `SHA256, `SHA256 -> 0 + | `SHA256, _ -> 1 + | _, `SHA256 -> -1 + | `SHA224, `SHA224 -> 0 + | `SHA224, _ -> 1 + | _, `SHA224 -> -1 + | `SHA1, `SHA1 -> 0 + | `SHA1, `MD5 -> 1 + | `MD5, `MD5 -> 0 + | `MD5, _ -> -1 + + let to_string = function + | `MD5 -> "md5" + | `SHA1 -> "sha1" + | `SHA224 -> "sha224" + | `SHA256 -> "sha256" + | `SHA384 -> "sha384" + | `SHA512 -> "sha512" + + let of_string = function + | "md5" -> Ok `MD5 + | "sha256" -> Ok `SHA256 + | "sha512" -> Ok `SHA512 + | h -> Error (`Msg ("unknown hash algorithm: " ^ h)) +end + +module HM = Map.Make(Hash) + +module Running_hash = struct + type _ t = + | MD5 : Digestif.MD5.ctx -> [> `MD5 ] t + | SHA1 : Digestif.SHA1.ctx -> [> `SHA1 ] t + | SHA224 : Digestif.SHA224.ctx -> [> `SHA224 ] t + | SHA256 : Digestif.SHA256.ctx -> [> `SHA256 ] t + | SHA384 : Digestif.SHA384.ctx -> [> `SHA384 ] t + | SHA512 : Digestif.SHA512.ctx -> [> `SHA512 ] t + + let empty : _ -> _ t = function + | `MD5 -> MD5 Digestif.MD5.empty + | `SHA1 -> SHA1 Digestif.SHA1.empty + | `SHA224 -> SHA224 Digestif.SHA224.empty + | `SHA256 -> SHA256 Digestif.SHA256.empty + | `SHA384 -> SHA384 Digestif.SHA384.empty + | `SHA512 -> SHA512 Digestif.SHA512.empty + + let feed_string t data = + match t with + | MD5 t -> MD5 (Digestif.MD5.feed_string t data) + | SHA1 t -> SHA1 (Digestif.SHA1.feed_string t data) + | SHA224 t -> SHA224 (Digestif.SHA224.feed_string t data) + | SHA256 t -> SHA256 (Digestif.SHA256.feed_string t data) + | SHA384 t -> SHA384 (Digestif.SHA384.feed_string t data) + | SHA512 t -> SHA512 (Digestif.SHA512.feed_string t data) + + let get t = + match t with + | MD5 t -> Digestif.MD5.(to_raw_string (get t)) + | SHA1 t -> Digestif.SHA1.(to_raw_string (get t)) + | SHA224 t -> Digestif.SHA224.(to_raw_string (get t)) + | SHA256 t -> Digestif.SHA256.(to_raw_string (get t)) + | SHA384 t -> Digestif.SHA384.(to_raw_string (get t)) + | SHA512 t -> Digestif.SHA512.(to_raw_string (get t)) + + let hash_alg t = + match t with + | MD5 _ -> `MD5 + | SHA1 _ -> `SHA1 + | SHA224 _ -> `SHA224 + | SHA256 _ -> `SHA256 + | SHA384 _ -> `SHA384 + | SHA512 _ -> `SHA512 +end + +type 'a digests = { + md5 : Digestif.MD5.ctx; + sha256 : Digestif.SHA256.ctx; + sha512 : Digestif.SHA512.ctx; + csum : 'a Running_hash.t; +} + +let empty_digests h = + let csum = Running_hash.empty h in + { + md5 = Digestif.MD5.empty; + sha256 = Digestif.SHA256.empty; + sha512 = Digestif.SHA512.empty; + csum; + } + +let update_digests { md5; sha256; sha512; csum } data = + { + md5 = Digestif.MD5.feed_string md5 data; + sha256 = Digestif.SHA256.feed_string sha256 data; + sha512 = Digestif.SHA512.feed_string sha512 data; + csum = Running_hash.feed_string csum data; + } + +let init_write csums = + let hash, csum = HM.max_binding csums in + (hash, csum), Ok (empty_digests hash, `Init) + +let digests_to_hm digests = + HM.empty + |> HM.add `MD5 + Digestif.MD5.(to_raw_string (get digests.md5)) + |> HM.add `SHA256 + Digestif.SHA256.(to_raw_string (get digests.sha256)) + |> HM.add `SHA512 + Digestif.SHA512.(to_raw_string (get digests.sha512)) + |> HM.add (Running_hash.hash_alg digests.csum) + (Running_hash.get digests.csum) diff --git a/mirage/config.ml b/mirage/config.ml index 8bc59c4..2fa18f5 100644 --- a/mirage/config.ml +++ b/mirage/config.ml @@ -33,15 +33,16 @@ let mirror = ~packages:[ package ~min:"0.3.0" ~sublibs:[ "mirage" ] "paf" ; package "h2" ; - package "hex" ; + package "ohex" ; package "httpaf" ; package ~max:"0.0.5" "git-kv" ; package ~min:"3.10.0" "git-paf" ; package "opam-file-format" ; - package ~min:"2.2.0" ~sublibs:[ "gz" ] "tar" ~pin:"git+https://github.com/mirage/ocaml-tar.git#4215ff02d87486ade54e1a3ede43cce476f791cf"; - package ~min:"2.2.0" "tar-mirage" ~pin:"git+https://github.com/mirage/ocaml-tar.git#4215ff02d87486ade54e1a3ede43cce476f791cf" ; + package ~min:"3.0.0" ~sublibs:[ "gz" ] "tar" ; + package ~min:"3.0.0" "tar-mirage" ; package ~max:"0.2.0" "mirage-block-partition" ; package "oneffs" ; + package "digestif" ; ] (block @-> time @-> pclock @-> stackv4v6 @-> git_client @-> alpn_client @-> job) diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index 1a76120..1da473b 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -102,58 +102,24 @@ module Make module SM = Map.Make(String) module SSet = Set.Make(String) - let compare_hash h h' = - match h, h' with - | `SHA512, `SHA512 -> 0 - | `SHA512, _ -> 1 - | _, `SHA512 -> -1 - | `SHA384, `SHA384 -> 0 - | `SHA384, _ -> 1 - | _, `SHA384 -> -1 - | `SHA256, `SHA256 -> 0 - | `SHA256, _ -> 1 - | _, `SHA256 -> -1 - | `SHA224, `SHA224 -> 0 - | `SHA224, _ -> 1 - | _, `SHA224 -> -1 - | `SHA1, `SHA1 -> 0 - | `SHA1, `MD5 -> 1 - | `MD5, `MD5 -> 0 - | `MD5, _ -> -1 + let compare_hash = Archive_checksum.Hash.compare - module HM = Map.Make(struct - type t = Mirage_crypto.Hash.hash - let compare = compare_hash - end) + module HM = Archive_checksum.HM - let hash_to_string = function - | `MD5 -> "md5" - | `SHA1 -> "sha1" - | `SHA224 -> "sha224" - | `SHA256 -> "sha256" - | `SHA384 -> "sha384" - | `SHA512 -> "sha512" + let hash_to_string = Archive_checksum.Hash.to_string - let hash_of_string = function - | "md5" -> Ok `MD5 - | "sha256" -> Ok `SHA256 - | "sha512" -> Ok `SHA512 - | h -> Error (`Msg ("unknown hash algorithm: " ^ h)) + let hash_of_string = Archive_checksum.Hash.of_string - let hex_to_string h = - let `Hex h = Hex.of_string h in - h - - let hex_to_key h = Mirage_kv.Key.v (hex_to_string h) + let hex_to_key h = Mirage_kv.Key.v (Ohex.encode h) let hex_of_string s = - match Hex.to_string (`Hex s) with + match Ohex.decode s with | d -> Ok d | exception Invalid_argument err -> Error (`Msg err) let hm_to_s hm = HM.fold (fun h v acc -> - hash_to_string h ^ "=" ^ hex_to_string v ^ "\n" ^ acc) + hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc) hm "" module Git = struct @@ -258,7 +224,7 @@ module Make | Some v' when String.equal v v' -> None | Some v' -> Logs.warn (fun m -> m "for %s, hash %s, multiple keys are present: %s %s" - (Option.value ~default:"NONE" url) (hash_to_string h) (hex_to_string v) (hex_to_string v')); + (Option.value ~default:"NONE" url) (hash_to_string h) (Ohex.encode v) (Ohex.encode v')); None) acc end @@ -331,9 +297,6 @@ module Make let empty dev dev_md5s dev_sha512s = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s } - let to_hex d = - let d = Cstruct.to_string d in - hex_to_string d let marshal_sm (sm : string SM.t) = let version = char_of_int 1 in @@ -427,74 +390,6 @@ module Make end *) - module Running_hash = struct - type _ t = - | MD5 : Mirage_crypto.Hash.MD5.t -> [> `MD5 ] t - | SHA1 : Mirage_crypto.Hash.SHA1.t -> [> `SHA1 ] t - | SHA224 : Mirage_crypto.Hash.SHA224.t -> [> `SHA224 ] t - | SHA256 : Mirage_crypto.Hash.SHA256.t -> [> `SHA256 ] t - | SHA384 : Mirage_crypto.Hash.SHA384.t -> [> `SHA384 ] t - | SHA512 : Mirage_crypto.Hash.SHA512.t -> [> `SHA512 ] t - - let empty : _ -> _ t = function - | `MD5 -> MD5 Mirage_crypto.Hash.MD5.empty - | `SHA1 -> SHA1 Mirage_crypto.Hash.SHA1.empty - | `SHA224 -> SHA224 Mirage_crypto.Hash.SHA224.empty - | `SHA256 -> SHA256 Mirage_crypto.Hash.SHA256.empty - | `SHA384 -> SHA384 Mirage_crypto.Hash.SHA384.empty - | `SHA512 -> SHA512 Mirage_crypto.Hash.SHA512.empty - - let feed t data = - let open Mirage_crypto.Hash in - match t with - | MD5 t -> MD5 (MD5.feed t data) - | SHA1 t -> SHA1 (SHA1.feed t data) - | SHA224 t -> SHA224 (SHA224.feed t data) - | SHA256 t -> SHA256 (SHA256.feed t data) - | SHA384 t -> SHA384 (SHA384.feed t data) - | SHA512 t -> SHA512 (SHA512.feed t data) - - let get t = - let open Mirage_crypto.Hash in - match t with - | MD5 t -> MD5.get t - | SHA1 t -> SHA1.get t - | SHA224 t -> SHA224.get t - | SHA256 t -> SHA256.get t - | SHA384 t -> SHA384.get t - | SHA512 t -> SHA512.get t - end - - type 'a digests = { - md5 : Mirage_crypto.Hash.MD5.t; - sha256 : Mirage_crypto.Hash.SHA256.t; - sha512 : Mirage_crypto.Hash.SHA512.t; - csum : 'a Running_hash.t; - } - - let empty_digests h = - let open Mirage_crypto.Hash in - { - md5 = MD5.empty; - sha256 = SHA256.empty; - sha512 = SHA512.empty; - csum = Running_hash.empty h; - } - - let update_digests { md5; sha256; sha512; csum } data = - let open Mirage_crypto.Hash in - let data = Cstruct.of_string data in - { - md5 = MD5.feed md5 data; - sha256 = SHA256.feed sha256 data; - sha512 = SHA512.feed sha512 data; - csum = Running_hash.feed csum data; - } - - let init_write csums = - let hash, csum = HM.max_binding csums in - (hash, csum), Ok (empty_digests hash, `Init) - let content_length_of_string s = match Int64.of_string s with | len when len >= 0L -> `Fixed len @@ -524,7 +419,7 @@ module Make (* We can't use hex because the filename would become too long for tar *) Mirage_kv.Key.(pending / hash_to_string hash / Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum) | _ -> - Mirage_kv.Key.(pending / hash_to_string hash / hex_to_string csum) + Mirage_kv.Key.(pending / hash_to_string hash / Ohex.encode csum) let to_delete_key (hash, csum) = let rand = "random" in (* FIXME: generate random string *) @@ -534,7 +429,7 @@ module Make (* We can't use hex because the filename would become too long for tar *) Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum | _ -> - hex_to_string csum + Ohex.encode csum in Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand)) @@ -544,7 +439,7 @@ module Make let ( >>>= ) = Lwt_result.bind in fun response r data -> Lwt.return r >>>= fun (digests, acc) -> - let digests = update_digests digests data in + let digests = Archive_checksum.update_digests digests data in match acc with | `Init -> begin match body_length response with @@ -570,17 +465,8 @@ module Make | `Unknown body -> Lwt.return_ok (digests, `Unknown (body ^ data)) - let digests_to_hm digests = - HM.empty - |> HM.add `MD5 - (Cstruct.to_string (Mirage_crypto.Hash.MD5.get digests.md5)) - |> HM.add `SHA256 - (Cstruct.to_string (Mirage_crypto.Hash.SHA256.get digests.sha256)) - |> HM.add `SHA512 - (Cstruct.to_string (Mirage_crypto.Hash.SHA512.get digests.sha512)) - let check_csums_digests csums digests = - let csums' = digests_to_hm digests in + let csums' = Archive_checksum.digests_to_hm digests in let common_bindings = List.filter (fun (h, _) -> HM.mem h csums) (HM.bindings csums') in List.length common_bindings > 0 && List.for_all @@ -596,9 +482,9 @@ module Make in let source = pending_key (hash, csum) in if check_csums_digests csums digests && sizes_match then - let sha256 = to_hex (Mirage_crypto.Hash.SHA256.get digests.sha256) - and md5 = to_hex (Mirage_crypto.Hash.MD5.get digests.md5) - and sha512 = to_hex (Mirage_crypto.Hash.SHA512.get digests.sha512) in + let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256)) + and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5)) + and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in let dest = Mirage_kv.Key.v sha256 in begin match body with | `Unknown body -> @@ -617,7 +503,7 @@ module Make else begin (if sizes_match then Logs.err (fun m -> m "Bad checksum %s: computed %s expected %s" url - (hash_to_string hash) (hex_to_string csum)) + (hash_to_string hash) (Ohex.encode csum)) else match body with | `Fixed_body (reported, actual) -> Logs.err (fun m -> m "Size mismatch %s: received %a bytes expected %Lu bytes" @@ -681,15 +567,15 @@ module Make Logs.warn (fun m -> m "unexpected dictionary at %a" Mirage_kv.Key.pp path); Lwt.return_unit | `Value -> - let open Mirage_crypto.Hash in + let open Digestif in let sha256_final = if verify_sha256 then let f s = - let digest = SHA256.get s in - if not (String.equal (Mirage_kv.Key.basename path) (to_hex digest)) then + let digest = SHA256.(to_raw_string (get s)) in + if not (String.equal (Mirage_kv.Key.basename path) (Ohex.encode digest)) then Logs.err (fun m -> m "corrupt SHA256 data for %a, \ computed %s (should remove)" - Mirage_kv.Key.pp path (to_hex digest)) + Mirage_kv.Key.pp path (Ohex.encode digest)) in Some f else @@ -697,8 +583,8 @@ module Make and md5_final = if not (SSet.mem (Mirage_kv.Key.basename path) md5s) then let f s = - let digest = MD5.get s in - t.md5s <- SM.add (to_hex digest) (Mirage_kv.Key.basename path) t.md5s + let digest = MD5.(to_raw_string (get s)) in + t.md5s <- SM.add (Ohex.encode digest) (Mirage_kv.Key.basename path) t.md5s in Some f else @@ -706,8 +592,8 @@ module Make and sha512_final = if not (SSet.mem (Mirage_kv.Key.basename path) sha512s) then let f s = - let digest = SHA512.get s in - t.sha512s <- SM.add (to_hex digest) (Mirage_kv.Key.basename path) t.sha512s + let digest = SHA512.(to_raw_string (get s)) in + t.sha512s <- SM.add (Ohex.encode digest) (Mirage_kv.Key.basename path) t.sha512s in Some f else @@ -718,11 +604,10 @@ module Make | _ -> read_chunked t `SHA256 path (fun (sha256, md5, sha512) data -> - let cs = Cstruct.of_string data in Lwt.return - (Option.map (fun t -> SHA256.feed t cs) sha256, - Option.map (fun t -> MD5.feed t cs) md5, - Option.map (fun t -> SHA512.feed t cs) sha512)) + (Option.map (fun t -> SHA256.feed_string t data) sha256, + Option.map (fun t -> MD5.feed_string t data) md5, + Option.map (fun t -> SHA512.feed_string t data) sha512)) (Option.map (fun _ -> SHA256.empty) sha256_final, Option.map (fun _ -> MD5.empty) md5_final, Option.map (fun _ -> SHA512.empty) sha512_final) >|= function @@ -1083,7 +968,7 @@ stamp: %S incr idx; if !idx mod 10 = 0 then Gc.full_major () ; Logs.info (fun m -> m "downloading %s" url); - let quux, body_init = Disk.init_write csums in + let quux, body_init = Archive_checksum.init_write csums in Http_mirage_client.request http_client url (Disk.write_partial disk quux) body_init >>= function | Ok (resp, r) -> begin match r with