This commit is contained in:
Robur 2023-05-02 12:03:25 +00:00
parent ebeadf69d8
commit fd8ce3be03

View file

@ -18,26 +18,28 @@ module Make
module SM = Map.Make(String) module SM = Map.Make(String)
module SSet = Set.Make(String) module SSet = Set.Make(String)
let compare_hash h h' =
match h, h' with
| `SHA512, `SHA512 -> 0
| `SHA512, _ -> 1
| _, `SHA512 -> -1
| `SHA384, `SHA384 -> 0
| `SHA384, _ -> 1
| _, `SHA384 -> -1
| `SHA256, `SHA256 -> 0
| `SHA256, _ -> 1
| _, `SHA256 -> -1
| `SHA224, `SHA224 -> 0
| `SHA224, _ -> 1
| _, `SHA224 -> -1
| `SHA1, `SHA1 -> 0
| `SHA1, `MD5 -> 1
| `MD5, `MD5 -> 0
| `MD5, _ -> -1
module HM = Map.Make(struct module HM = Map.Make(struct
type t = Mirage_crypto.Hash.hash type t = Mirage_crypto.Hash.hash
let compare h h' = let compare = compare_hash
match h, h' with
| `SHA512, `SHA512 -> 0
| `SHA512, _ -> 1
| _, `SHA512 -> -1
| `SHA384, `SHA384 -> 0
| `SHA384, _ -> 1
| _, `SHA384 -> -1
| `SHA256, `SHA256 -> 0
| `SHA256, _ -> 1
| _, `SHA256 -> -1
| `SHA224, `SHA224 -> 0
| `SHA224, _ -> 1
| _, `SHA224 -> -1
| `SHA1, `SHA1 -> 0
| `SHA1, `MD5 -> 1
| `MD5, `MD5 -> 0
| `MD5, _ -> -1
end) end)
let hash_to_string = function let hash_to_string = function
@ -312,6 +314,35 @@ module Make
in in
read_more a Optint.Int63.zero read_more a Optint.Int63.zero
(*
module HM_running = struct
let empty h =
let module H = (val Mirage_crypto.Hash.module_of h) in
(* We need MD5, SHA256 and SHA512. [h] is likely one of the
aforementioned and in that case we don't compute the same hash twice
*)
HM.empty
|> HM.add `MD5 Mirage_crypto.Hash.MD5.empty
|> HM.add `SHA256 Mirage_crypto.Hash.SHA256.empty
|> HM.add `SHA512 Mirage_crypto.Hash.SHA512.empty
|> HM.add h H.empty
let feed t data =
HM.map (fun h v ->
let module H = (val Mirage_crypto.Hash.module_of h) in
H.feed v data)
t
let get =
HM.map (fun h v ->
let module H = (val Mirage_crypto.Hash.module_of h) in
H.get v)
end
*)
module Running_hash = struct module Running_hash = struct
type _ t = type _ t =
| MD5 : Mirage_crypto.Hash.MD5.t -> [> `MD5 ] t | MD5 : Mirage_crypto.Hash.MD5.t -> [> `MD5 ] t
@ -378,7 +409,7 @@ module Make
let init_write csums = let init_write csums =
let hash, csum = HM.max_binding csums in let hash, csum = HM.max_binding csums in
(hash, csum), Ok (empty_digests hash, Optint.Int63.zero, "") (hash, csum), Ok (empty_digests hash, `Init)
let content_length_of_string s = let content_length_of_string s =
match Int64.of_string s with match Int64.of_string s with
@ -423,29 +454,37 @@ module Make
in in
Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand)) Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand))
let write_partial t (hash, csum) : _ -> (_ * Optint.Int63.t * string, _) result -> string -> _ result Lwt.t = let write_partial t (hash, csum) =
(* XXX: we may be in trouble if different hash functions are used for the same archive *) (* XXX: we may be in trouble if different hash functions are used for the same archive *)
let key = pending_key (hash, csum) in let key = pending_key (hash, csum) in
let ( >>>= ) = Lwt_result.bind in let ( >>>= ) = Lwt_result.bind in
fun response r data -> fun response r data ->
Lwt.return r >>>= fun (digests, offset, body) -> Lwt.return r >>>= fun (digests, acc) ->
let len = String.length data in let digests = update_digests digests data in
match body_length response with match acc with
| `Bad_response -> Lwt.return (Error `Bad_response) | `Init ->
| `Fixed size -> begin match body_length response with
begin if Optint.Int63.equal offset Optint.Int63.zero then | `Bad_response -> Lwt.return (Error `Bad_response)
KV.allocate t.dev key (Optint.Int63.of_int64 size) | `Fixed size ->
|> Lwt_result.map_error (fun e -> `Write_error e) KV.allocate t.dev key (Optint.Int63.of_int64 size)
else |> Lwt_result.map_error (fun e -> `Write_error e)
Lwt.return (Ok ()) >>>= fun () ->
end >>>= fun () -> KV.set_partial t.dev key ~offset:Optint.Int63.zero data
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
let len = String.length data in
let offset = Optint.Int63.of_int len in
Lwt.return_ok (digests, `Fixed_body (size, offset))
| `Unknown ->
Lwt.return_ok (digests, `Unknown data)
end
| `Fixed_body (size, offset) ->
KV.set_partial t.dev key ~offset data KV.set_partial t.dev key ~offset data
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () -> |> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
let digests = update_digests digests data in let len = String.length data in
Lwt.return_ok (digests, Optint.Int63.(add offset (of_int len)), body) let offset = Optint.Int63.(add offset (of_int len)) in
| `Unknown -> Lwt.return_ok (digests, `Fixed_body (size, offset))
let digests = update_digests digests data in | `Unknown body ->
Lwt.return_ok (digests, Optint.Int63.(add offset (of_int len)), body ^ data) Lwt.return_ok (digests, `Unknown (body ^ data))
let digests_to_hm digests = let digests_to_hm digests =
HM.empty HM.empty
@ -464,40 +503,57 @@ module Make
(fun (h, csum) -> String.equal csum (HM.find h csums)) (fun (h, csum) -> String.equal csum (HM.find h csums))
common_bindings common_bindings
let finalize_write t (hash, csum) csums digests = let finalize_write t (hash, csum) ~url body csums digests =
let source = pending_key (hash, csum) in let sizes_match, body_size_in_header =
if check_csums_digests csums digests then match body with
| `Fixed_body (reported, actual) -> Optint.Int63.(equal (of_int reported) actual), true
| `Unknown _ -> true, false
in
if check_csums_digests csums digests && sizes_match then
let sha256 = to_hex (Mirage_crypto.Hash.SHA256.get digests.sha256) let sha256 = to_hex (Mirage_crypto.Hash.SHA256.get digests.sha256)
and md5 = to_hex (Mirage_crypto.Hash.MD5.get digests.md5) and md5 = to_hex (Mirage_crypto.Hash.MD5.get digests.md5)
and sha512 = to_hex (Mirage_crypto.Hash.SHA512.get digests.sha512) in and sha512 = to_hex (Mirage_crypto.Hash.SHA512.get digests.sha512) in
let dest = Mirage_kv.Key.v sha256 in let dest = Mirage_kv.Key.v sha256 in
KV.rename t.dev ~source ~dest >|= function begin match body with
| `Unknown body ->
Logs.info (fun m -> m "downloaded %s, now writing" url);
KV.set t.dev dest body
| `Fixed_body (reported_size, actual_size) ->
Logs.info (fun m -> m "downloaded %s" url);
let source = pending_key (hash, csum) in
KV.rename t.dev ~source ~dest
end >|= function
| Ok () -> | Ok () ->
t.md5s <- SM.add md5 sha256 t.md5s; t.md5s <- SM.add md5 sha256 t.md5s;
t.sha512s <- SM.add sha512 sha256 t.sha512s; t.sha512s <- SM.add sha512 sha256 t.sha512s
Ok ()
| Error e -> Error (`Write_error e)
else
(* if the checksums mismatch we want to delete the file. We are only
able to do so if it was the latest created file, so we expect and
error. Ideally, we want to match for `Append_only or other errors *)
KV.remove t.dev source >>= function
| Ok () ->
Logs.info (fun m -> m "Removed %a" Mirage_kv.Key.pp source);
Lwt_result.fail (`Bad_checksum (hash, csum))
| Error e -> | Error e ->
Logs.debug (fun m -> m "Failed to remove %a: %a" Logs.err (fun m -> m "Write failure for %s: %a" url KV.pp_write_error e)
Mirage_kv.Key.pp source KV.pp_write_error e); else begin
(* we failed to delete the file so we mark it for deletion *) if sizes_match then
let dest = to_delete_key (hash, csum) in Logs.err (fun m -> m "Bad checksum %s: computed %s expected %s" url
Logs.warn (fun m -> m "Failed to remove %a: %a. Moving it to %a" (hash_to_string hash) (hex_to_string csum))
Mirage_kv.Key.pp source KV.pp_write_error e Mirage_kv.Key.pp dest); else
KV.rename t.dev ~source ~dest >|= function Logs.err (fun m -> m "Size mismatch %s: received %a bytes expected %a bytes" url
| Ok () -> Error (`Bad_checksum (hash, csum)) Optint.Int63.pp actual Optint.Int63.pp reported);
if body_size_in_header then
(* if the checksums mismatch we want to delete the file. We are only
able to do so if it was the latest created file, so we expect and
error. Ideally, we want to match for `Append_only or other errors *)
KV.remove t.dev source >>= function
| Ok () -> Lwt.return_unit
| Error e -> | Error e ->
Logs.warn (fun m -> m "Error renaming file %a -> %a: %a" (* we failed to delete the file so we mark it for deletion *)
Mirage_kv.Key.pp source Mirage_kv.Key.pp dest KV.pp_write_error e); let dest = to_delete_key (hash, csum) in
Error (`Bad_checksum (hash, csum)) Logs.warn (fun m -> m "Failed to remove %a: %a. Moving it to %a"
Mirage_kv.Key.pp source KV.pp_write_error e Mirage_kv.Key.pp dest);
KV.rename t.dev ~source ~dest >|= function
| Ok () -> ()
| Error e ->
Logs.warn (fun m -> m "Error renaming file %a -> %a: %a"
Mirage_kv.Key.pp source Mirage_kv.Key.pp dest KV.pp_write_error e)
else
Lwt.return_unit
end
(* on disk, we use a flat file system where the filename is the sha256 of the data *) (* on disk, we use a flat file system where the filename is the sha256 of the data *)
@ -593,28 +649,6 @@ module Make
update_caches t >|= fun () -> update_caches t >|= fun () ->
t t
let write t ~url data hm digests =
let cs = Cstruct.of_string data in
let sha256 = Mirage_crypto.Hash.digest `SHA256 cs |> to_hex
and md5 = Mirage_crypto.Hash.digest `MD5 cs |> to_hex
and sha512 = Mirage_crypto.Hash.digest `SHA512 cs |> to_hex
in
if check_csums_digests hm digests
then begin
KV.set t.dev (Mirage_kv.Key.v sha256) data >|= function
| Ok () ->
t.md5s <- SM.add md5 sha256 t.md5s;
t.sha512s <- SM.add sha512 sha256 t.sha512s;
Logs.debug (fun m -> m "wrote %s (%d bytes)" sha256
(String.length data))
| Error e ->
Logs.err (fun m -> m "error %a while writing %s (key %s)"
KV.pp_write_error e url sha256)
end else begin
Logs.err (fun m -> m "Bad checksum for %s" url);
Lwt.return_unit
end
let exists t h v = let exists t h v =
match find_key t h v with match find_key t h v with
| Error _ -> Lwt.return false | Error _ -> Lwt.return false
@ -952,26 +986,17 @@ stamp: %S
let quux, body_init = Disk.init_write csums in let quux, body_init = Disk.init_write csums in
Http_mirage_client.request http_client url (Disk.write_partial disk quux) body_init >>= function Http_mirage_client.request http_client url (Disk.write_partial disk quux) body_init >>= function
| Ok (resp, r) -> | Ok (resp, r) ->
begin match Disk.body_length resp, r with begin match r with
| `Bad_response, _ | _, Error `Bad_response -> | Error `Bad_response ->
Logs.warn (fun m -> m "%s: %a (reason %s)" Logs.warn (fun m -> m "%s: %a (reason %s)"
url H2.Status.pp_hum resp.status resp.reason); url H2.Status.pp_hum resp.status resp.reason);
Lwt.return_unit Lwt.return_unit
| _, Error `Write_error e -> | Error `Write_error e ->
Logs.warn (fun m -> m "%s: write error %a" Logs.err (fun m -> m "%s: write error %a"
url KV.pp_write_error e); url KV.pp_write_error e);
Lwt.return_unit Lwt.return_unit
| `Unknown, Ok (digests, _, body) -> | Ok (digests, body) ->
Logs.info (fun m -> m "downloaded %s, now writing..." url); Disk.finalize_write disk quux ~url body csums digests
Disk.write disk ~url body csums digests
| `Fixed _size, Ok (digests, _, _) ->
Logs.info (fun m -> m "downloaded %s" url);
Disk.finalize_write disk quux csums digests >|= function
| Ok () -> ()
| Error (`Write_error e) ->
Logs.warn (fun m -> m "Error writing %s: %a" url KV.pp_write_error e)
| Error `Bad_checksum (hash, csum) ->
Logs.err (fun m -> m "%s hash mismatch, expected %s:%s" url (hash_to_string hash) (hex_to_string csum))
end end
| _ -> Lwt.return_unit) | _ -> Lwt.return_unit)
(SM.bindings urls) >>= fun () -> (SM.bindings urls) >>= fun () ->