always use swap, remove the pending / to_delete stuff
This commit is contained in:
parent
456340562d
commit
e59f02a16f
2 changed files with 23 additions and 182 deletions
|
@ -52,7 +52,7 @@ let update_digests { md5; sha256; sha512 } data =
|
||||||
|
|
||||||
let init_write csums =
|
let init_write csums =
|
||||||
let hash, csum = HM.max_binding csums in
|
let hash, csum = HM.max_binding csums in
|
||||||
(hash, csum), Ok (empty_digests, `Init)
|
(hash, csum), empty_digests
|
||||||
|
|
||||||
let digests_to_hm digests =
|
let digests_to_hm digests =
|
||||||
HM.empty
|
HM.empty
|
||||||
|
|
|
@ -198,10 +198,6 @@ module Make
|
||||||
dev_swap : Swap.t ;
|
dev_swap : Swap.t ;
|
||||||
}
|
}
|
||||||
|
|
||||||
let pending = Mirage_kv.Key.v "pending"
|
|
||||||
|
|
||||||
let to_delete = Mirage_kv.Key.v "to-delete"
|
|
||||||
|
|
||||||
let empty dev dev_md5s dev_sha512s dev_swap = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s ; dev_swap }
|
let empty dev dev_md5s dev_sha512s dev_swap = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s ; dev_swap }
|
||||||
|
|
||||||
let marshal_sm (sm : string SM.t) =
|
let marshal_sm (sm : string SM.t) =
|
||||||
|
@ -267,118 +263,28 @@ module Make
|
||||||
in
|
in
|
||||||
read_more a Optint.Int63.zero
|
read_more a Optint.Int63.zero
|
||||||
|
|
||||||
(*
|
let key_of_hash_csum (hash, csum) = match hash with
|
||||||
module HM_running = struct
|
|
||||||
|
|
||||||
let empty h =
|
|
||||||
let module H = (val Mirage_crypto.Hash.module_of h) in
|
|
||||||
(* We need MD5, SHA256 and SHA512. [h] is likely one of the
|
|
||||||
aforementioned and in that case we don't compute the same hash twice
|
|
||||||
*)
|
|
||||||
HM.empty
|
|
||||||
|> HM.add `MD5 Mirage_crypto.Hash.MD5.empty
|
|
||||||
|> HM.add `SHA256 Mirage_crypto.Hash.SHA256.empty
|
|
||||||
|> HM.add `SHA512 Mirage_crypto.Hash.SHA512.empty
|
|
||||||
|> HM.add h H.empty
|
|
||||||
|
|
||||||
let feed t data =
|
|
||||||
HM.map (fun h v ->
|
|
||||||
let module H = (val Mirage_crypto.Hash.module_of h) in
|
|
||||||
H.feed v data)
|
|
||||||
t
|
|
||||||
|
|
||||||
let get =
|
|
||||||
HM.map (fun h v ->
|
|
||||||
let module H = (val Mirage_crypto.Hash.module_of h) in
|
|
||||||
H.get v)
|
|
||||||
|
|
||||||
|
|
||||||
end
|
|
||||||
*)
|
|
||||||
|
|
||||||
let content_length_of_string s =
|
|
||||||
match Int64.of_string s with
|
|
||||||
| len when len >= 0L -> `Fixed len
|
|
||||||
| _ | exception _ -> `Bad_response
|
|
||||||
|
|
||||||
let body_length headers =
|
|
||||||
match H2.Headers.get_multi headers "content-length" with
|
|
||||||
| [] -> `Unknown
|
|
||||||
| [ x ] -> content_length_of_string x
|
|
||||||
| hd :: tl ->
|
|
||||||
(* if there are multiple content-length headers we require them all to be
|
|
||||||
* exactly equal. *)
|
|
||||||
if List.for_all (String.equal hd) tl then
|
|
||||||
content_length_of_string hd
|
|
||||||
else
|
|
||||||
`Bad_response
|
|
||||||
|
|
||||||
let body_length (response : Http_mirage_client.response) =
|
|
||||||
if response.status <> `OK then
|
|
||||||
`Bad_response
|
|
||||||
else
|
|
||||||
body_length response.headers
|
|
||||||
|
|
||||||
let pending_key (hash, csum) =
|
|
||||||
match hash with
|
|
||||||
| `SHA512 ->
|
| `SHA512 ->
|
||||||
(* We can't use hex because the filename would become too long for tar *)
|
(* We can't use hex because the filename would become too long for tar *)
|
||||||
Mirage_kv.Key.(pending / hash_to_string hash / Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum)
|
Mirage_kv.Key.(v (hash_to_string hash) / Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum)
|
||||||
| _ ->
|
| _ ->
|
||||||
Mirage_kv.Key.(pending / hash_to_string hash / Ohex.encode csum)
|
Mirage_kv.Key.(v (hash_to_string hash) / Ohex.encode csum)
|
||||||
|
|
||||||
let to_delete_key (hash, csum) =
|
let init_write t csums =
|
||||||
let rand = "random" in (* FIXME: generate random string *)
|
let quux, csums = Archive_checksum.init_write csums in
|
||||||
let encoded_csum =
|
let swap = Swap.empty t.dev_swap in
|
||||||
match hash with
|
quux, Ok (csums, swap)
|
||||||
| `SHA512 ->
|
|
||||||
(* We can't use hex because the filename would become too long for tar *)
|
|
||||||
Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum
|
|
||||||
| _ ->
|
|
||||||
Ohex.encode csum
|
|
||||||
in
|
|
||||||
Mirage_kv.Key.(to_delete / hash_to_string hash / (encoded_csum ^ "." ^ rand))
|
|
||||||
|
|
||||||
let write_partial t (hash, csum) url =
|
let write_partial t (hash, csum) url =
|
||||||
(* XXX: we may be in trouble if different hash functions are used for the same archive *)
|
(* XXX: we may be in trouble if different hash functions are used for the same archive *)
|
||||||
let key = pending_key (hash, csum) in
|
|
||||||
let ( >>>= ) = Lwt_result.bind in
|
let ( >>>= ) = Lwt_result.bind in
|
||||||
fun response r data ->
|
fun response r data ->
|
||||||
Lwt.return r >>>= fun (digests, acc) ->
|
Lwt.return r >>>= fun (digests, swap) ->
|
||||||
let digests = Archive_checksum.update_digests digests data in
|
let digests = Archive_checksum.update_digests digests data in
|
||||||
match acc with
|
active_add_bytes url (String.length data);
|
||||||
| `Init ->
|
Swap.append swap data >|= function
|
||||||
begin match body_length response with
|
| Ok () -> Ok (digests, swap)
|
||||||
| `Bad_response -> Lwt.return (Error `Bad_response)
|
| Error swap_err -> Error (`Swap swap_err)
|
||||||
| `Fixed size ->
|
|
||||||
KV.allocate t.dev key (Optint.Int63.of_int64 size)
|
|
||||||
|> Lwt_result.map_error (fun e -> `Write_error e)
|
|
||||||
>>>= fun () ->
|
|
||||||
KV.set_partial t.dev key ~offset:Optint.Int63.zero data
|
|
||||||
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
|
|
||||||
let len = String.length data in
|
|
||||||
let offset = Optint.Int63.of_int len in
|
|
||||||
active_length url len (Int64.to_string size ^ " bytes");
|
|
||||||
Lwt.return_ok (digests, `Fixed_body (size, offset))
|
|
||||||
| `Unknown ->
|
|
||||||
active_add_bytes url (String.length data);
|
|
||||||
let h = Swap.empty t.dev_swap in
|
|
||||||
Swap.append h data >|= function
|
|
||||||
| Ok () -> Ok (digests, `Unknown h)
|
|
||||||
| Error swap_err -> Error (`Swap swap_err)
|
|
||||||
end
|
|
||||||
| `Fixed_body (size, offset) ->
|
|
||||||
KV.set_partial t.dev key ~offset data
|
|
||||||
|> Lwt_result.map_error (fun e -> `Write_error e) >>>= fun () ->
|
|
||||||
let len = String.length data in
|
|
||||||
let offset = Optint.Int63.(add offset (of_int len)) in
|
|
||||||
active_add_bytes url len;
|
|
||||||
Lwt.return_ok (digests, `Fixed_body (size, offset))
|
|
||||||
| `Unknown h ->
|
|
||||||
active_add_bytes url (String.length data);
|
|
||||||
Swap.append h data >|= function
|
|
||||||
| Ok () -> Ok (digests, `Unknown h)
|
|
||||||
| Error swap_err -> Error (`Swap swap_err)
|
|
||||||
|
|
||||||
let check_csums_digests csums digests =
|
let check_csums_digests csums digests =
|
||||||
let csums' = Archive_checksum.digests_to_hm digests in
|
let csums' = Archive_checksum.digests_to_hm digests in
|
||||||
|
@ -413,34 +319,16 @@ module Make
|
||||||
| Error e ->
|
| Error e ->
|
||||||
Lwt.return (Error (`Write_error e))
|
Lwt.return (Error (`Write_error e))
|
||||||
|
|
||||||
let finalize_write t (hash, csum) ~url (body : [ `Unknown of Swap.handle | `Fixed_body of int64 * Optint.Int63.t | `Init ]) csums digests =
|
let finalize_write t (hash, csum) ~url swap csums digests =
|
||||||
let sizes_match, body_size_in_header =
|
if check_csums_digests csums digests then
|
||||||
match body with
|
|
||||||
| `Fixed_body (reported, actual) -> Optint.Int63.(equal (of_int64 reported) actual), true
|
|
||||||
| `Unknown _ -> true, false
|
|
||||||
| `Init -> assert false
|
|
||||||
in
|
|
||||||
let source = pending_key (hash, csum) in
|
|
||||||
if check_csums_digests csums digests && sizes_match then
|
|
||||||
let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256))
|
let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256))
|
||||||
and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5))
|
and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5))
|
||||||
and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in
|
and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in
|
||||||
let dest = Mirage_kv.Key.v sha256 in
|
let dest = Mirage_kv.Key.v sha256 in
|
||||||
begin match body with
|
Logs.info (fun m -> m "downloaded %s, now writing" url);
|
||||||
| `Unknown h ->
|
(Lwt.finalize (fun () -> set_from_handle t.dev dest swap)
|
||||||
Logs.info (fun m -> m "downloaded %s, now writing" url);
|
(fun () -> Swap.free swap))
|
||||||
Lwt_result.bind
|
>|= function
|
||||||
(Lwt.finalize (fun () -> set_from_handle t.dev source h)
|
|
||||||
(fun () -> Swap.free h))
|
|
||||||
(fun () ->
|
|
||||||
KV.rename t.dev ~source ~dest
|
|
||||||
|> Lwt_result.map_error (fun e -> `Write_error e))
|
|
||||||
| `Fixed_body (_reported_size, _actual_size) ->
|
|
||||||
Logs.info (fun m -> m "downloaded %s" url);
|
|
||||||
KV.rename t.dev ~source ~dest
|
|
||||||
|> Lwt_result.map_error (fun e -> `Write_error e)
|
|
||||||
| `Init -> assert false
|
|
||||||
end >|= function
|
|
||||||
| Ok () ->
|
| Ok () ->
|
||||||
remove_active url;
|
remove_active url;
|
||||||
t.md5s <- SM.add md5 sha256 t.md5s;
|
t.md5s <- SM.add md5 sha256 t.md5s;
|
||||||
|
@ -453,45 +341,7 @@ module Make
|
||||||
Logs.err (fun m -> m "Write failure for %s: %a" url pp_error e);
|
Logs.err (fun m -> m "Write failure for %s: %a" url pp_error e);
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
||||||
(Fmt.str "Write failure for %s: %a" url pp_error e)
|
(Fmt.str "Write failure for %s: %a" url pp_error e)
|
||||||
else begin
|
else Lwt.return_unit
|
||||||
(if sizes_match then begin
|
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
|
||||||
(Fmt.str "Bad checksum %s:%s: computed %s expected %s" url
|
|
||||||
(hash_to_string hash)
|
|
||||||
(Ohex.encode (Archive_checksum.get digests hash))
|
|
||||||
(Ohex.encode csum));
|
|
||||||
Logs.err (fun m -> m "Bad checksum %s:%s: computed %s expected %s" url
|
|
||||||
(hash_to_string hash)
|
|
||||||
(Ohex.encode (Archive_checksum.get digests hash))
|
|
||||||
(Ohex.encode csum))
|
|
||||||
end else match body with
|
|
||||||
| `Fixed_body (reported, actual) ->
|
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
|
||||||
(Fmt.str "Size mismatch %s: received %a bytes expected %Lu bytes"
|
|
||||||
url Optint.Int63.pp actual reported);
|
|
||||||
Logs.err (fun m -> m "Size mismatch %s: received %a bytes expected %Lu bytes"
|
|
||||||
url Optint.Int63.pp actual reported)
|
|
||||||
| `Unknown _ -> assert false
|
|
||||||
| `Init -> assert false);
|
|
||||||
if body_size_in_header then
|
|
||||||
(* if the checksums mismatch we want to delete the file. We are only
|
|
||||||
able to do so if it was the latest created file, so we expect and
|
|
||||||
error. Ideally, we want to match for `Append_only or other errors *)
|
|
||||||
KV.remove t.dev source >>= function
|
|
||||||
| Ok () -> Lwt.return_unit
|
|
||||||
| Error e ->
|
|
||||||
(* we failed to delete the file so we mark it for deletion *)
|
|
||||||
let dest = to_delete_key (hash, csum) in
|
|
||||||
Logs.warn (fun m -> m "Failed to remove %a: %a. Moving it to %a"
|
|
||||||
Mirage_kv.Key.pp source KV.pp_write_error e Mirage_kv.Key.pp dest);
|
|
||||||
KV.rename t.dev ~source ~dest >|= function
|
|
||||||
| Ok () -> ()
|
|
||||||
| Error e ->
|
|
||||||
Logs.warn (fun m -> m "Error renaming file %a -> %a: %a"
|
|
||||||
Mirage_kv.Key.pp source Mirage_kv.Key.pp dest KV.pp_write_error e)
|
|
||||||
else
|
|
||||||
Lwt.return_unit
|
|
||||||
end
|
|
||||||
|
|
||||||
(* on disk, we use a flat file system where the filename is the sha256 of the data *)
|
(* on disk, we use a flat file system where the filename is the sha256 of the data *)
|
||||||
let init ~verify_sha256 dev dev_md5s dev_sha512s dev_swap =
|
let init ~verify_sha256 dev dev_md5s dev_sha512s dev_swap =
|
||||||
|
@ -516,12 +366,6 @@ module Make
|
||||||
let md5s = SSet.of_list (List.map snd (SM.bindings t.md5s))
|
let md5s = SSet.of_list (List.map snd (SM.bindings t.md5s))
|
||||||
and sha512s = SSet.of_list (List.map snd (SM.bindings t.sha512s)) in
|
and sha512s = SSet.of_list (List.map snd (SM.bindings t.sha512s)) in
|
||||||
let idx = ref 1 in
|
let idx = ref 1 in
|
||||||
(* XXX: should we do something about pending downloads?? *)
|
|
||||||
let entries =
|
|
||||||
List.filter (fun (p, _) ->
|
|
||||||
not (Mirage_kv.Key.equal p pending || Mirage_kv.Key.equal p to_delete))
|
|
||||||
entries
|
|
||||||
in
|
|
||||||
Lwt_list.iter_s (fun (path, typ) ->
|
Lwt_list.iter_s (fun (path, typ) ->
|
||||||
if !idx mod 10 = 0 then Gc.full_major () ;
|
if !idx mod 10 = 0 then Gc.full_major () ;
|
||||||
match typ with
|
match typ with
|
||||||
|
@ -961,7 +805,6 @@ stamp: %S
|
||||||
let idx = ref 0 in
|
let idx = ref 0 in
|
||||||
Lwt_list.iter_p (fun (url, csums) ->
|
Lwt_list.iter_p (fun (url, csums) ->
|
||||||
Lwt_pool.use pool @@ fun () ->
|
Lwt_pool.use pool @@ fun () ->
|
||||||
(* FIXME: check pending and to-delete *)
|
|
||||||
HM.fold (fun h v r ->
|
HM.fold (fun h v r ->
|
||||||
r >>= function
|
r >>= function
|
||||||
| true -> Disk.exists disk h (hex_to_key v)
|
| true -> Disk.exists disk h (hex_to_key v)
|
||||||
|
@ -974,7 +817,7 @@ stamp: %S
|
||||||
incr idx;
|
incr idx;
|
||||||
if !idx mod 10 = 0 then Gc.full_major () ;
|
if !idx mod 10 = 0 then Gc.full_major () ;
|
||||||
Logs.info (fun m -> m "downloading %s" url);
|
Logs.info (fun m -> m "downloading %s" url);
|
||||||
let quux, body_init = Archive_checksum.init_write csums in
|
let quux, body_init = Disk.init_write disk csums in
|
||||||
add_to_active url (Ptime.v (Pclock.now_d_ps ()));
|
add_to_active url (Ptime.v (Pclock.now_d_ps ()));
|
||||||
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
|
Http_mirage_client.request http_client url (Disk.write_partial disk quux url) body_init >>= function
|
||||||
| Ok (resp, r) ->
|
| Ok (resp, r) ->
|
||||||
|
@ -986,17 +829,15 @@ stamp: %S
|
||||||
(Fmt.str "%a %s" H2.Status.pp_hum resp.status resp.reason);
|
(Fmt.str "%a %s" H2.Status.pp_hum resp.status resp.reason);
|
||||||
Lwt.return_unit
|
Lwt.return_unit
|
||||||
| Error `Write_error e ->
|
| Error `Write_error e ->
|
||||||
Logs.err (fun m -> m "%s: write error %a %a"
|
Logs.err (fun m -> m "%s: write error %a"
|
||||||
url
|
url
|
||||||
Mirage_kv.Key.pp (Disk.pending_key quux)
|
|
||||||
KV.pp_write_error e);
|
KV.pp_write_error e);
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
||||||
(Fmt.str "write error: %a" KV.pp_write_error e);
|
(Fmt.str "write error: %a" KV.pp_write_error e);
|
||||||
Lwt.return_unit
|
Lwt.return_unit
|
||||||
| Error `Swap e ->
|
| Error `Swap e ->
|
||||||
Logs.err (fun m -> m "%s: swap error %a %a"
|
Logs.err (fun m -> m "%s: swap error %a"
|
||||||
url
|
url
|
||||||
Mirage_kv.Key.pp (Disk.pending_key quux)
|
|
||||||
Swap.pp_error e);
|
Swap.pp_error e);
|
||||||
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
add_failed url (Ptime.v (Pclock.now_d_ps ()))
|
||||||
(Fmt.str "swap error: %a" Swap.pp_error e);
|
(Fmt.str "swap error: %a" Swap.pp_error e);
|
||||||
|
|
Loading…
Reference in a new issue