open Lwt.Infix let argument_error = 64 module K = struct open Cmdliner let check = let doc = Arg.info ~doc:"Only check the cache" ["check"] in Mirage_runtime.register_arg Arg.(value & flag doc) let verify_sha256 = let doc = Arg.info ~doc:"Verify the SHA256 checksums of the cache contents, and \ re-build the other checksum caches." ["verify-sha256"] in Mirage_runtime.register_arg Arg.(value & flag doc) let remote = let doc = Arg.info ~doc:"Remote repository url, use suffix #foo to specify a branch 'foo': \ https://github.com/ocaml/opam-repository.git" ["remote"] in Mirage_runtime.register_arg Arg.(value & opt string "https://github.com/ocaml/opam-repository.git#master" doc) let parallel_downloads = let doc = Arg.info ~doc:"Amount of parallel HTTP downloads" ["parallel-downloads"] in Mirage_runtime.register_arg Arg.(value & opt int 20 doc) let hook_url = let doc = Arg.info ~doc:"URL to conduct an update of the git repository" ["hook-url"] in Mirage_runtime.register_arg Arg.(value & opt string "update" doc) let port = let doc = Arg.info ~doc:"HTTP listen port." ["port"] in Mirage_runtime.register_arg Arg.(value & opt int 80 doc) let sectors_cache = let doc = "Number of sectors reserved for each checksum cache (md5, sha512). Only used with --initialize-disk." in let doc = Arg.info ~doc ["sectors-cache"] in Mirage_runtime.register_arg Arg.(value & opt int64 Int64.(mul 4L 2048L) doc) let sectors_git = let doc = "Number of sectors reserved for git dump. Only used with --initialize-disk" in let doc = Arg.info ~doc ["sectors-git"] in Mirage_runtime.register_arg Arg.(value & opt int64 Int64.(mul 40L (mul 2L 1024L)) doc) let sectors_swap = let doc = "Number of sectors reserved for swap. Only used with --initialize-disk" in let doc = Arg.info ~doc ["sectors-swap"] in Mirage_runtime.register_arg Arg.(value & opt int64 Int64.(mul 1024L 2048L) doc) let initialize_disk = let doc = "Initialize the disk with a partition table. THIS IS DESTRUCTIVE!" in let doc = Arg.info ~doc ["initialize-disk"] in Mirage_runtime.register_arg Arg.(value & flag doc) let ignore_local_git = let doc = "Ignore restoring locally saved git repository." in let doc = Arg.info ~doc ["ignore-local-git"] in Mirage_runtime.register_arg Arg.(value & flag doc) end module Make (BLOCK : Mirage_block.S) (Time : Mirage_time.S) (Pclock : Mirage_clock.PCLOCK) (Stack : Tcpip.Stack.V4V6) (_ : sig end) (HTTP : Http_mirage_client.S) = struct module Part = Partitions.Make(BLOCK) module KV = Tar_mirage.Make_KV_RW(Pclock)(Part) module Cache = OneFFS.Make(Part) module Swap = Swapfs.Make(Part) module Store = Git_kv.Make(Pclock) module SM = Map.Make(String) module SSet = Set.Make(String) let compare_hash = Archive_checksum.Hash.compare module HM = Archive_checksum.HM let hash_to_string = Archive_checksum.Hash.to_string let hash_of_string = Archive_checksum.Hash.of_string let hex_to_key h = Mirage_kv.Key.v (Ohex.encode h) let hm_to_s hm = HM.fold (fun h v acc -> hash_to_string h ^ "=" ^ Ohex.encode v ^ "\n" ^ acc) hm "" module Git = struct let find_contents store = let rec go store path acc = Store.list store path >>= function | Error e -> Logs.err (fun m -> m "error %a while listing %a" Store.pp_error e Mirage_kv.Key.pp path); Lwt.return acc | Ok steps -> Lwt_list.fold_left_s (fun acc (step, _) -> Store.exists store step >>= function | Error e -> Logs.err (fun m -> m "error %a for exists %a" Store.pp_error e Mirage_kv.Key.pp step); Lwt.return acc | Ok None -> Logs.warn (fun m -> m "no typ for %a" Mirage_kv.Key.pp step); Lwt.return acc | Ok Some `Value -> Lwt.return (step :: acc) | Ok Some `Dictionary -> go store step acc) acc steps in go store Mirage_kv.Key.empty [] let find_urls store = find_contents store >>= fun paths -> let opam_paths = List.filter (fun p -> Mirage_kv.Key.basename p = "opam") paths in Lwt_list.fold_left_s (fun acc path -> Store.get store path >|= function | Ok data -> (* TODO report parser errors *) (try let url_csums = Opam_file.extract_urls (Mirage_kv.Key.to_string path) data in List.fold_left (fun acc (url, csums) -> if HM.cardinal csums = 0 then (Logs.warn (fun m -> m "no checksums for %s, ignoring" url); acc) else SM.update url (function | None -> Some csums | Some csums' -> if HM.for_all (fun h v -> match HM.find_opt h csums with | None -> true | Some v' -> String.equal v v') csums' then Some (HM.union (fun _h v _v' -> Some v) csums csums') else begin Logs.warn (fun m -> m "mismatching hashes for %s: %s vs %s" url (hm_to_s csums') (hm_to_s csums)); None end) acc) acc url_csums with _ -> Logs.warn (fun m -> m "some error in %a, ignoring" Mirage_kv.Key.pp path); acc) | Error e -> Logs.warn (fun m -> m "Store.get: %a" Store.pp_error e); acc) SM.empty opam_paths end let active_downloads = ref SM.empty let add_to_active url ts = active_downloads := SM.add url (ts, 0, "unknown size") !active_downloads let remove_active url = active_downloads := SM.remove url !active_downloads let active_length url written length = match SM.find_opt url !active_downloads with | None -> () | Some (ts, written', _) -> active_downloads := SM.add url (ts, written + written', length) !active_downloads let active_add_bytes url written = match SM.find_opt url !active_downloads with | None -> () | Some (ts, written', l) -> active_downloads := SM.add url (ts, written + written', l) !active_downloads let failed_downloads = ref SM.empty let add_failed url ts reason = remove_active url; failed_downloads := SM.add url (ts, reason) !failed_downloads module Disk = struct type t = { mutable md5s : string SM.t ; mutable sha512s : string SM.t ; dev : KV.t ; dev_md5s : Cache.t ; dev_sha512s : Cache.t ; dev_swap : Swap.t ; } let empty dev dev_md5s dev_sha512s dev_swap = { md5s = SM.empty ; sha512s = SM.empty ; dev; dev_md5s; dev_sha512s ; dev_swap } let marshal_sm (sm : string SM.t) = let version = char_of_int 1 in String.make 1 version ^ Marshal.to_string sm [] let unmarshal_sm s = let version = int_of_char s.[0] in match version with | 1 -> Ok (Marshal.from_string s 1 : string SM.t) | _ -> Error ("Unsupported version " ^ string_of_int version) let update_caches t = Cache.write t.dev_md5s (marshal_sm t.md5s) >>= fun r -> (match r with | Ok () -> Logs.info (fun m -> m "Set 'md5s'") | Error e -> Logs.warn (fun m -> m "Failed to write 'md5s': %a" Cache.pp_write_error e)); Cache.write t.dev_sha512s (marshal_sm t.sha512s) >>= fun r -> match r with | Ok () -> Logs.info (fun m -> m "Set 'sha512s'"); Lwt.return_unit | Error e -> Logs.warn (fun m -> m "Failed to write 'sha512s': %a" Cache.pp_write_error e); Lwt.return_unit let find_key t h key = assert (List.length (Mirage_kv.Key.segments key) = 1); match match h with | `MD5 -> Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) t.md5s) | `SHA512 -> Option.map Mirage_kv.Key.v (SM.find_opt (Mirage_kv.Key.basename key) t.sha512s) | `SHA256 -> Some key | _ -> None with | None -> Error `Not_found | Some x -> Ok x let read_chunked t h v f a = match find_key t h v with | Error `Not_found -> Lwt.return (Error (`Not_found v)) | Ok key -> KV.size t.dev key >>= function | Error e -> Logs.err (fun m -> m "error %a while reading %s %a" KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v); Lwt.return (Error (`Not_found key)) | Ok len -> let chunk_size = 4096 in let rec read_more a offset = if offset < len then KV.get_partial t.dev key ~offset ~length:chunk_size >>= function | Ok data -> f a data >>= fun a -> read_more a Optint.Int63.(add offset (of_int chunk_size)) | Error e -> Logs.err (fun m -> m "error %a while reading %s %a" KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v); Lwt.return (Error e) else Lwt.return (Ok a) in read_more a Optint.Int63.zero let key_of_hash_csum (hash, csum) = match hash with | `SHA512 -> (* We can't use hex because the filename would become too long for tar *) Mirage_kv.Key.(v (hash_to_string hash) / Base64.encode_string ~alphabet:Base64.uri_safe_alphabet ~pad:false csum) | _ -> Mirage_kv.Key.(v (hash_to_string hash) / Ohex.encode csum) let init_write t csums = let quux, csums = Archive_checksum.init_write csums in let swap = Swap.empty t.dev_swap in quux, Ok (csums, swap) let write_partial t (hash, csum) url = (* XXX: we may be in trouble if different hash functions are used for the same archive *) let ( >>>= ) = Lwt_result.bind in fun response r data -> Lwt.return r >>>= fun (digests, swap) -> let digests = Archive_checksum.update_digests digests data in active_add_bytes url (String.length data); Swap.append swap data >|= function | Ok () -> Ok (digests, swap) | Error swap_err -> Error (`Swap swap_err) let check_csums_digests csums digests = let csums' = Archive_checksum.digests_to_hm digests in let common_bindings = List.filter (fun (h, _) -> HM.mem h csums) (HM.bindings csums') in List.length common_bindings > 0 && List.for_all (fun (h, csum) -> String.equal csum (HM.find h csums)) common_bindings let set_from_handle dev dest h = let size = Optint.Int63.of_int64 (Swap.size h) in KV.allocate dev dest size >>= fun r -> let rec loop offset = if offset = Swap.size h then Lwt.return_ok () else let length = Int64.(to_int (min 4096L (sub (Swap.size h) offset))) in Swap.get_partial h ~offset ~length >>= fun r -> match r with | Error e -> Lwt.return (Error (`Swap e)) | Ok data -> KV.set_partial dev dest ~offset:(Optint.Int63.of_int64 offset) data >>= fun r -> match r with | Error e -> Lwt.return (Error (`Write_error e)) | Ok () -> loop Int64.(add offset (of_int length)) in match r with | Ok () -> loop 0L | Error e -> Lwt.return (Error (`Write_error e)) let finalize_write t (hash, csum) ~url swap csums digests = if check_csums_digests csums digests then let sha256 = Ohex.encode Digestif.SHA256.(to_raw_string (get digests.sha256)) and md5 = Ohex.encode Digestif.MD5.(to_raw_string (get digests.md5)) and sha512 = Ohex.encode Digestif.SHA512.(to_raw_string (get digests.sha512)) in let dest = Mirage_kv.Key.v sha256 in Logs.info (fun m -> m "downloaded %s, now writing" url); (Lwt.finalize (fun () -> set_from_handle t.dev dest swap) (fun () -> Swap.free swap)) >|= function | Ok () -> remove_active url; t.md5s <- SM.add md5 sha256 t.md5s; t.sha512s <- SM.add sha512 sha256 t.sha512s | Error e -> let pp_error ppf = function | `Write_error e -> KV.pp_write_error ppf e | `Swap e -> Swap.pp_error ppf e in Logs.err (fun m -> m "Write failure for %s: %a" url pp_error e); add_failed url (Ptime.v (Pclock.now_d_ps ())) (Fmt.str "Write failure for %s: %a" url pp_error e) else Lwt.return_unit (* on disk, we use a flat file system where the filename is the sha256 of the data *) let init ~verify_sha256 dev dev_md5s dev_sha512s dev_swap = KV.list dev Mirage_kv.Key.empty >>= function | Error e -> Logs.err (fun m -> m "error %a listing kv" KV.pp_error e); assert false | Ok entries -> let t = empty dev dev_md5s dev_sha512s dev_swap in Cache.read t.dev_md5s >>= fun r -> (match r with | Ok Some s -> if not verify_sha256 then Result.iter (fun md5s -> t.md5s <- md5s) (unmarshal_sm s) | Ok None -> Logs.debug (fun m -> m "No md5s cached") | Error e -> Logs.warn (fun m -> m "Error reading md5s cache: %a" Cache.pp_error e)); Cache.read t.dev_sha512s >>= fun r -> (match r with | Ok Some s -> if not verify_sha256 then Result.iter (fun sha512s -> t.sha512s <- sha512s) (unmarshal_sm s) | Ok None -> Logs.debug (fun m -> m "No sha512s cached") | Error e -> Logs.warn (fun m -> m "Error reading sha512s cache: %a" Cache.pp_error e)); let md5s = SSet.of_list (List.map snd (SM.bindings t.md5s)) and sha512s = SSet.of_list (List.map snd (SM.bindings t.sha512s)) in let idx = ref 1 in Lwt_list.iter_s (fun (path, typ) -> if !idx mod 10 = 0 then Gc.full_major () ; match typ with | `Dictionary -> Logs.warn (fun m -> m "unexpected dictionary at %a" Mirage_kv.Key.pp path); Lwt.return_unit | `Value -> let open Digestif in let sha256_final = if verify_sha256 then let f s = let digest = SHA256.(to_raw_string (get s)) in if not (String.equal (Mirage_kv.Key.basename path) (Ohex.encode digest)) then Logs.err (fun m -> m "corrupt SHA256 data for %a, \ computed %s (should remove)" Mirage_kv.Key.pp path (Ohex.encode digest)) in Some f else None and md5_final = if not (SSet.mem (Mirage_kv.Key.basename path) md5s) then let f s = let digest = MD5.(to_raw_string (get s)) in t.md5s <- SM.add (Ohex.encode digest) (Mirage_kv.Key.basename path) t.md5s in Some f else None and sha512_final = if not (SSet.mem (Mirage_kv.Key.basename path) sha512s) then let f s = let digest = SHA512.(to_raw_string (get s)) in t.sha512s <- SM.add (Ohex.encode digest) (Mirage_kv.Key.basename path) t.sha512s in Some f else None in match sha256_final, md5_final, sha512_final with | None, None, None -> Lwt.return_unit | _ -> read_chunked t `SHA256 path (fun (sha256, md5, sha512) data -> Lwt.return (Option.map (fun t -> SHA256.feed_string t data) sha256, Option.map (fun t -> MD5.feed_string t data) md5, Option.map (fun t -> SHA512.feed_string t data) sha512)) (Option.map (fun _ -> SHA256.empty) sha256_final, Option.map (fun _ -> MD5.empty) md5_final, Option.map (fun _ -> SHA512.empty) sha512_final) >|= function | Error e -> Logs.err (fun m -> m "error %a of %a while computing digests" KV.pp_error e Mirage_kv.Key.pp path) | Ok (sha256, md5, sha512) -> Option.iter (fun f -> f (Option.get sha256)) sha256_final; Option.iter (fun f -> f (Option.get md5)) md5_final; Option.iter (fun f -> f (Option.get sha512)) sha512_final; Logs.info (fun m -> m "added %a" Mirage_kv.Key.pp path)) entries >>= fun () -> update_caches t >|= fun () -> t let exists t h v = match find_key t h v with | Error _ -> Lwt.return false | Ok x -> KV.exists t.dev x >|= function | Ok Some `Value -> true | Ok Some `Dictionary -> Logs.err (fun m -> m "unexpected dictionary for %s %a" (hash_to_string h) Mirage_kv.Key.pp v); false | Ok None -> false | Error e -> Logs.err (fun m -> m "exists %s %a returned %a" (hash_to_string h) Mirage_kv.Key.pp v KV.pp_error e); false let last_modified t h v = match find_key t h v with | Error _ as e -> Lwt.return e | Ok x -> KV.last_modified t.dev x >|= function | Ok data -> Ok data | Error e -> Logs.err (fun m -> m "error %a while last_modified %s %a" KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v); Error `Not_found let size t h v = match find_key t h v with | Error _ as e -> Lwt.return e | Ok x -> KV.size t.dev x >|= function | Ok s -> Ok s | Error e -> Logs.err (fun m -> m "error %a while size %s %a" KV.pp_error e (hash_to_string h) Mirage_kv.Key.pp v); Error `Not_found end module Tarball = struct module High : sig type t type 'a s = 'a Lwt.t external inj : 'a s -> ('a, t) Tar.io = "%identity" external prj : ('a, t) Tar.io -> 'a s = "%identity" end = struct type t type 'a s = 'a Lwt.t external inj : 'a -> 'b = "%identity" external prj : 'a -> 'b = "%identity" end let to_buffer buf t = let rec run : type a. (a, [> `Msg of string ] as 'err, High.t) Tar.t -> (a, 'err) result Lwt.t = function | Tar.Write str -> Buffer.add_string buf str; Lwt.return_ok () | Tar.Read _ -> assert false | Tar.Really_read _ -> assert false | Tar.Seek _ -> assert false | Tar.Return value -> Lwt.return value | Tar.High value -> High.prj value | Tar.Bind (x, f) -> let open Lwt_result.Infix in run x >>= fun value -> run (f value) in run t let once data = let closed = ref false in fun () -> if !closed then Tar.High (High.inj (Lwt.return_ok None)) else begin closed := true; Tar.High (High.inj (Lwt.return_ok (Some data))) end let entries_of_git ~mtime store repo = Git.find_contents store >>= fun paths -> let entries = Lwt_stream.of_list paths in let to_entry path = Store.get store path >|= function | Ok data -> let data = if Mirage_kv.Key.(equal path (v "repo")) then repo else data in let file_mode = 0o644 and mod_time = Int64.of_int mtime and user_id = 0 and group_id = 0 and size = String.length data in let hdr = Tar.Header.make ~file_mode ~mod_time ~user_id ~group_id (Mirage_kv.Key.to_string path) (Int64.of_int size) in Some (Some Tar.Header.Ustar, hdr, once data) | Error _ -> None in let entries = Lwt_stream.filter_map_s to_entry entries in Lwt.return begin fun () -> Tar.High (High.inj (Lwt_stream.get entries >|= Result.ok)) end let of_git repo store = let now = Ptime.v (Pclock.now_d_ps ()) in let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in entries_of_git ~mtime store repo >>= fun entries -> let t = Tar.out ~level:Ustar entries in let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in let buf = Buffer.create 1024 in to_buffer buf t >|= function | Ok () -> Buffer.contents buf | Error (`Msg msg) -> failwith msg end module Serve = struct let ptime_to_http_date ptime = let (y, m, d), ((hh, mm, ss), _) = Ptime.to_date_time ptime and weekday = match Ptime.weekday ptime with | `Mon -> "Mon" | `Tue -> "Tue" | `Wed -> "Wed" | `Thu -> "Thu" | `Fri -> "Fri" | `Sat -> "Sat" | `Sun -> "Sun" and month = [| "Jan" ; "Feb" ; "Mar" ; "Apr" ; "May" ; "Jun" ; "Jul" ; "Aug" ; "Sep" ; "Oct" ; "Nov" ; "Dec" |] in let m' = Array.get month (pred m) in Printf.sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT" weekday d m' y hh mm ss let commit_id git_kv = Store.digest git_kv Mirage_kv.Key.empty >|= fun r -> Result.fold r ~ok:Fun.id ~error:(fun e -> Logs.err (fun m -> m "%a" Store.pp_error e); exit 2) let repo remote commit = let upstream = List.hd (String.split_on_char '#' remote) in Fmt.str {|opam-version: "2.0" upstream: "%s#%s" archive-mirrors: "cache" stamp: %S |} upstream commit commit let modified git_kv = Store.last_modified git_kv Mirage_kv.Key.empty >|= fun r -> let v = Result.fold r ~ok:Fun.id ~error:(fun _ -> Ptime.v (Pclock.now_d_ps ())) in ptime_to_http_date v type t = { mutable commit_id : string ; mutable modified : string ; mutable repo : string ; mutable index : string ; } let create remote git_kv = commit_id git_kv >>= fun commit_id -> modified git_kv >>= fun modified -> let repo = repo remote commit_id in Tarball.of_git repo git_kv >|= fun index -> { commit_id ; modified ; repo ; index } let update_lock = Lwt_mutex.create () let update_git ~remote t git_kv = Lwt_mutex.with_lock update_lock (fun () -> Logs.info (fun m -> m "pulling the git repository"); Git_kv.pull git_kv >>= function | Error `Msg msg -> Logs.err (fun m -> m "error %s while updating git" msg); Lwt.return None | Ok [] -> Logs.info (fun m -> m "git changes are empty"); Lwt.return (Some []) | Ok changes -> commit_id git_kv >>= fun commit_id -> modified git_kv >>= fun modified -> Logs.info (fun m -> m "git: %s" commit_id); let repo = repo remote commit_id in Tarball.of_git repo git_kv >|= fun index -> t.commit_id <- commit_id ; t.modified <- modified ; t.repo <- repo ; t.index <- index; Some changes) let status disk = (* report status: - archive size (can we easily measure?) and number of "good" elements - list of current downloads - list of failed downloads *) let archive_stats = Fmt.str "