From d36a0714e77c8918032f0a1091a61ae4adf07a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Fri, 4 Oct 2024 16:03:54 +0200 Subject: [PATCH 1/6] Use GPTar table We expect the disk to be formatted already. --- mirage/config.ml | 2 + mirage/partitions.ml | 91 ++++++++++++++++++++++++++++++++++++++++++++ mirage/unikernel.ml | 15 ++------ 3 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 mirage/partitions.ml diff --git a/mirage/config.ml b/mirage/config.ml index 0d864f9..6a4dcbc 100644 --- a/mirage/config.ml +++ b/mirage/config.ml @@ -14,6 +14,8 @@ let mirror = package ~min:"3.0.0" ~sublibs:[ "gz" ] "tar" ; package ~min:"3.0.0" "tar-mirage" ; package ~max:"0.2.0" "mirage-block-partition" ; + package "gpt" ; + package "gptar" ~pin:"git+https://github.com/reynir/gptar.git" ; package "oneffs" ; package "digestif" ; ] diff --git a/mirage/partitions.ml b/mirage/partitions.ml new file mode 100644 index 0000000..c438b3c --- /dev/null +++ b/mirage/partitions.ml @@ -0,0 +1,91 @@ +open Lwt.Syntax + +module Make(BLOCK : Mirage_block.S) = struct + module Part = Mirage_block_partition.Make(BLOCK) + + include Part + + type partitions = { + tar : Part.t ; + git_dump : Part.t ; + md5s : Part.t ; + sha512s : Part.t ; + } + + let read_partition_table info block = + let b = Cstruct.create info.Mirage_block.sector_size in + (* We will ignore the protective MBR at lba [0L] *) + let* r = BLOCK.read block 1L [b] in + match r with + | Error e -> + Format.kasprintf failwith "Reading partition table: %a" + BLOCK.pp_error e + | Ok () -> + match Gpt.unmarshal b ~sector_size:info.Mirage_block.sector_size with + | Error e -> + Format.kasprintf failwith "Reading partition table: %s" e + | Ok (`Read_partition_table (lba, sectors), k) -> + let b = Cstruct.create (sectors * info.Mirage_block.sector_size) in + let* r = BLOCK.read block lba [b] in + match r with + | Error e -> + Format.kasprintf failwith "Reading partition table: %a" + BLOCK.pp_error e + | Ok () -> + match k b with + | Error e -> + Format.kasprintf failwith "Reading partition table: %s" e + | Ok gpt -> Lwt.return gpt + + let connect block = + let utf16be_of_ascii s = + String.init 72 + (fun i -> + if i mod 2 = 0 && i / 2 < String.length s then + s.[i/2] + else + '\000') + in + let* info = BLOCK.get_info block in + let* gpt = read_partition_table info block in + let tar, git_dump, md5s, sha512s = + match + List.fold_left + (fun (tar, git_dump, md5s, sha512s) p -> + if String.equal p.Gpt.Partition.name + (utf16be_of_ascii "tar") + then + (Some p, git_dump, md5s, sha512s) + else if String.equal p.name + (utf16be_of_ascii "git_dump") + then + (tar, Some p, md5s, sha512s) + else if String.equal p.name + (utf16be_of_ascii "md5s") + then + (tar, git_dump, Some p, sha512s) + else if String.equal p.name + (utf16be_of_ascii "sha512s") + then + (tar, git_dump, md5s, Some p) + else + Format.kasprintf failwith "Unknown partition %S" p.name) + (None, None, None, None) + gpt.partitions + with + | (Some tar, Some git_dump, Some md5s, Some sha512s) -> + (tar, git_dump, md5s, sha512s) + | _ -> + failwith "not all partitions found :(" + in + let+ (_empty, p) = Part.connect 0L block in + let get_part part = + let len = Int64.(succ (sub part.Gpt.Partition.ending_lba part.starting_lba)) in + let (_before, after) = Part.subpartition part.starting_lba p in + let (part, _after) = Part.subpartition len after in + part + in + let tar = get_part tar and git_dump = get_part git_dump + and md5s = get_part md5s and sha512s = get_part sha512s in + { tar ; git_dump ; md5s ; sha512s } +end diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index 50c9a30..2b94efc 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -67,7 +67,7 @@ module Make (_ : sig end) (HTTP : Http_mirage_client.S) = struct - module Part = Mirage_block_partition.Make(BLOCK) + module Part = Partitions.Make(BLOCK) module KV = Tar_mirage.Make_KV_RW(Pclock)(Part) module Cache = OneFFS.Make(Part) module Store = Git_kv.Make(Pclock) @@ -985,18 +985,11 @@ stamp: %S module Paf = Paf_mirage.Make(Stack.TCP) let start block _time _pclock stack git_ctx http_ctx = - BLOCK.get_info block >>= fun info -> - let git_start = - let cache_size = Int64.(mul 2L (K.sectors_cache ())) in - Int64.(sub info.size_sectors (add cache_size (K.sectors_git ()))) - in - Part.connect git_start block >>= fun (kv, rest) -> - let git_dump, rest = Part.subpartition (K.sectors_git ()) rest in - let md5s, sha512s = Part.subpartition (K.sectors_cache ()) rest in - KV.connect kv >>= fun kv -> + Part.connect block >>= fun { Part.tar ; git_dump; md5s ; sha512s } -> + KV.connect tar >>= fun kv -> + Cache.connect git_dump >>= fun git_dump -> Cache.connect md5s >>= fun md5s -> Cache.connect sha512s >>= fun sha512s -> - Cache.connect git_dump >>= fun git_dump -> Logs.info (fun m -> m "Available bytes in tar storage: %Ld" (KV.free kv)); Disk.init ~verify_sha256:(K.verify_sha256 ()) kv md5s sha512s >>= fun disk -> let remote = K.remote () in From ceb4674ec2bc3c41252b60d7618ef523ab90cdd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Wed, 9 Oct 2024 18:42:01 +0200 Subject: [PATCH 2/6] Add code for formatting a disk --- mirage/partitions.ml | 97 ++++++++++++++++++++++++++++++++++++++++---- mirage/unikernel.ml | 29 +++++++++++-- 2 files changed, 114 insertions(+), 12 deletions(-) diff --git a/mirage/partitions.ml b/mirage/partitions.ml index c438b3c..35748a6 100644 --- a/mirage/partitions.ml +++ b/mirage/partitions.ml @@ -12,6 +12,20 @@ module Make(BLOCK : Mirage_block.S) = struct sha512s : Part.t ; } + (* I just made these ones up... *) + let tar_guid = Uuidm.of_string "53cd6812-46cc-474e-a141-30b3aed85f53" |> Option.get + let cache_guid = Uuidm.of_string "22ab9cf5-6e51-45c2-998a-862e23aab264" |> Option.get + let git_guid = Uuidm.of_string "30faa50a-4c9d-47ff-a1a5-ecfb3401c027" |> Option.get + + (* GPT uses a 72 byte utf16be encoded string for partition names *) + let utf16be_of_ascii s = + String.init 72 + (fun i -> + if i mod 2 = 0 && i / 2 < String.length s then + s.[i/2] + else + '\000') + let read_partition_table info block = let b = Cstruct.create info.Mirage_block.sector_size in (* We will ignore the protective MBR at lba [0L] *) @@ -38,14 +52,6 @@ module Make(BLOCK : Mirage_block.S) = struct | Ok gpt -> Lwt.return gpt let connect block = - let utf16be_of_ascii s = - String.init 72 - (fun i -> - if i mod 2 = 0 && i / 2 < String.length s then - s.[i/2] - else - '\000') - in let* info = BLOCK.get_info block in let* gpt = read_partition_table info block in let tar, git_dump, md5s, sha512s = @@ -88,4 +94,79 @@ module Make(BLOCK : Mirage_block.S) = struct let tar = get_part tar and git_dump = get_part git_dump and md5s = get_part md5s and sha512s = get_part sha512s in { tar ; git_dump ; md5s ; sha512s } + + let format block ~sectors_cache ~sectors_git = + let* { size_sectors; sector_size; _ } = BLOCK.get_info block in + let ( let*? ) = Lwt_result.bind in + (* ocaml-gpt uses a fixed size partition entries table. Create an empty GPT + to figure out the first usable LBA *) + let empty = + Gpt.make ~sector_size ~disk_sectors:size_sectors [] + |> Result.get_ok + in + let*? () = + if size_sectors < + (* protective MBR + GPT header + GPT table *) + let ( + ) = Int64.add in + empty.first_usable_lba + + min 1L (Int64.of_int (2 * Tar.Header.length / sector_size)) + sectors_cache + sectors_cache + sectors_git + + 1L (* backup GPT header *) then + Lwt.return_error (`Msg "too small disk") + else Lwt_result.return () + in + let gpt = + let partitions = + (* Current implementation of [Gpt.Partition.make] only returns [Ok _] or + raises [Invalid_argument _] :/ *) + let attributes = 1L in + let sha512s = + Gpt.Partition.make + ~name:(utf16be_of_ascii "sha512s") + ~type_guid:cache_guid + ~attributes + Int64.(succ (sub empty.last_usable_lba sectors_cache)) + empty.last_usable_lba + |> Result.get_ok + in + let md5s = + Gpt.Partition.make + ~name:(utf16be_of_ascii "md5s") + ~type_guid:cache_guid + ~attributes + (Int64.sub sha512s.starting_lba sectors_cache) + (Int64.pred sha512s.starting_lba) + |> Result.get_ok + in + let git_dump = + Gpt.Partition.make + ~name:(utf16be_of_ascii "git_dump") + ~type_guid:git_guid + ~attributes + (Int64.sub md5s.starting_lba sectors_git) + (Int64.pred md5s.starting_lba) + |> Result.get_ok + in + let tar = + Gpt.Partition.make + ~name:(utf16be_of_ascii "tar") + ~type_guid:tar_guid + ~attributes + empty.first_usable_lba + (Int64.pred git_dump.starting_lba) + |> Result.get_ok + in + [ tar; git_dump; md5s; sha512s ] + in + Gpt.make ~sector_size ~disk_sectors:size_sectors partitions + |> Result.get_ok + in + let buf = + Cstruct.create (sector_size * (Int64.to_int gpt.first_usable_lba + 2 * Tar.Header.length)) + in + Gptar.marshal_header ~sector_size buf gpt; + Gpt.marshal_partition_table ~sector_size + (Cstruct.shift buf (sector_size * Int64.to_int gpt.partition_entry_lba)) + gpt; + BLOCK.write block 0L [ buf ] + |> Lwt_result.map_error (fun e -> `Block e) end diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index 2b94efc..af94243 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -44,15 +44,20 @@ module K = struct Mirage_runtime.register_arg Arg.(value & opt int 80 doc) let sectors_cache = - let doc = "Number of sectors reserved for each checksum cache (md5, sha512)." in + let doc = "Number of sectors reserved for each checksum cache (md5, sha512). Only used with --initialize-disk." in let doc = Arg.info ~doc ["sectors-cache"] in Mirage_runtime.register_arg Arg.(value & opt int64 Int64.(mul 4L 2048L) doc) let sectors_git = - let doc = "Number of sectors reserved for git dump." in + let doc = "Number of sectors reserved for git dump. Only used with --initialize-disk" in let doc = Arg.info ~doc ["sectors-git"] in Mirage_runtime.register_arg Arg.(value & opt int64 Int64.(mul 40L (mul 2L 1024L)) doc) + let initialize_disk = + let doc = "Initialize the disk with a partition table. THIS IS DESTRUCTIVE!" in + let doc = Arg.info ~doc ["initialize-disk"] in + Mirage_runtime.register_arg Arg.(value & flag doc) + let ignore_local_git = let doc = "Ignore restoring locally saved git repository." in let doc = Arg.info ~doc ["ignore-local-git"] in @@ -984,8 +989,7 @@ stamp: %S module Paf = Paf_mirage.Make(Stack.TCP) - let start block _time _pclock stack git_ctx http_ctx = - Part.connect block >>= fun { Part.tar ; git_dump; md5s ; sha512s } -> + let start_mirror { Part.tar; git_dump; md5s; sha512s } stack git_ctx http_ctx = KV.connect tar >>= fun kv -> Cache.connect git_dump >>= fun git_dump -> Cache.connect md5s >>= fun md5s -> @@ -1036,4 +1040,21 @@ stamp: %S go ()); download_archives (K.parallel_downloads ()) disk http_ctx git_kv >>= fun () -> (th >|= fun _v -> ()) + + let start block _time _pclock stack git_ctx http_ctx = + let initialize_disk = K.initialize_disk () + and sectors_cache = K.sectors_cache () + and sectors_git = K.sectors_git () in + if initialize_disk then + Part.format block ~sectors_cache ~sectors_git >>= function + | Ok () -> Lwt.return_unit + | Error `Msg e -> + Logs.err (fun m -> m "Error formatting disk: %s" e); + exit Mirage_runtime.argument_error + | Error `Block e -> + Logs.err (fun m -> m "Error formatting disk: %a" BLOCK.pp_write_error e); + exit 2 + else + Part.connect block >>= fun parts -> + start_mirror parts stack git_ctx http_ctx end From 719b4ea45d1bc6af2c6cc7cf308483a1c88e0c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Wed, 9 Oct 2024 19:54:40 +0200 Subject: [PATCH 3/6] Repo tarball: use level Ustar --- mirage/unikernel.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index af94243..0e2809d 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -693,7 +693,7 @@ module Make and size = String.length data in let hdr = Tar.Header.make ~file_mode ~mod_time ~user_id ~group_id (Mirage_kv.Key.to_string path) (Int64.of_int size) in - Some (None, hdr, once data) + Some (Some Tar.Header.Ustar, hdr, once data) | Error _ -> None in let entries = Lwt_stream.filter_map_s to_entry entries in Lwt.return begin fun () -> Tar.High (High.inj (Lwt_stream.get entries >|= Result.ok)) end @@ -702,7 +702,7 @@ module Make let now = Ptime.v (Pclock.now_d_ps ()) in let mtime = Option.value ~default:0 Ptime.(Span.to_int_s (to_span now)) in entries_of_git ~mtime store repo >>= fun entries -> - let t = Tar.out entries in + let t = Tar.out ~level:Ustar entries in let t = Tar_gz.out_gzipped ~level:4 ~mtime:(Int32.of_int mtime) Gz.Unix t in let buf = Buffer.create 1024 in to_buffer buf t >|= function From 068c640dacfcbc40b74c3d6c55b49ef1535376ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Thu, 10 Oct 2024 10:24:57 +0200 Subject: [PATCH 4/6] Reset the partitions when initializing the disk THIS DESTROYS DATA --- mirage/partitions.ml | 99 ++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 41 deletions(-) diff --git a/mirage/partitions.ml b/mirage/partitions.ml index 35748a6..583a629 100644 --- a/mirage/partitions.ml +++ b/mirage/partitions.ml @@ -114,47 +114,47 @@ module Make(BLOCK : Mirage_block.S) = struct Lwt.return_error (`Msg "too small disk") else Lwt_result.return () in + (* Current implementation of [Gpt.Partition.make] only returns [Ok _] or + raises [Invalid_argument _] :/ *) + let attributes = 1L in + let sha512s = + Gpt.Partition.make + ~name:(utf16be_of_ascii "sha512s") + ~type_guid:cache_guid + ~attributes + Int64.(succ (sub empty.last_usable_lba sectors_cache)) + empty.last_usable_lba + |> Result.get_ok + in + let md5s = + Gpt.Partition.make + ~name:(utf16be_of_ascii "md5s") + ~type_guid:cache_guid + ~attributes + (Int64.sub sha512s.starting_lba sectors_cache) + (Int64.pred sha512s.starting_lba) + |> Result.get_ok + in + let git_dump = + Gpt.Partition.make + ~name:(utf16be_of_ascii "git_dump") + ~type_guid:git_guid + ~attributes + (Int64.sub md5s.starting_lba sectors_git) + (Int64.pred md5s.starting_lba) + |> Result.get_ok + in + let tar = + Gpt.Partition.make + ~name:(utf16be_of_ascii "tar") + ~type_guid:tar_guid + ~attributes + empty.first_usable_lba + (Int64.pred git_dump.starting_lba) + |> Result.get_ok + in let gpt = let partitions = - (* Current implementation of [Gpt.Partition.make] only returns [Ok _] or - raises [Invalid_argument _] :/ *) - let attributes = 1L in - let sha512s = - Gpt.Partition.make - ~name:(utf16be_of_ascii "sha512s") - ~type_guid:cache_guid - ~attributes - Int64.(succ (sub empty.last_usable_lba sectors_cache)) - empty.last_usable_lba - |> Result.get_ok - in - let md5s = - Gpt.Partition.make - ~name:(utf16be_of_ascii "md5s") - ~type_guid:cache_guid - ~attributes - (Int64.sub sha512s.starting_lba sectors_cache) - (Int64.pred sha512s.starting_lba) - |> Result.get_ok - in - let git_dump = - Gpt.Partition.make - ~name:(utf16be_of_ascii "git_dump") - ~type_guid:git_guid - ~attributes - (Int64.sub md5s.starting_lba sectors_git) - (Int64.pred md5s.starting_lba) - |> Result.get_ok - in - let tar = - Gpt.Partition.make - ~name:(utf16be_of_ascii "tar") - ~type_guid:tar_guid - ~attributes - empty.first_usable_lba - (Int64.pred git_dump.starting_lba) - |> Result.get_ok - in [ tar; git_dump; md5s; sha512s ] in Gpt.make ~sector_size ~disk_sectors:size_sectors partitions @@ -167,6 +167,23 @@ module Make(BLOCK : Mirage_block.S) = struct Gpt.marshal_partition_table ~sector_size (Cstruct.shift buf (sector_size * Int64.to_int gpt.partition_entry_lba)) gpt; - BLOCK.write block 0L [ buf ] - |> Lwt_result.map_error (fun e -> `Block e) + let write block sector_start buffers = + BLOCK.write block sector_start buffers + |> Lwt_result.map_error (fun e -> `Block e) + in + let*? () = + write block 0L [ buf ] + in + (* Format the file systems by writing zeroes *) + let zeroes = Cstruct.create (max (2 * Tar.Header.length) sector_size) in + let*? () = + write block tar.starting_lba [ Cstruct.sub zeroes 0 (2 * Tar.Header.length) ] + in + let*? () = + write block git_dump.starting_lba [ Cstruct.sub zeroes 0 sector_size ] + in + let*? () = + write block md5s.starting_lba [ Cstruct.sub zeroes 0 sector_size ] + in + write block sha512s.starting_lba [ Cstruct.sub zeroes 0 sector_size ] end From 30266f4e09e805e0e51f2696ab1da1aa64f8053b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Thu, 10 Oct 2024 10:31:58 +0200 Subject: [PATCH 5/6] Fix sector alignment bug in formatting --- mirage/partitions.ml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/mirage/partitions.ml b/mirage/partitions.ml index 583a629..2c1d19f 100644 --- a/mirage/partitions.ml +++ b/mirage/partitions.ml @@ -175,15 +175,23 @@ module Make(BLOCK : Mirage_block.S) = struct write block 0L [ buf ] in (* Format the file systems by writing zeroes *) - let zeroes = Cstruct.create (max (2 * Tar.Header.length) sector_size) in + (* For tar we need to zero (at least) the first 2*512 bytes so we round up + to the nearest sector alignment *) + let zeroes = + let sectors = + (2 * Tar.Header.length + sector_size - 1) / sector_size * sector_size + in + Cstruct.create sectors in let*? () = - write block tar.starting_lba [ Cstruct.sub zeroes 0 (2 * Tar.Header.length) ] + write block tar.starting_lba [ zeroes ] + in + (* For the OneFFS filesystems we just need to zero out the first sector *) + let zero_sector = Cstruct.create sector_size in + let*? () = + write block git_dump.starting_lba [ zero_sector ] in let*? () = - write block git_dump.starting_lba [ Cstruct.sub zeroes 0 sector_size ] + write block md5s.starting_lba [ zero_sector ] in - let*? () = - write block md5s.starting_lba [ Cstruct.sub zeroes 0 sector_size ] - in - write block sha512s.starting_lba [ Cstruct.sub zeroes 0 sector_size ] + write block sha512s.starting_lba [ zero_sector ] end From 26643fbcde41625fa868b822370516b717a681d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reynir=20Bj=C3=B6rnsson?= Date: Wed, 16 Oct 2024 11:06:48 +0200 Subject: [PATCH 6/6] Add a message on successful --initialize-disk --- mirage/unikernel.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mirage/unikernel.ml b/mirage/unikernel.ml index 0e2809d..7201eba 100644 --- a/mirage/unikernel.ml +++ b/mirage/unikernel.ml @@ -1047,7 +1047,9 @@ stamp: %S and sectors_git = K.sectors_git () in if initialize_disk then Part.format block ~sectors_cache ~sectors_git >>= function - | Ok () -> Lwt.return_unit + | Ok () -> + Logs.app (fun m -> m "Successfully initialized the disk! You may restart now without --initialize-disk."); + Lwt.return_unit | Error `Msg e -> Logs.err (fun m -> m "Error formatting disk: %s" e); exit Mirage_runtime.argument_error