Merge pull request 'Implement a way to serialize the Git state' (#8) from store into main

Reviewed-on: https://git.robur.io/robur/git-kv/pulls/8
This commit is contained in:
Hannes Mehnert 2022-09-27 14:30:49 +00:00
commit d3263053dc
7 changed files with 443 additions and 26 deletions

4
app/dune Normal file
View file

@ -0,0 +1,4 @@
(executable
(name mgit)
(public_name mgit)
(libraries git-unix git-kv))

97
app/mgit.ml Normal file
View file

@ -0,0 +1,97 @@
let () = Printexc.record_backtrace true
open Rresult
open Lwt.Infix
let get ~quiet store key =
Git_kv.get store key >>= function
| Ok contents when not quiet ->
Fmt.pr "@[<hov>%a@]\n%!" (Hxd_string.pp Hxd.default) contents ;
Lwt.return (Ok 0)
| Ok _ -> Lwt.return (Ok 0)
| Error err ->
if not quiet then Fmt.epr "%a.\n%!" Git_kv.pp_error err ;
Lwt.return (Ok 1)
let list ~quiet store key =
Git_kv.list store key >>= function
| Ok lst when not quiet ->
List.iter (fun (name, k) -> match k with
| `Dictionary -> Fmt.pr "d %s\n%!" name
| `Value -> Fmt.pr "- %s\n%!" name) lst ;
Lwt.return (Ok 0)
| Ok _ -> Lwt.return (Ok 0)
| Error err ->
if not quiet then Fmt.epr "%a.\n%!" Git_kv.pp_error err ;
Lwt.return (Ok 1)
let pull ~quiet store =
Git_kv.pull store >>= function
| Error (`Msg err) -> if not quiet then Fmt.epr "%s.\n%!" err ; Lwt.return (Ok 1)
| Ok diff when not quiet ->
List.iter (function
| `Add key -> Fmt.pr "+ %a\n%!" Mirage_kv.Key.pp key
| `Remove key -> Fmt.pr "- %a\n%!" Mirage_kv.Key.pp key
| `Change key -> Fmt.pr "* %a\n%!" Mirage_kv.Key.pp key) diff ;
Lwt.return (Ok 0)
| Ok _ -> Lwt.return (Ok 0)
let save store filename =
let oc = open_out filename in
Git_kv.to_octets store >>= fun contents ->
output_string oc contents ;
close_out oc ;
Lwt.return (Ok 0)
let trim lst =
List.fold_left (fun acc -> function
| "" -> acc
| str -> str :: acc) [] lst |> List.rev
let with_key ~f key =
match Mirage_kv.Key.v key with
| key -> f key
| exception _ ->
Fmt.epr "Invalid key: %S.\n%!" key ;
Lwt.return (Ok 1)
let repl store ic =
let rec go () = Fmt.pr "# %!" ; match String.split_on_char ' ' (input_line ic) |> trim with
| [ "get"; key; ] ->
with_key ~f:(get ~quiet:false store) key >|= ignore >>= go
| [ "list"; key; ] ->
with_key ~f:(list ~quiet:false store) key >|= ignore >>= go
| [ "pull"; ] ->
Fmt.pr "\n%!" ; pull ~quiet:false store >|= ignore >>= go
| [ "quit"; ] -> Lwt.return ()
| [ "save"; filename ] ->
save store filename >|= ignore >>= fun _ ->
Fmt.pr "\n%!" ; go ()
| _ -> Fmt.epr "Invalid command.\n%!" ; go ()
| exception End_of_file -> Lwt.return () in
go ()
let run remote = function
| None ->
Lwt_main.run @@
(Git_unix.ctx (Happy_eyeballs_lwt.create ()) >>= fun ctx ->
Git_kv.connect ctx remote >>= fun t ->
repl t stdin)
| Some filename ->
let contents =
let ic = open_in filename in
let ln = in_channel_length ic in
let bs = Bytes.create ln in
really_input ic bs 0 ln ;
Bytes.unsafe_to_string bs in
Lwt_main.run
( Git_unix.ctx (Happy_eyeballs_lwt.create ()) >>= fun ctx ->
Git_kv.of_octets ctx ~remote contents >>= function
| Ok t -> repl t stdin
| Error (`Msg err) -> Fmt.failwith "%s." err )
let () = match Sys.argv with
| [| _; remote; |] -> run remote None
| [| _; remote; filename; |] when Sys.file_exists filename ->
run remote (Some filename)
| _ -> Fmt.epr "%s <remote> [<filename>]\n%!" Sys.argv.(0)

View file

@ -1,3 +1,4 @@
(lang dune 2.6) (lang dune 2.8)
(name git-kv) (name git-kv)
(formatting disabled) (formatting disabled)
(cram enable)

View file

@ -37,11 +37,13 @@ let split_url s =
Smart_git.Endpoint.of_string s |> to_invalid, main Smart_git.Endpoint.of_string s |> to_invalid, main
let fpath_to_key ~root v = let fpath_to_key ~root v =
Mirage_kv.Key.v (Fpath.to_string (Option.get (Fpath.relativize ~root v))) if Fpath.equal root v
then Mirage_kv.Key.empty
else Mirage_kv.Key.v (Fpath.to_string (Option.get (Fpath.relativize ~root v)))
let diff store commit0 commit1 = let diff store commit0 commit1 =
let open Lwt.Infix in let open Lwt.Infix in
let root = Fpath.v "." in let root = Fpath.v "./" in
let tbl0 = Hashtbl.create 0x10 in let tbl0 = Hashtbl.create 0x10 in
Store.fold store (fun () ?name ~length:_ hash _value -> Store.fold store (fun () ?name ~length:_ hash _value ->
Option.iter (fun name -> Hashtbl.add tbl0 (fpath_to_key ~root name) hash) name ; Option.iter (fun name -> Hashtbl.add tbl0 (fpath_to_key ~root name) hash) name ;
@ -50,12 +52,13 @@ let diff store commit0 commit1 =
Store.fold store (fun () ?name ~length:_ hash _value -> Store.fold store (fun () ?name ~length:_ hash _value ->
Option.iter (fun name -> Hashtbl.add tbl1 (fpath_to_key ~root name) hash) name ; Option.iter (fun name -> Hashtbl.add tbl1 (fpath_to_key ~root name) hash) name ;
Lwt.return ()) () ~path:root commit1 >>= fun () -> Lwt.return ()) () ~path:root commit1 >>= fun () ->
let diff = Hashtbl.fold (fun name hash diff -> match Hashtbl.find_opt tbl1 name with let diff = Hashtbl.fold (fun name hash diff ->
match Hashtbl.find_opt tbl1 name with
| Some hash' when not (Digestif.SHA1.equal hash hash') -> `Change name :: diff | Some hash' when not (Digestif.SHA1.equal hash hash') -> `Change name :: diff
| Some _ -> diff | Some _ -> diff
| None -> `Remove name :: diff) tbl0 [] in | None -> `Remove name :: diff) tbl0 [] in
let diff = Hashtbl.fold (fun name _hash diff -> let diff = Hashtbl.fold (fun name _hash diff ->
if Hashtbl.mem tbl0 name if not (Hashtbl.mem tbl0 name)
then `Add name :: diff else diff) tbl1 diff in then `Add name :: diff else diff) tbl1 diff in
Lwt.return diff Lwt.return diff
@ -115,29 +118,291 @@ let pp_error ppf = Mirage_kv.pp_error ppf
let disconnect _t = Lwt.return_unit let disconnect _t = Lwt.return_unit
let to_octets t = module SHA1 = struct
(* TODO maybe preserve edn and branch as well? *) include Digestif.SHA1
let hash x = Hashtbl.hash x
let feed ctx ?off ?len ba = feed_bigstring ctx ?off ?len ba
let null = digest_string ""
let length = digest_size
let compare a b =
String.compare
(to_raw_string a) (to_raw_string b)
end
module Verbose = struct
type 'a fiber = 'a Lwt.t
let succ _ = Lwt.return_unit
let print _ = Lwt.return_unit
end
module Lwt_scheduler = struct
module Mutex = struct
type 'a fiber = 'a Lwt.t
type t = Lwt_mutex.t
let create () = Lwt_mutex.create ()
let lock t = Lwt_mutex.lock t
let unlock t = Lwt_mutex.unlock t
end
module Condition = struct
type 'a fiber = 'a Lwt.t
type mutex = Mutex.t
type t = unit Lwt_condition.t
let create () = Lwt_condition.create ()
let wait t mutex = Lwt_condition.wait ~mutex t
let signal t = Lwt_condition.signal t ()
let broadcast t = Lwt_condition.broadcast t ()
end
type 'a t = 'a Lwt.t
let bind x f = Lwt.bind x f
let return x = Lwt.return x
let parallel_map ~f lst = Lwt_list.map_p f lst
let parallel_iter ~f lst = Lwt_list.iter_p f lst
let detach f =
let th, wk = Lwt.wait () in
Lwt.async (fun () ->
let res = f () in
Lwt.wakeup_later wk res ;
Lwt.return_unit) ;
th
end
module Scheduler = Carton.Make (Lwt)
module Delta = Carton_lwt.Enc.Delta (SHA1) (Verbose)
module First_pass = Carton.Dec.Fp (SHA1)
module Verify = Carton.Dec.Verify (SHA1) (Scheduler) (Lwt_scheduler)
let pack t ~commit stream =
let open Lwt.Infix in let open Lwt.Infix in
match t.head with let load t hash =
| None -> Lwt.return "" Store.read_inflated t hash >|= function
| Some head -> | None -> Fmt.failwith "%a not found" Digestif.SHA1.pp hash
Store.read_exn t.store head >|= function | Some (`Commit, cs) -> Carton.Dec.v ~kind:`A (Cstruct.to_bigarray cs)
| Commit c -> | Some (`Tree, cs) -> Carton.Dec.v ~kind:`B (Cstruct.to_bigarray cs)
let l = Encore.to_lavoisier Git_commit.format in | Some (`Blob, cs) -> Carton.Dec.v ~kind:`C (Cstruct.to_bigarray cs)
Encore.Lavoisier.emit_string c l | Some (`Tag, cs) -> Carton.Dec.v ~kind:`D (Cstruct.to_bigarray cs) in
| _ -> assert false let to_entry ~length hash = function
| Git.Value.Commit _ -> Carton_lwt.Enc.make_entry ~kind:`A ~length hash
| Git.Value.Tree _ -> Carton_lwt.Enc.make_entry ~kind:`B ~length hash
| Git.Value.Blob _ -> Carton_lwt.Enc.make_entry ~kind:`C ~length hash
| Git.Value.Tag _ -> Carton_lwt.Enc.make_entry ~kind:`D ~length hash in
Store.fold t (fun acc ?name:_ ~length hash value ->
Lwt.return ((to_entry ~length:(Int64.to_int length) hash value) :: acc))
~path:(Fpath.v ".") [] commit >|= Array.of_list >>= fun entries ->
Delta.delta ~threads:(List.init 4 (fun _ -> load t))
~weight:10 ~uid_ln:Digestif.SHA1.digest_size entries
>>= fun targets ->
let offsets = Hashtbl.create (Array.length targets) in
let find hash = Lwt.return (Option.map Int64.to_int (Hashtbl.find_opt offsets hash)) in
let uid =
{ Carton.Enc.uid_ln= SHA1.digest_size
; Carton.Enc.uid_rw= SHA1.to_raw_string } in
let b =
{ Carton.Enc.o= Bigstringaf.create De.io_buffer_size
; Carton.Enc.i= Bigstringaf.create De.io_buffer_size
; Carton.Enc.q= De.Queue.create 0x10000
; Carton.Enc.w= De.Lz77.make_window ~bits:15 } in
let ctx = ref SHA1.empty in
let cursor = ref 0L in
let header = Bigstringaf.create 12 in
Carton.Enc.header_of_pack ~length:(Array.length targets) header 0 12 ;
stream (Some (Bigstringaf.to_string header)) ;
ctx := SHA1.feed_bigstring !ctx header ~off:0 ~len:12 ;
cursor := Int64.add !cursor 12L ;
let encode_target idx =
Hashtbl.add offsets (Carton.Enc.target_uid targets.(idx)) !cursor ;
Carton_lwt.Enc.encode_target ~b ~find ~load:(load t) ~uid targets.(idx) ~cursor:(Int64.to_int !cursor)
>>= fun (len, encoder) ->
let payload = Bigstringaf.substring b.o ~off:0 ~len in
stream (Some payload) ;
ctx := SHA1.feed_bigstring !ctx b.o ~off:0 ~len ;
cursor := Int64.add !cursor (Int64.of_int len) ;
let rec go encoder = match Carton.Enc.N.encode ~o:b.o encoder with
| `Flush (encoder, len) ->
let payload = Bigstringaf.substring b.o ~off:0 ~len in
stream (Some payload) ;
ctx := SHA1.feed_bigstring !ctx b.o ~off:0 ~len ;
cursor := Int64.add !cursor (Int64.of_int len) ;
let encoder = Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) in
go encoder
| `End -> Lwt.return_unit in
let encoder = Carton.Enc.N.dst encoder b.o 0 (Bigstringaf.length b.o) in
go encoder in
let rec go idx =
if idx < Array.length targets
then encode_target idx >>= fun () -> go (succ idx)
else Lwt.return_unit in
go 0 >>= fun () ->
let hash = SHA1.get !ctx in
stream (Some (SHA1.to_raw_string hash)) ;
stream None ;
Lwt.return_unit
let to_octets t = match t.head with
| None -> assert false (* TODO(dinosaure): empty PACK file *)
| Some commit ->
let buf = Buffer.create 0x100 in
let stream = Option.iter (Buffer.add_string buf) in
let open Lwt.Infix in
pack t.store ~commit stream >|= fun () ->
Buffer.contents buf
let digest ~kind ?(off = 0) ?len buf =
let len =
match len with Some len -> len | None -> Bigstringaf.length buf - off in
let ctx = SHA1.empty in
let ctx =
match kind with
| `A -> SHA1.feed_string ctx (Fmt.str "commit %d\000" len)
| `B -> SHA1.feed_string ctx (Fmt.str "tree %d\000" len)
| `C -> SHA1.feed_string ctx (Fmt.str "blob %d\000" len)
| `D -> SHA1.feed_string ctx (Fmt.str "tag %d\000" len) in
let ctx = SHA1.feed_bigstring ctx ~off ~len buf in
SHA1.get ctx
let analyze stream =
let where = Hashtbl.create 0x100 in
let child = Hashtbl.create 0x100 in
let sizes = Hashtbl.create 0x100 in
let replace tbl k v = match Hashtbl.find_opt tbl k with
| Some v' -> if v' < v then Hashtbl.replace tbl k v
| _ -> Hashtbl.add tbl k v in
let rec go acc tmp decoder = let open Lwt.Infix in
match First_pass.decode decoder with
| `Await decoder ->
( stream () >>= function
| Some str ->
let tmp = Bigstringaf.of_string str ~off:0 ~len:(String.length str) in
go acc tmp (First_pass.src decoder tmp 0 (String.length str))
| None -> failwith "Truncated PACK file" )
| `Peek decoder ->
let keep = First_pass.src_rem decoder in
( stream () >>= function
| Some str ->
let tmp = Bigstringaf.create (keep + String.length str) in
Bigstringaf.blit tmp ~src_off:0 tmp ~dst_off:0 ~len:keep ;
Bigstringaf.blit_from_string str ~src_off:0 tmp ~dst_off:keep
~len:(String.length str) ;
go acc tmp (First_pass.src decoder tmp 0 (keep + String.length str))
| None -> failwith "Truncated PACK file" )
| `Entry ({ First_pass.kind= Base _; offset; size; _ }, decoder) ->
Hashtbl.add where offset (First_pass.count decoder - 1) ;
Hashtbl.add sizes offset size ;
go (Verify.unresolved_base ~cursor:offset :: acc) tmp decoder
| `Entry ({ First_pass.kind= Ofs { sub= v; source; target; }
; offset; _ }, decoder) ->
Hashtbl.add where offset (First_pass.count decoder - 1) ;
replace sizes Int64.(sub offset (of_int v)) source ;
replace sizes offset target ;
( try let vs = Hashtbl.find child (`Ofs Int64.(sub offset (of_int v))) in
Hashtbl.replace child (`Ofs Int64.(sub offset (of_int v))) (offset :: vs)
with _ -> Hashtbl.add child (`Ofs Int64.(sub offset (of_int v))) [ offset ] ) ;
go (Verify.unresolved_node :: acc) tmp decoder
| `Entry ({ First_pass.kind= Ref { ptr; target; source; }
; offset; _ }, decoder) ->
Hashtbl.add where offset (First_pass.count decoder - 1) ;
replace sizes offset (Stdlib.max target source) ;
( try let vs = Hashtbl.find child (`Ref ptr) in
Hashtbl.replace child (`Ref ptr) (offset :: vs)
with _ -> Hashtbl.add child (`Ref ptr) [ offset ] ) ;
go (Verify.unresolved_node :: acc) tmp decoder
| `End _hash ->
let where ~cursor = Hashtbl.find where cursor in
let children ~cursor ~uid =
match Hashtbl.find_opt child (`Ofs cursor),
Hashtbl.find_opt child (`Ref uid) with
| Some a, Some b -> List.sort_uniq compare (a @ b)
| Some x, None | None, Some x -> x
| None, None -> [] in
let weight ~cursor = Hashtbl.find sizes cursor in
let oracle = { Carton.Dec.where
; Carton.Dec.children
; Carton.Dec.digest
; Carton.Dec.weight } in
Lwt.return (List.rev acc, oracle)
| `Malformed err -> failwith err in
let o = Bigstringaf.create De.io_buffer_size in
let allocate _ = De.make_window ~bits:15 in
let decoder = First_pass.decoder ~o ~allocate `Manual in
let open Lwt.Infix in
go [] De.bigstring_empty decoder >>= fun (matrix, oracle) ->
Lwt.return (Array.of_list matrix, oracle)
let stream_of_string str =
let closed = ref false in
fun () -> match !closed with
| true -> Lwt.return_none
| false -> closed := true ; Lwt.return_some str
let map contents ~pos len =
let off = Int64.to_int pos in
let len = min (String.length contents - off) len in
Bigstringaf.of_string ~off:(Int64.to_int pos) ~len contents
let unpack contents =
let open Lwt.Infix in
analyze (stream_of_string contents) >>= fun (matrix, oracle) ->
let z = De.bigstring_create De.io_buffer_size in
let allocate bits = De.make_window ~bits in
let never _ = assert false in
let pack = Carton.Dec.make contents ~allocate ~z ~uid_ln:SHA1.length
~uid_rw:SHA1.of_raw_string never in
Verify.verify ~threads:4 pack ~map ~oracle ~verbose:ignore ~matrix >>= fun () ->
let index = Hashtbl.create (Array.length matrix) in
let iter v =
let offset = Verify.offset_of_status v in
let hash = Verify.uid_of_status v in
Hashtbl.add index hash offset in
Array.iter iter matrix ;
let pack =
Carton.Dec.make contents ~allocate ~z ~uid_ln:SHA1.length
~uid_rw:SHA1.of_raw_string (Hashtbl.find index) in
init_store ()
>|= Rresult.R.reword_error (Rresult.R.msgf "%a" Store.pp_error)
>|= Rresult.R.failwith_error_msg >>= fun store ->
let rec go commit idx =
if idx < Array.length matrix
then
let cursor = Verify.offset_of_status matrix.(idx) in
let weight = Carton.Dec.weight_of_offset ~map pack ~weight:Carton.Dec.null cursor in
let raw = Carton.Dec.make_raw ~weight in
let v = Carton.Dec.of_offset ~map pack raw ~cursor in
let kind = match Carton.Dec.kind v with
| `A -> `Commit
| `B -> `Tree
| `C -> `Blob
| `D -> `Tag in
Store.write_inflated store ~kind
(Cstruct.of_bigarray ~off:0 ~len:(Carton.Dec.len v) (Carton.Dec.raw v)) >>= fun hash ->
( if kind = `Commit
then Store.shallow store hash
else Lwt.return_unit ) >>= fun () ->
go (if kind = `Commit then Some hash else None) (succ idx)
else Lwt.return commit in
go None 0 >>= fun head -> Lwt.return (store, head)
let of_octets ctx ~remote data = let of_octets ctx ~remote data =
(* TODO maybe recover edn and branch from data as well? *) (* TODO maybe recover edn and branch from data as well? *)
let open Lwt_result.Infix in let open Lwt.Infix in
let l = Encore.to_angstrom Git_commit.format in Lwt.catch
Lwt.return (fun () ->
(Result.map_error (fun e -> `Msg e) unpack data >>= fun (store, head) ->
(Angstrom.parse_string ~consume:All l data)) >>= fun head -> let edn, branch = split_url remote in
let head = Git_commit.tree head in Lwt.return_ok { ctx ; edn ; branch ; store ; head; })
init_store () >|= fun store -> (fun exn ->
let edn, branch = split_url remote in Fmt.epr ">>> Got an exception: %s.\n%!" (Printexc.to_string exn) ;
{ ctx ; edn ; branch ; store ; head = Some head } Fmt.epr ">>> %s.\n%!"
(Printexc.raw_backtrace_to_string (Printexc.get_raw_backtrace ())) ;
Lwt.return_error (`Msg "Invalid PACK file"))
let exists t key = let exists t key =
let open Lwt.Infix in let open Lwt.Infix in
@ -164,7 +429,7 @@ let get t key =
| Blob b -> Ok (Git.Blob.to_string b) | Blob b -> Ok (Git.Blob.to_string b)
| _ -> Error (`Value_expected key) | _ -> Error (`Value_expected key)
let get_partial t key ~offset ~length = let _get_partial t key ~offset ~length =
let open Lwt_result.Infix in let open Lwt_result.Infix in
get t key >|= fun data -> get t key >|= fun data ->
if String.length data < offset then if String.length data < offset then
@ -221,7 +486,7 @@ let digest t key =
~some:(fun x -> Ok (Store.Hash.to_hex x)) ~some:(fun x -> Ok (Store.Hash.to_hex x))
t.head |> Lwt.return t.head |> Lwt.return
let size t key = let _size t key =
let open Lwt_result.Infix in let open Lwt_result.Infix in
get t key >|= fun data -> get t key >|= fun data ->
String.length data String.length data

1
src/pack.ml Normal file
View file

@ -0,0 +1 @@

3
test/dune Normal file
View file

@ -0,0 +1,3 @@
(cram
(package git-kv)
(deps %{bin:mgit}))

46
test/simple.t Normal file
View file

@ -0,0 +1,46 @@
Simple test of our Git Key-Value store
$ mkdir simple
$ cd simple
$ git init -q 2> /dev/null
$ git config init.defaultBranch main
$ git checkout -b main -q
$ git config user.email "romain@mirage.io"
$ git config user.name "Romain Calascibetta"
$ echo "Hello World!" > foo
$ git add foo
$ export DATE="2016-08-21 17:18:43 +0200"
$ export GIT_COMMITTER_DATE="2016-08-21 17:18:43 +0200"
$ git commit --date "$DATE" -q -m .
$ cd ..
$ git daemon --base-path=. --export-all --reuseaddr --pid-file=pid --detach
$ mgit git://localhost/simple <<EOF
> get /foo
> save db.pack
> quit
# 00000000: 4865 6c6c 6f20 576f 726c 6421 0a Hello World!.
#
#
$ tail -c20 db.pack | base64 -
5LI0Ny5+PX6FCDkSPYcRzZlCcUc=
$ mgit git://localhost/simple db.pack <<EOF
> get /foo
> quit
# 00000000: 4865 6c6c 6f20 576f 726c 6421 0a Hello World!.
#
$ cd simple
$ echo "Git rocks!" > bar
$ git add bar
$ git commit --date "$DATE" -q -m .
$ cd ..
$ mgit git://localhost/simple db.pack <<EOF
> pull
> get /bar
> get /foo
> quit
#
+ /bar
* /
# 00000000: 4769 7420 726f 636b 7321 0a Git rocks!.
# 00000000: 4865 6c6c 6f20 576f 726c 6421 0a Hello World!.
#
$ kill $(cat pid)