From d1cfefb53b23d837ef158e67024ca81d9bc4502c Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Sat, 22 Oct 2022 00:17:56 +0200 Subject: [PATCH 01/11] Implement {set,remove,rename}_and_push and delete push function All of these actions "shallow" the last commit. A subsequent `push` will do nothing due to that incapacity to walk through the history due to the shallowed commit. To be able to push the last change, we must provide {set,remove,rename} functions with an explicit call to push **before** the "shallow". --- src/git_kv.ml | 46 +++++++++++++++++++++++++++++++--------------- src/git_kv.mli | 14 ++++---------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index 8b391e1..4dab651 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -92,12 +92,6 @@ let pull t = t.head <- Some head; Lwt.return (Ok diff) -let push t = - let open Lwt.Infix in - Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] - >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" - Git.Reference.pp t.branch Sync.pp_error err)) - let connect ctx endpoint = let open Lwt.Infix in init_store () >>= fun store -> @@ -486,7 +480,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct unroll_tree t ?head (name, hash) rest | _ -> assert false ) - let set t key contents = + let set ?(push= false) t key contents = let segs = Mirage_kv.Key.segments key in let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in match segs with @@ -504,6 +498,11 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> + Lwt.Infix.(if push then + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] + >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" + Git.Reference.pp t.branch Sync.pp_error err)) + else Lwt.return_ok ()) >>= fun () -> Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () @@ -513,9 +512,9 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | `Msg err -> `Msg err | err -> Rresult.R.msgf "%a" Store.pp_error err - let set t key contents = + let set ?push t key contents = let open Lwt.Infix in - set t key contents >|= Rresult.R.reword_error to_write_error + set ?push t key contents >|= Rresult.R.reword_error to_write_error let set_partial t key ~offset chunk = let open Lwt_result.Infix in @@ -529,7 +528,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let batch t ?retries:_ f = f t - let remove t key = + let remove ?(push= false) t key = let segs = Mirage_kv.Key.segments key in let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in match List.rev segs, t.head with @@ -548,6 +547,11 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> + Lwt.Infix.(if push then + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] + >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" + Git.Reference.pp t.branch Sync.pp_error err)) + else Lwt.return_ok ()) >>= fun () -> Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | name :: pred_name :: rest, Some head -> @@ -565,18 +569,30 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> + Lwt.Infix.(if push then + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] + >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" + Git.Reference.pp t.branch Sync.pp_error err)) + else Lwt.return_ok ()) >>= fun () -> Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | _ -> Lwt.return_ok () - let remove t key = + let remove ?push t key = let open Lwt.Infix in - remove t key >|= Rresult.R.reword_error to_write_error + remove ?push t key >|= Rresult.R.reword_error to_write_error - let rename t ~source ~dest = + let rename ?(push= false) t ~source ~dest = (* TODO(dinosaure): optimize it! It was done on the naive way. *) let open Lwt_result.Infix in get t source >>= fun contents -> - remove t source >>= fun () -> - set t dest contents + remove ~push t source >>= fun () -> + set ~push t dest contents + + let set_and_push t k v = set ~push:true t k v + let set t k v = set ~push:false t k v + let remove_and_push t k = remove ~push:true t k + let remove t k = remove ~push:false t k + let rename_and_push t ~source ~dest = rename ~push:true t ~source ~dest + let rename t ~source ~dest = rename ~push:false t ~source ~dest end diff --git a/src/git_kv.mli b/src/git_kv.mli index 55fd4cb..e6019f2 100644 --- a/src/git_kv.mli +++ b/src/git_kv.mli @@ -51,16 +51,6 @@ val pull : t -> (change list, [> `Msg of string ]) result Lwt.t [store] Git repository. It returns a list of changes between the old state of your store and what you have remotely. *) -val push : t -> (unit, [> `Msg of string ]) result Lwt.t -(** [push store] tries to push any changes from your local Git repository - [store] to the remoe Git repository. The [push] function can fails for many - reasons. Currently, we don't handle merge politics and how we can resolve - conflicts between local and remote Git repositories. That mostly means that - if you are the only one who push to the Git repository (into a specific - branch), everything should be fine. But, if someone else push into the same - remote Git repository, your change can be discarded by the remote server - (due to conflicts). *) - module Make (Pclock : Mirage_clock.PCLOCK) : sig include Mirage_kv.RW with type t = t @@ -68,4 +58,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) : sig | `Hash_not_found of Digestif.SHA1.t | `Reference_not_found of Git.Reference.t | Mirage_kv.write_error ] + + val set_and_push : t -> key -> string -> (unit, write_error) result Lwt.t + val remove_and_push : t -> key -> (unit, write_error) result Lwt.t + val rename_and_push : t -> source:key -> dest:key -> (unit, write_error) result Lwt.t end From bc99b9d25f1c46564b1b12bfde9f2a402e8d910c Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Sat, 22 Oct 2022 00:35:41 +0200 Subject: [PATCH 02/11] And don't forget to set the reference with the last commit --- src/git_kv.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/git_kv.ml b/src/git_kv.ml index 4dab651..b16b2b3 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -498,6 +498,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> + Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> Lwt.Infix.(if push then Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" @@ -547,6 +548,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> + Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> Lwt.Infix.(if push then Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" @@ -569,6 +571,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> + Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> Lwt.Infix.(if push then Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" From d49a406691d207e099d0cbd68624addf51e80519 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Sat, 22 Oct 2022 00:46:33 +0200 Subject: [PATCH 03/11] Fix set and remove function --- src/git_kv.ml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index b16b2b3..77f6aed 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -447,7 +447,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct ; email= "git@mirage.io" ; date= now (), None } - let rec unroll_tree t ?head (pred_name, pred_hash) rpath = + let rec unroll_tree t ?head (pred_perm, pred_name, pred_hash) rpath = let open Lwt.Infix in let ( >>? ) = Lwt_result.bind in let ( >>! ) x f = match x with @@ -457,27 +457,27 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | [] -> ( match head with | None -> - let tree = Git.Tree.(v [ entry ~name:pred_name `Dir pred_hash ]) in + let tree = Git.Tree.(v [ entry ~name:pred_name pred_perm pred_hash ]) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> Lwt.return_ok hash | Some head -> Search.find t.store head (`Commit (`Path [])) >|= Option.get >>= fun tree_root_hash -> ( Store.read_exn t.store tree_root_hash >>= function | Git.Value.Tree tree -> - let tree = Git.Tree.(add (entry ~name:pred_name `Dir pred_hash) (remove ~name:pred_name tree)) in + let tree = Git.Tree.(add (entry ~name:pred_name pred_perm pred_hash) (remove ~name:pred_name tree)) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> Lwt.return_ok hash | _ -> assert false ) ) | name :: rest -> (head >>! fun head -> Search.find t.store head (`Commit (`Path (List.rev rpath)))) >>= function | None -> - let tree = Git.Tree.(v [ entry ~name:pred_name `Dir pred_hash ]) in + let tree = Git.Tree.(v [ entry ~name:pred_name pred_perm pred_hash ]) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> - unroll_tree t ?head (name, hash) rest + unroll_tree t ?head (`Dir, name, hash) rest | Some tree_hash -> ( Store.read_exn t.store tree_hash >>= function | Git.Value.Tree tree -> - let tree = Git.Tree.(add (entry ~name:pred_name `Dir pred_hash) (remove ~name:pred_name tree)) in + let tree = Git.Tree.(add (entry ~name:pred_name pred_perm pred_hash) (remove ~name:pred_name tree)) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> - unroll_tree t ?head (name, hash) rest + unroll_tree t ?head (`Dir, name, hash) rest | _ -> assert false ) let set ?(push= false) t key contents = @@ -491,7 +491,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let name = List.hd rpath in let open Lwt_result.Infix in Store.write t.store (Git.Value.Blob blob) >>= fun (hash, _) -> - unroll_tree t ?head:t.head (name, hash) (List.tl rpath) >>= fun tree_root_hash -> + unroll_tree t ?head:t.head (`Normal, name, hash) (List.tl rpath) >>= fun tree_root_hash -> let committer = author ~now in let author = author ~now in let parents = Option.value ~default:[] (Option.map (fun head -> [ head ]) t.head) in @@ -565,7 +565,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let tree = Git.Tree.remove ~name tree in let open Lwt_result.Infix in Store.write t.store (Git.Value.Tree tree) >>= fun (pred_hash, _) -> - unroll_tree t ~head (pred_name, pred_hash) rest >>= fun tree_root_hash -> + unroll_tree t ~head (`Dir, pred_name, pred_hash) rest >>= fun tree_root_hash -> let committer = author ~now in let author = author ~now in let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer From 30d5ad365aa2fe2cafd2f8e7350638fc678579a6 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Mon, 24 Oct 2022 10:44:21 +0200 Subject: [PATCH 04/11] Complete the implementation with push and improve the documentation --- src/git_kv.ml | 6 ++++-- src/git_kv.mli | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index 77f6aed..0003cb9 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -517,7 +517,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let open Lwt.Infix in set ?push t key contents >|= Rresult.R.reword_error to_write_error - let set_partial t key ~offset chunk = + let set_partial ?push t key ~offset chunk = let open Lwt_result.Infix in get t key >>= fun contents -> let len = String.length contents in @@ -525,7 +525,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let res = Bytes.make (max len (offset + add)) '\000' in Bytes.blit_string contents 0 res 0 len ; Bytes.blit_string chunk 0 res offset add ; - set t key (Bytes.unsafe_to_string res) + set ?push t key (Bytes.unsafe_to_string res) let batch t ?retries:_ f = f t @@ -592,6 +592,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct remove ~push t source >>= fun () -> set ~push t dest contents + let set_partial_and_push t k ~offset v = set_partial ~push:true t k ~offset v + let set_partial t k ~offset v = set_partial ~push:false t k ~offset v let set_and_push t k v = set ~push:true t k v let set t k v = set ~push:false t k v let remove_and_push t k = remove ~push:true t k diff --git a/src/git_kv.mli b/src/git_kv.mli index e6019f2..39163a0 100644 --- a/src/git_kv.mli +++ b/src/git_kv.mli @@ -3,7 +3,7 @@ This module implements the ability to manipulate a Git repository as a Key-Value store. It allows you to create a local (in-memory) Git repository that can come from either: - - a remote Git repository + - a remote Git repository (with {!val:connect}) - a state serialized by the {!val:to_octets} function The first case is interesting if you want to be synchronised with the @@ -13,20 +13,21 @@ In the second case, the synchronisation can be done later with {!val:pull}. - As far as {!val:push} is concerned, a synchronisation with the remote - repository is necessary before {b changing} and sending the new information - (a use of {!val:Make.set}/{!val:Make.rename} should be preceded by a - {!val:pull}). This is because we do not handle conflicts that may exist - between your local repository and the remote repository - in other words, - if you want to ensure consistency between reading ({!val:pull}) and writing - ({!val:push}) to a remote repository, the instance that uses this code - should be the only one to handle said remote repository. *) + The user can modify the repository (add files, remove files, etc.). He can + do this locally and thus assume a possible desynchronisation between the + remote repository and what exists locally or he can share these changes + with the remote repository with [{set,rename,remove}_and_push]. + + In the latter case, the notion of {i merge} and conflicts is not handled by + our implementation. This means that if the remote repository is manipulated + by another instance than yours, it can result in conflicts that will make + the above functions fail. *) type t (** The type of the Git store. *) val connect : Mimic.ctx -> string -> t Lwt.t -(** [connect ctx remote] creates a new Git store which synchronizes +(** [connect ctx remote] creates a new Git store which synchronises with [remote] {i via} protocols available into the given [ctx]. @raise [Invalid_argument _] if we can not initialize the store, or if @@ -47,7 +48,7 @@ type change = [ `Add of Mirage_kv.Key.t | `Change of Mirage_kv.Key.t ] val pull : t -> (change list, [> `Msg of string ]) result Lwt.t -(** [pull store] tries to synchronize the remote Git repository with your local +(** [pull store] tries to synchronise the remote Git repository with your local [store] Git repository. It returns a list of changes between the old state of your store and what you have remotely. *) @@ -60,6 +61,23 @@ module Make (Pclock : Mirage_clock.PCLOCK) : sig | Mirage_kv.write_error ] val set_and_push : t -> key -> string -> (unit, write_error) result Lwt.t + (** [set_and_push t k v] replaces the binding [k -> v] in [t] and pushes this + modification to the remote repository. This function can fail on the + synchronisation mechanism if the remote Git repository is {i in advance} + of your local repository. It is advisable to use {!val:pull} before. *) + val remove_and_push : t -> key -> (unit, write_error) result Lwt.t + (** Same as {!val:remove} with the synchronisation mechanism, see + {!val:set_and_push} for more details about possible failures and how to + use it. *) + val rename_and_push : t -> source:key -> dest:key -> (unit, write_error) result Lwt.t + (** Same as {!val:rename} with the synchronisation mechanism, see + {!val:rename_and_push} for more details about possible failures and how + to use it. *) + + val set_partial_and_push : t -> key -> offset:int -> string -> (unit, write_error) result Lwt.t + (** Same as {!val:set_partial} with the synchronisation mechanism, see + {!val:set_and_push} for more details about possible failures and how to + use it. *) end From 6d6b0bd9c703c73f129f74180b41d75225ab5815 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Mon, 24 Oct 2022 11:06:44 +0200 Subject: [PATCH 05/11] Add README.md --- README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/README.md b/README.md index e69de29..e9342ea 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,66 @@ +# Git-kv, a simple Key-Value store synchronized with a Git repository + +This library is a simple implementation of a Git repository that can be read +and/or modified. It offers two ways to create such a local repository: +1) The local repository can be created in a serialized state +2) The local repository can be created from a remote repository + +The first method has the advantage of not requiring an internet connection. The +serialized state can be created with the `mgit` tool: +```sh +$ mgit https://github.com/mirage/mirage < save db.pack +> quit +$ ls db.pack +db.pack +``` + +The disadvantage is that the serialized state may be out of sync with the state +of the remote repository. In this case, the user has access to the `pull` +function, which allows the internet state of the local repository to be +re-synchronised with the remote repository. +```ocaml +let contents_of_file filename = + let ic = open_in filename in + let ln = in_channel_length ic in + let rs = Bytes.create ln in + really_input ic rs 0 ln ; + Bytes.unsafe_to_string rs + +let _ = + Git_kv.of_octets ctx + ~remote:"git@github.com:mirage/mirage.git" + (contents_of_file "db.pack") >>= fun t -> + Git_kv.pull t >>= fun diff -> + ... +``` + +The second method initiates a connection to the remote repository in order to +download its state and reproduce a synchronised internal state. The type of +connections supported are described in the given `ctx`. We recommend the +tutorial about [Mimic][mimic] to understand its use. +```sh +let _ = + Git_kv.connect ctx "git@github.com:mirage/mirage.git" >>= fun t -> + ... +``` + +The user can manipulate the repository as an [RW][mirage-kv-rw] repository. Any +change to the repository requires a new commit. These changes can be sent to +the remote repository by deriving manipulation functions with the `and_push` +suffix: +```ocaml +module Store = Git_kv.Make (Pclock) + +let new_file_locally t = + Store.set t Mirage_kv.Key.(empty / "foo") "foo" >>= fun () -> + ... + +let new_file_locally_and_remotely t = + Git_kv.pull t >>= fun _diff -> + Store.set_and_push t Mirage_kv.Key.(empty / "foo") "foo" >>= fun () -> + ... +``` + +[mimic]: https://dinosaure.github.io/mimic/mimic/index.html +[mirage-kv-rw]: https://github.com/mirage/mirage-kv From 98bd2bfe3ce8721113e27c3d9238311c0ea7cff0 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Thu, 27 Oct 2022 16:29:12 +0200 Subject: [PATCH 06/11] Implement correctly a batch operation (which will push at the end of the given closure and provide a Local sub-module to be able to manipulate the Git repository without connections --- src/git_kv.ml | 72 ++++++++++++++++++++++++++++++++------------------ src/git_kv.mli | 26 +++++------------- 2 files changed, 52 insertions(+), 46 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index 0003cb9..1271368 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -8,6 +8,7 @@ type t = ; edn : Smart_git.Endpoint.t ; branch : Git.Reference.t ; store : Store.t + ; mutable batch : bool ; mutable head : Store.hash option } let init_store () = @@ -97,7 +98,7 @@ let connect ctx endpoint = init_store () >>= fun store -> let store = to_invalid store in let edn, branch = split_url endpoint in - let t = { ctx ; edn ; branch ; store ; head = None } in + let t = { ctx ; edn ; branch ; store ; batch= false; head = None } in pull t >>= fun r -> let _r = to_invalid r in Lwt.return t @@ -330,7 +331,7 @@ let of_octets ctx ~remote data = >|= Rresult.R.failwith_error_msg >>= fun store -> analyze store data >>= fun head -> let edn, branch = split_url remote in - Lwt.return_ok { ctx ; edn ; branch ; store ; head; }) + Lwt.return_ok { ctx ; edn ; branch ; store ; batch= false; head; }) (fun exn -> Fmt.epr ">>> Got an exception: %s.\n%!" (Printexc.to_string exn) ; Fmt.epr ">>> %s.\n%!" @@ -480,7 +481,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct unroll_tree t ?head (`Dir, name, hash) rest | _ -> assert false ) - let set ?(push= false) t key contents = + let set ~and_push t key contents = let segs = Mirage_kv.Key.segments key in let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in match segs with @@ -499,7 +500,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct ~parents (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> - Lwt.Infix.(if push then + Lwt.Infix.(if and_push then Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) @@ -513,23 +514,23 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | `Msg err -> `Msg err | err -> Rresult.R.msgf "%a" Store.pp_error err - let set ?push t key contents = + let set ?(and_push= true) t key contents = let open Lwt.Infix in - set ?push t key contents >|= Rresult.R.reword_error to_write_error + let and_push = not t.batch && and_push in + set ~and_push t key contents >|= Rresult.R.reword_error to_write_error - let set_partial ?push t key ~offset chunk = + let set_partial ?(and_push= true) t key ~offset chunk = let open Lwt_result.Infix in + let and_push = not t.batch && and_push in get t key >>= fun contents -> let len = String.length contents in let add = String.length chunk in let res = Bytes.make (max len (offset + add)) '\000' in Bytes.blit_string contents 0 res 0 len ; Bytes.blit_string chunk 0 res offset add ; - set ?push t key (Bytes.unsafe_to_string res) + set ~and_push t key (Bytes.unsafe_to_string res) - let batch t ?retries:_ f = f t - - let remove ?(push= false) t key = + let remove ~and_push t key = let segs = Mirage_kv.Key.segments key in let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in match List.rev segs, t.head with @@ -549,7 +550,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> - Lwt.Infix.(if push then + Lwt.Infix.(if and_push then Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) @@ -572,7 +573,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> - Lwt.Infix.(if push then + Lwt.Infix.(if and_push then Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) @@ -581,23 +582,42 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct t.head <- Some hash ; Lwt.return_ok () | _ -> Lwt.return_ok () - let remove ?push t key = + let remove ?(and_push= true) t key = let open Lwt.Infix in - remove ?push t key >|= Rresult.R.reword_error to_write_error + let and_push = not t.batch && and_push in + remove ~and_push t key >|= Rresult.R.reword_error to_write_error - let rename ?(push= false) t ~source ~dest = + let rename ?(and_push= true) t ~source ~dest = (* TODO(dinosaure): optimize it! It was done on the naive way. *) let open Lwt_result.Infix in get t source >>= fun contents -> - remove ~push t source >>= fun () -> - set ~push t dest contents + remove ~and_push t source >>= fun () -> + set ~and_push t dest contents - let set_partial_and_push t k ~offset v = set_partial ~push:true t k ~offset v - let set_partial t k ~offset v = set_partial ~push:false t k ~offset v - let set_and_push t k v = set ~push:true t k v - let set t k v = set ~push:false t k v - let remove_and_push t k = remove ~push:true t k - let remove t k = remove ~push:false t k - let rename_and_push t ~source ~dest = rename ~push:true t ~source ~dest - let rename t ~source ~dest = rename ~push:false t ~source ~dest + let batch t ?(retries= 3) f = + let open Lwt.Infix in + if retries < 0 + then Fmt.invalid_arg "Git_kv.Make.batch: retries must be equal or greater than 0" ; + t.batch <- true ; + f t >>= fun res -> + let rec force_push limit = + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function + | Ok () -> Lwt.return_unit + | Error _ when limit > 0 -> force_push (pred limit) + | Error err -> + Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err in + force_push retries >>= fun () -> + t.batch <- false ; Lwt.return res + + module Local = struct + let set_partial t k ~offset v = set_partial ~and_push:false t k ~offset v + let set t k v = set ~and_push:false t k v + let remove t k = remove ~and_push:false t k + let rename t ~source ~dest = rename ~and_push:false t ~source ~dest + end + + let set_partial t k ~offset v = set_partial ~and_push:true t k ~offset v + let set t k v = set ~and_push:true t k v + let remove t k = remove ~and_push:true t k + let rename t ~source ~dest = rename ~and_push:true t ~source ~dest end diff --git a/src/git_kv.mli b/src/git_kv.mli index 39163a0..42d62ee 100644 --- a/src/git_kv.mli +++ b/src/git_kv.mli @@ -60,24 +60,10 @@ module Make (Pclock : Mirage_clock.PCLOCK) : sig | `Reference_not_found of Git.Reference.t | Mirage_kv.write_error ] - val set_and_push : t -> key -> string -> (unit, write_error) result Lwt.t - (** [set_and_push t k v] replaces the binding [k -> v] in [t] and pushes this - modification to the remote repository. This function can fail on the - synchronisation mechanism if the remote Git repository is {i in advance} - of your local repository. It is advisable to use {!val:pull} before. *) - - val remove_and_push : t -> key -> (unit, write_error) result Lwt.t - (** Same as {!val:remove} with the synchronisation mechanism, see - {!val:set_and_push} for more details about possible failures and how to - use it. *) - - val rename_and_push : t -> source:key -> dest:key -> (unit, write_error) result Lwt.t - (** Same as {!val:rename} with the synchronisation mechanism, see - {!val:rename_and_push} for more details about possible failures and how - to use it. *) - - val set_partial_and_push : t -> key -> offset:int -> string -> (unit, write_error) result Lwt.t - (** Same as {!val:set_partial} with the synchronisation mechanism, see - {!val:set_and_push} for more details about possible failures and how to - use it. *) + module Local : sig + val set : t -> key -> string -> (unit, write_error) result Lwt.t + val remove : t -> key -> (unit, write_error) result Lwt.t + val rename : t -> source:key -> dest:key -> (unit, write_error) result Lwt.t + val set_partial : t -> key -> offset:int -> string -> (unit, write_error) result Lwt.t + end end From 934ed7d960cec5d6e862dbbf2742fa195c2bad44 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Fri, 28 Oct 2022 10:49:59 +0200 Subject: [PATCH 07/11] Serialize sequence of batches operations --- src/git_kv.ml | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index 1271368..89ba555 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -8,7 +8,7 @@ type t = ; edn : Smart_git.Endpoint.t ; branch : Git.Reference.t ; store : Store.t - ; mutable batch : bool + ; mutable batch : unit Lwt.t option ; mutable head : Store.hash option } let init_store () = @@ -98,7 +98,7 @@ let connect ctx endpoint = init_store () >>= fun store -> let store = to_invalid store in let edn, branch = split_url endpoint in - let t = { ctx ; edn ; branch ; store ; batch= false; head = None } in + let t = { ctx ; edn ; branch ; store ; batch= None; head= None } in pull t >>= fun r -> let _r = to_invalid r in Lwt.return t @@ -331,7 +331,7 @@ let of_octets ctx ~remote data = >|= Rresult.R.failwith_error_msg >>= fun store -> analyze store data >>= fun head -> let edn, branch = split_url remote in - Lwt.return_ok { ctx ; edn ; branch ; store ; batch= false; head; }) + Lwt.return_ok { ctx ; edn ; branch ; store ; batch= None; head; }) (fun exn -> Fmt.epr ">>> Got an exception: %s.\n%!" (Printexc.to_string exn) ; Fmt.epr ">>> %s.\n%!" @@ -480,6 +480,12 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> unroll_tree t ?head (`Dir, name, hash) rest | _ -> assert false ) + + let no_batch = function + | None -> true + | Some th -> match Lwt.state th with + | Sleep -> true + | Return _ | Fail _ -> false let set ~and_push t key contents = let segs = Mirage_kv.Key.segments key in @@ -516,12 +522,12 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let set ?(and_push= true) t key contents = let open Lwt.Infix in - let and_push = not t.batch && and_push in + let and_push = no_batch t.batch && and_push in set ~and_push t key contents >|= Rresult.R.reword_error to_write_error let set_partial ?(and_push= true) t key ~offset chunk = let open Lwt_result.Infix in - let and_push = not t.batch && and_push in + let and_push = no_batch t.batch && and_push in get t key >>= fun contents -> let len = String.length contents in let add = String.length chunk in @@ -584,7 +590,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let remove ?(and_push= true) t key = let open Lwt.Infix in - let and_push = not t.batch && and_push in + let and_push = no_batch t.batch && and_push in remove ~and_push t key >|= Rresult.R.reword_error to_write_error let rename ?(and_push= true) t ~source ~dest = @@ -598,7 +604,11 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let open Lwt.Infix in if retries < 0 then Fmt.invalid_arg "Git_kv.Make.batch: retries must be equal or greater than 0" ; - t.batch <- true ; + ( match t.batch with + | None -> Lwt.return_unit + | Some th -> th ) >>= fun () -> + let th, wk = Lwt.wait () in + t.batch <- Some th ; f t >>= fun res -> let rec force_push limit = Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function @@ -607,7 +617,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | Error err -> Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err in force_push retries >>= fun () -> - t.batch <- false ; Lwt.return res + Lwt.wakeup_later wk () ; + Lwt.return res module Local = struct let set_partial t k ~offset v = set_partial ~and_push:false t k ~offset v From d7765ff6aab38fd8a665f09b93b709c298caa987 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Fri, 28 Oct 2022 10:59:27 +0200 Subject: [PATCH 08/11] Shallow only when we push --- src/git_kv.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index 89ba555..eead4a8 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -510,8 +510,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) + >>= Store.shallow t.store hash >|= Result.ok else Lwt.return_ok ()) >>= fun () -> - Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () let to_write_error (error : Store.error) = match error with @@ -560,8 +560,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) + >>= Store.shallow t.store hash >|= Result.ok else Lwt.return_ok ()) >>= fun () -> - Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | name :: pred_name :: rest, Some head -> let open Lwt.Infix in @@ -583,8 +583,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) + >>= Store.shallow t.store hash >|= Result.ok else Lwt.return_ok ()) >>= fun () -> - Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | _ -> Lwt.return_ok () From 5aa887f48f687cb7f35a1b0ae271fe067ad8e2c0 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Fri, 28 Oct 2022 11:02:49 +0200 Subject: [PATCH 09/11] Be sure to shallow to our last commit after a push from batch --- src/git_kv.ml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index eead4a8..a433a7c 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -486,6 +486,8 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | Some th -> match Lwt.state th with | Sleep -> true | Return _ | Fail _ -> false + + let ( >>? ) = Lwt_result.bind let set ~and_push t key contents = let segs = Mirage_kv.Key.segments key in @@ -510,7 +512,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) - >>= Store.shallow t.store hash >|= Result.ok + >>? fun () -> (Store.shallow t.store hash >|= Result.ok) else Lwt.return_ok ()) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () @@ -560,7 +562,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) - >>= Store.shallow t.store hash >|= Result.ok + >>? fun () -> Store.shallow t.store hash >|= Result.ok else Lwt.return_ok ()) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | name :: pred_name :: rest, Some head -> @@ -583,7 +585,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err)) - >>= Store.shallow t.store hash >|= Result.ok + >>? fun () -> Store.shallow t.store hash >|= Result.ok else Lwt.return_ok ()) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | _ -> Lwt.return_ok () @@ -612,7 +614,10 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct f t >>= fun res -> let rec force_push limit = Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function - | Ok () -> Lwt.return_unit + | Ok () -> + ( match t.head with + | None -> Lwt.return_unit + | Some hash -> Store.shallow t.store hash ) | Error _ when limit > 0 -> force_push (pred limit) | Error err -> Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err in From 6bf4dc7f07d7732cf1d8b78b60d3b1645f23cfc1 Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Fri, 28 Oct 2022 11:56:11 +0200 Subject: [PATCH 10/11] Improve the documentation --- src/git_kv.mli | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/git_kv.mli b/src/git_kv.mli index 42d62ee..13837b6 100644 --- a/src/git_kv.mli +++ b/src/git_kv.mli @@ -13,15 +13,39 @@ In the second case, the synchronisation can be done later with {!val:pull}. - The user can modify the repository (add files, remove files, etc.). He can - do this locally and thus assume a possible desynchronisation between the - remote repository and what exists locally or he can share these changes - with the remote repository with [{set,rename,remove}_and_push]. + {2: Pushing and synchronisation.} + + The user can modify the repository (add files, remove files, etc.). They + can do this locally (with the {!module:Make.Local} module) and thus assume + a possible desynchronisation between the remote repository and what exists + locally or they can share these changes with the remote repository (default + behavior). In the latter case, the notion of {i merge} and conflicts is not handled by our implementation. This means that if the remote repository is manipulated by another instance than yours, it can result in conflicts that will make - the above functions fail. *) + the above functions fail. + + The only check done by the remote Git repository when you want to submit + your change is the ability to find a path between a commit available + remotely, the commit-graph given by the transmission, and your last commit. + In that way, our [push] is most similar to a [git push --force]! + + To save I/O, the {!val:Make.batch} operation allows you to do some change + into a closure and the implementation will push only at the end of this + closure. By this way, you can {!val:Make.set}, {!val:Make.rename} or + {!val:Make.remove} without a systematic [push] on these actions. Only one + will be done at the end of your closure. + + {2: Serialization of the Git repository.} + + Finally, the KV-store tries to keep the minimal set of commits required + between you and the remote repository. In other words, only {i un}pushed + changes are kept by the KV-store. However, if these changes are not pushed, + they will be stored into the final state produced by {!val:to_octets}. In + other words, the more changes you make out of sync with the remote + repository (without pushing them), the bigger the state serialization will + be. *) type t (** The type of the Git store. *) From 993db937f8f5885ffe7d7f64508b7c2fb2c4248e Mon Sep 17 00:00:00 2001 From: Romain Calascibetta Date: Fri, 28 Oct 2022 14:14:40 +0200 Subject: [PATCH 11/11] Don't retry if push fail into the batch function --- src/git_kv.ml | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/git_kv.ml b/src/git_kv.ml index a433a7c..ccc7fb9 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -602,26 +602,21 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct remove ~and_push t source >>= fun () -> set ~and_push t dest contents - let batch t ?(retries= 3) f = + let batch t ?retries:_ f = let open Lwt.Infix in - if retries < 0 - then Fmt.invalid_arg "Git_kv.Make.batch: retries must be equal or greater than 0" ; ( match t.batch with | None -> Lwt.return_unit | Some th -> th ) >>= fun () -> let th, wk = Lwt.wait () in t.batch <- Some th ; f t >>= fun res -> - let rec force_push limit = - Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function - | Ok () -> - ( match t.head with - | None -> Lwt.return_unit - | Some hash -> Store.shallow t.store hash ) - | Error _ when limit > 0 -> force_push (pred limit) - | Error err -> - Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err in - force_push retries >>= fun () -> + ( Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function + | Ok () -> + ( match t.head with + | None -> Lwt.return_unit + | Some hash -> Store.shallow t.store hash ) + | Error err -> + Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err ) >>= fun () -> Lwt.wakeup_later wk () ; Lwt.return res