diff --git a/README.md b/README.md index e69de29..e9342ea 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,66 @@ +# Git-kv, a simple Key-Value store synchronized with a Git repository + +This library is a simple implementation of a Git repository that can be read +and/or modified. It offers two ways to create such a local repository: +1) The local repository can be created in a serialized state +2) The local repository can be created from a remote repository + +The first method has the advantage of not requiring an internet connection. The +serialized state can be created with the `mgit` tool: +```sh +$ mgit https://github.com/mirage/mirage < save db.pack +> quit +$ ls db.pack +db.pack +``` + +The disadvantage is that the serialized state may be out of sync with the state +of the remote repository. In this case, the user has access to the `pull` +function, which allows the internet state of the local repository to be +re-synchronised with the remote repository. +```ocaml +let contents_of_file filename = + let ic = open_in filename in + let ln = in_channel_length ic in + let rs = Bytes.create ln in + really_input ic rs 0 ln ; + Bytes.unsafe_to_string rs + +let _ = + Git_kv.of_octets ctx + ~remote:"git@github.com:mirage/mirage.git" + (contents_of_file "db.pack") >>= fun t -> + Git_kv.pull t >>= fun diff -> + ... +``` + +The second method initiates a connection to the remote repository in order to +download its state and reproduce a synchronised internal state. The type of +connections supported are described in the given `ctx`. We recommend the +tutorial about [Mimic][mimic] to understand its use. +```sh +let _ = + Git_kv.connect ctx "git@github.com:mirage/mirage.git" >>= fun t -> + ... +``` + +The user can manipulate the repository as an [RW][mirage-kv-rw] repository. Any +change to the repository requires a new commit. These changes can be sent to +the remote repository by deriving manipulation functions with the `and_push` +suffix: +```ocaml +module Store = Git_kv.Make (Pclock) + +let new_file_locally t = + Store.set t Mirage_kv.Key.(empty / "foo") "foo" >>= fun () -> + ... + +let new_file_locally_and_remotely t = + Git_kv.pull t >>= fun _diff -> + Store.set_and_push t Mirage_kv.Key.(empty / "foo") "foo" >>= fun () -> + ... +``` + +[mimic]: https://dinosaure.github.io/mimic/mimic/index.html +[mirage-kv-rw]: https://github.com/mirage/mirage-kv diff --git a/src/git_kv.ml b/src/git_kv.ml index 8b391e1..ccc7fb9 100644 --- a/src/git_kv.ml +++ b/src/git_kv.ml @@ -8,6 +8,7 @@ type t = ; edn : Smart_git.Endpoint.t ; branch : Git.Reference.t ; store : Store.t + ; mutable batch : unit Lwt.t option ; mutable head : Store.hash option } let init_store () = @@ -92,18 +93,12 @@ let pull t = t.head <- Some head; Lwt.return (Ok diff) -let push t = - let open Lwt.Infix in - Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] - >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" - Git.Reference.pp t.branch Sync.pp_error err)) - let connect ctx endpoint = let open Lwt.Infix in init_store () >>= fun store -> let store = to_invalid store in let edn, branch = split_url endpoint in - let t = { ctx ; edn ; branch ; store ; head = None } in + let t = { ctx ; edn ; branch ; store ; batch= None; head= None } in pull t >>= fun r -> let _r = to_invalid r in Lwt.return t @@ -336,7 +331,7 @@ let of_octets ctx ~remote data = >|= Rresult.R.failwith_error_msg >>= fun store -> analyze store data >>= fun head -> let edn, branch = split_url remote in - Lwt.return_ok { ctx ; edn ; branch ; store ; head; }) + Lwt.return_ok { ctx ; edn ; branch ; store ; batch= None; head; }) (fun exn -> Fmt.epr ">>> Got an exception: %s.\n%!" (Printexc.to_string exn) ; Fmt.epr ">>> %s.\n%!" @@ -453,7 +448,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct ; email= "git@mirage.io" ; date= now (), None } - let rec unroll_tree t ?head (pred_name, pred_hash) rpath = + let rec unroll_tree t ?head (pred_perm, pred_name, pred_hash) rpath = let open Lwt.Infix in let ( >>? ) = Lwt_result.bind in let ( >>! ) x f = match x with @@ -463,30 +458,38 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | [] -> ( match head with | None -> - let tree = Git.Tree.(v [ entry ~name:pred_name `Dir pred_hash ]) in + let tree = Git.Tree.(v [ entry ~name:pred_name pred_perm pred_hash ]) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> Lwt.return_ok hash | Some head -> Search.find t.store head (`Commit (`Path [])) >|= Option.get >>= fun tree_root_hash -> ( Store.read_exn t.store tree_root_hash >>= function | Git.Value.Tree tree -> - let tree = Git.Tree.(add (entry ~name:pred_name `Dir pred_hash) (remove ~name:pred_name tree)) in + let tree = Git.Tree.(add (entry ~name:pred_name pred_perm pred_hash) (remove ~name:pred_name tree)) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> Lwt.return_ok hash | _ -> assert false ) ) | name :: rest -> (head >>! fun head -> Search.find t.store head (`Commit (`Path (List.rev rpath)))) >>= function | None -> - let tree = Git.Tree.(v [ entry ~name:pred_name `Dir pred_hash ]) in + let tree = Git.Tree.(v [ entry ~name:pred_name pred_perm pred_hash ]) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> - unroll_tree t ?head (name, hash) rest + unroll_tree t ?head (`Dir, name, hash) rest | Some tree_hash -> ( Store.read_exn t.store tree_hash >>= function | Git.Value.Tree tree -> - let tree = Git.Tree.(add (entry ~name:pred_name `Dir pred_hash) (remove ~name:pred_name tree)) in + let tree = Git.Tree.(add (entry ~name:pred_name pred_perm pred_hash) (remove ~name:pred_name tree)) in Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> - unroll_tree t ?head (name, hash) rest + unroll_tree t ?head (`Dir, name, hash) rest | _ -> assert false ) + + let no_batch = function + | None -> true + | Some th -> match Lwt.state th with + | Sleep -> true + | Return _ | Fail _ -> false + + let ( >>? ) = Lwt_result.bind - let set t key contents = + let set ~and_push t key contents = let segs = Mirage_kv.Key.segments key in let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in match segs with @@ -497,14 +500,20 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let name = List.hd rpath in let open Lwt_result.Infix in Store.write t.store (Git.Value.Blob blob) >>= fun (hash, _) -> - unroll_tree t ?head:t.head (name, hash) (List.tl rpath) >>= fun tree_root_hash -> + unroll_tree t ?head:t.head (`Normal, name, hash) (List.tl rpath) >>= fun tree_root_hash -> let committer = author ~now in let author = author ~now in let parents = Option.value ~default:[] (Option.map (fun head -> [ head ]) t.head) in let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> - Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> + Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> + Lwt.Infix.(if and_push then + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] + >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" + Git.Reference.pp t.branch Sync.pp_error err)) + >>? fun () -> (Store.shallow t.store hash >|= Result.ok) + else Lwt.return_ok ()) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () let to_write_error (error : Store.error) = match error with @@ -513,23 +522,23 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct | `Msg err -> `Msg err | err -> Rresult.R.msgf "%a" Store.pp_error err - let set t key contents = + let set ?(and_push= true) t key contents = let open Lwt.Infix in - set t key contents >|= Rresult.R.reword_error to_write_error + let and_push = no_batch t.batch && and_push in + set ~and_push t key contents >|= Rresult.R.reword_error to_write_error - let set_partial t key ~offset chunk = + let set_partial ?(and_push= true) t key ~offset chunk = let open Lwt_result.Infix in + let and_push = no_batch t.batch && and_push in get t key >>= fun contents -> let len = String.length contents in let add = String.length chunk in let res = Bytes.make (max len (offset + add)) '\000' in Bytes.blit_string contents 0 res 0 len ; Bytes.blit_string chunk 0 res offset add ; - set t key (Bytes.unsafe_to_string res) + set ~and_push t key (Bytes.unsafe_to_string res) - let batch t ?retries:_ f = f t - - let remove t key = + let remove ~and_push t key = let segs = Mirage_kv.Key.segments key in let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in match List.rev segs, t.head with @@ -548,7 +557,13 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> - Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> + Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> + Lwt.Infix.(if and_push then + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] + >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" + Git.Reference.pp t.branch Sync.pp_error err)) + >>? fun () -> Store.shallow t.store hash >|= Result.ok + else Lwt.return_ok ()) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | name :: pred_name :: rest, Some head -> let open Lwt.Infix in @@ -559,24 +574,61 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct let tree = Git.Tree.remove ~name tree in let open Lwt_result.Infix in Store.write t.store (Git.Value.Tree tree) >>= fun (pred_hash, _) -> - unroll_tree t ~head (pred_name, pred_hash) rest >>= fun tree_root_hash -> + unroll_tree t ~head (`Dir, pred_name, pred_hash) rest >>= fun tree_root_hash -> let committer = author ~now in let author = author ~now in let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer ~parents:[ head ] (Some "Committed by git-kv") in Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) -> - Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () -> + Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () -> + Lwt.Infix.(if and_push then + Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] + >|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a" + Git.Reference.pp t.branch Sync.pp_error err)) + >>? fun () -> Store.shallow t.store hash >|= Result.ok + else Lwt.return_ok ()) >>= fun () -> t.head <- Some hash ; Lwt.return_ok () | _ -> Lwt.return_ok () - let remove t key = + let remove ?(and_push= true) t key = let open Lwt.Infix in - remove t key >|= Rresult.R.reword_error to_write_error + let and_push = no_batch t.batch && and_push in + remove ~and_push t key >|= Rresult.R.reword_error to_write_error - let rename t ~source ~dest = + let rename ?(and_push= true) t ~source ~dest = (* TODO(dinosaure): optimize it! It was done on the naive way. *) let open Lwt_result.Infix in get t source >>= fun contents -> - remove t source >>= fun () -> - set t dest contents + remove ~and_push t source >>= fun () -> + set ~and_push t dest contents + + let batch t ?retries:_ f = + let open Lwt.Infix in + ( match t.batch with + | None -> Lwt.return_unit + | Some th -> th ) >>= fun () -> + let th, wk = Lwt.wait () in + t.batch <- Some th ; + f t >>= fun res -> + ( Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function + | Ok () -> + ( match t.head with + | None -> Lwt.return_unit + | Some hash -> Store.shallow t.store hash ) + | Error err -> + Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err ) >>= fun () -> + Lwt.wakeup_later wk () ; + Lwt.return res + + module Local = struct + let set_partial t k ~offset v = set_partial ~and_push:false t k ~offset v + let set t k v = set ~and_push:false t k v + let remove t k = remove ~and_push:false t k + let rename t ~source ~dest = rename ~and_push:false t ~source ~dest + end + + let set_partial t k ~offset v = set_partial ~and_push:true t k ~offset v + let set t k v = set ~and_push:true t k v + let remove t k = remove ~and_push:true t k + let rename t ~source ~dest = rename ~and_push:true t ~source ~dest end diff --git a/src/git_kv.mli b/src/git_kv.mli index 55fd4cb..13837b6 100644 --- a/src/git_kv.mli +++ b/src/git_kv.mli @@ -3,7 +3,7 @@ This module implements the ability to manipulate a Git repository as a Key-Value store. It allows you to create a local (in-memory) Git repository that can come from either: - - a remote Git repository + - a remote Git repository (with {!val:connect}) - a state serialized by the {!val:to_octets} function The first case is interesting if you want to be synchronised with the @@ -13,20 +13,45 @@ In the second case, the synchronisation can be done later with {!val:pull}. - As far as {!val:push} is concerned, a synchronisation with the remote - repository is necessary before {b changing} and sending the new information - (a use of {!val:Make.set}/{!val:Make.rename} should be preceded by a - {!val:pull}). This is because we do not handle conflicts that may exist - between your local repository and the remote repository - in other words, - if you want to ensure consistency between reading ({!val:pull}) and writing - ({!val:push}) to a remote repository, the instance that uses this code - should be the only one to handle said remote repository. *) + {2: Pushing and synchronisation.} + + The user can modify the repository (add files, remove files, etc.). They + can do this locally (with the {!module:Make.Local} module) and thus assume + a possible desynchronisation between the remote repository and what exists + locally or they can share these changes with the remote repository (default + behavior). + + In the latter case, the notion of {i merge} and conflicts is not handled by + our implementation. This means that if the remote repository is manipulated + by another instance than yours, it can result in conflicts that will make + the above functions fail. + + The only check done by the remote Git repository when you want to submit + your change is the ability to find a path between a commit available + remotely, the commit-graph given by the transmission, and your last commit. + In that way, our [push] is most similar to a [git push --force]! + + To save I/O, the {!val:Make.batch} operation allows you to do some change + into a closure and the implementation will push only at the end of this + closure. By this way, you can {!val:Make.set}, {!val:Make.rename} or + {!val:Make.remove} without a systematic [push] on these actions. Only one + will be done at the end of your closure. + + {2: Serialization of the Git repository.} + + Finally, the KV-store tries to keep the minimal set of commits required + between you and the remote repository. In other words, only {i un}pushed + changes are kept by the KV-store. However, if these changes are not pushed, + they will be stored into the final state produced by {!val:to_octets}. In + other words, the more changes you make out of sync with the remote + repository (without pushing them), the bigger the state serialization will + be. *) type t (** The type of the Git store. *) val connect : Mimic.ctx -> string -> t Lwt.t -(** [connect ctx remote] creates a new Git store which synchronizes +(** [connect ctx remote] creates a new Git store which synchronises with [remote] {i via} protocols available into the given [ctx]. @raise [Invalid_argument _] if we can not initialize the store, or if @@ -47,20 +72,10 @@ type change = [ `Add of Mirage_kv.Key.t | `Change of Mirage_kv.Key.t ] val pull : t -> (change list, [> `Msg of string ]) result Lwt.t -(** [pull store] tries to synchronize the remote Git repository with your local +(** [pull store] tries to synchronise the remote Git repository with your local [store] Git repository. It returns a list of changes between the old state of your store and what you have remotely. *) -val push : t -> (unit, [> `Msg of string ]) result Lwt.t -(** [push store] tries to push any changes from your local Git repository - [store] to the remoe Git repository. The [push] function can fails for many - reasons. Currently, we don't handle merge politics and how we can resolve - conflicts between local and remote Git repositories. That mostly means that - if you are the only one who push to the Git repository (into a specific - branch), everything should be fine. But, if someone else push into the same - remote Git repository, your change can be discarded by the remote server - (due to conflicts). *) - module Make (Pclock : Mirage_clock.PCLOCK) : sig include Mirage_kv.RW with type t = t @@ -68,4 +83,11 @@ module Make (Pclock : Mirage_clock.PCLOCK) : sig | `Hash_not_found of Digestif.SHA1.t | `Reference_not_found of Git.Reference.t | Mirage_kv.write_error ] + + module Local : sig + val set : t -> key -> string -> (unit, write_error) result Lwt.t + val remove : t -> key -> (unit, write_error) result Lwt.t + val rename : t -> source:key -> dest:key -> (unit, write_error) result Lwt.t + val set_partial : t -> key -> offset:int -> string -> (unit, write_error) result Lwt.t + end end