Merge pull request 'Fix the push function' (#20) from push-before-shallow into main

Reviewed-on: https://git.robur.io/robur/git-kv/pulls/20
This commit is contained in:
dinosaure 2022-10-28 13:11:34 +00:00
commit 631249e333
3 changed files with 194 additions and 54 deletions

View file

@ -0,0 +1,66 @@
# Git-kv, a simple Key-Value store synchronized with a Git repository
This library is a simple implementation of a Git repository that can be read
and/or modified. It offers two ways to create such a local repository:
1) The local repository can be created in a serialized state
2) The local repository can be created from a remote repository
The first method has the advantage of not requiring an internet connection. The
serialized state can be created with the `mgit` tool:
```sh
$ mgit https://github.com/mirage/mirage <<EOF
> save db.pack
> quit
$ ls db.pack
db.pack
```
The disadvantage is that the serialized state may be out of sync with the state
of the remote repository. In this case, the user has access to the `pull`
function, which allows the internet state of the local repository to be
re-synchronised with the remote repository.
```ocaml
let contents_of_file filename =
let ic = open_in filename in
let ln = in_channel_length ic in
let rs = Bytes.create ln in
really_input ic rs 0 ln ;
Bytes.unsafe_to_string rs
let _ =
Git_kv.of_octets ctx
~remote:"git@github.com:mirage/mirage.git"
(contents_of_file "db.pack") >>= fun t ->
Git_kv.pull t >>= fun diff ->
...
```
The second method initiates a connection to the remote repository in order to
download its state and reproduce a synchronised internal state. The type of
connections supported are described in the given `ctx`. We recommend the
tutorial about [Mimic][mimic] to understand its use.
```sh
let _ =
Git_kv.connect ctx "git@github.com:mirage/mirage.git" >>= fun t ->
...
```
The user can manipulate the repository as an [RW][mirage-kv-rw] repository. Any
change to the repository requires a new commit. These changes can be sent to
the remote repository by deriving manipulation functions with the `and_push`
suffix:
```ocaml
module Store = Git_kv.Make (Pclock)
let new_file_locally t =
Store.set t Mirage_kv.Key.(empty / "foo") "foo" >>= fun () ->
...
let new_file_locally_and_remotely t =
Git_kv.pull t >>= fun _diff ->
Store.set_and_push t Mirage_kv.Key.(empty / "foo") "foo" >>= fun () ->
...
```
[mimic]: https://dinosaure.github.io/mimic/mimic/index.html
[mirage-kv-rw]: https://github.com/mirage/mirage-kv

View file

@ -8,6 +8,7 @@ type t =
; edn : Smart_git.Endpoint.t
; branch : Git.Reference.t
; store : Store.t
; mutable batch : unit Lwt.t option
; mutable head : Store.hash option }
let init_store () =
@ -92,18 +93,12 @@ let pull t =
t.head <- Some head;
Lwt.return (Ok diff)
let push t =
let open Lwt.Infix in
Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ]
>|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a"
Git.Reference.pp t.branch Sync.pp_error err))
let connect ctx endpoint =
let open Lwt.Infix in
init_store () >>= fun store ->
let store = to_invalid store in
let edn, branch = split_url endpoint in
let t = { ctx ; edn ; branch ; store ; head = None } in
let t = { ctx ; edn ; branch ; store ; batch= None; head= None } in
pull t >>= fun r ->
let _r = to_invalid r in
Lwt.return t
@ -336,7 +331,7 @@ let of_octets ctx ~remote data =
>|= Rresult.R.failwith_error_msg >>= fun store ->
analyze store data >>= fun head ->
let edn, branch = split_url remote in
Lwt.return_ok { ctx ; edn ; branch ; store ; head; })
Lwt.return_ok { ctx ; edn ; branch ; store ; batch= None; head; })
(fun exn ->
Fmt.epr ">>> Got an exception: %s.\n%!" (Printexc.to_string exn) ;
Fmt.epr ">>> %s.\n%!"
@ -453,7 +448,7 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct
; email= "git@mirage.io"
; date= now (), None }
let rec unroll_tree t ?head (pred_name, pred_hash) rpath =
let rec unroll_tree t ?head (pred_perm, pred_name, pred_hash) rpath =
let open Lwt.Infix in
let ( >>? ) = Lwt_result.bind in
let ( >>! ) x f = match x with
@ -463,30 +458,38 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct
| [] ->
( match head with
| None ->
let tree = Git.Tree.(v [ entry ~name:pred_name `Dir pred_hash ]) in
let tree = Git.Tree.(v [ entry ~name:pred_name pred_perm pred_hash ]) in
Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> Lwt.return_ok hash
| Some head ->
Search.find t.store head (`Commit (`Path [])) >|= Option.get >>= fun tree_root_hash ->
( Store.read_exn t.store tree_root_hash >>= function
| Git.Value.Tree tree ->
let tree = Git.Tree.(add (entry ~name:pred_name `Dir pred_hash) (remove ~name:pred_name tree)) in
let tree = Git.Tree.(add (entry ~name:pred_name pred_perm pred_hash) (remove ~name:pred_name tree)) in
Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) -> Lwt.return_ok hash
| _ -> assert false ) )
| name :: rest ->
(head >>! fun head -> Search.find t.store head (`Commit (`Path (List.rev rpath)))) >>= function
| None ->
let tree = Git.Tree.(v [ entry ~name:pred_name `Dir pred_hash ]) in
let tree = Git.Tree.(v [ entry ~name:pred_name pred_perm pred_hash ]) in
Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) ->
unroll_tree t ?head (name, hash) rest
unroll_tree t ?head (`Dir, name, hash) rest
| Some tree_hash ->
( Store.read_exn t.store tree_hash >>= function
| Git.Value.Tree tree ->
let tree = Git.Tree.(add (entry ~name:pred_name `Dir pred_hash) (remove ~name:pred_name tree)) in
let tree = Git.Tree.(add (entry ~name:pred_name pred_perm pred_hash) (remove ~name:pred_name tree)) in
Store.write t.store (Git.Value.Tree tree) >>? fun (hash, _) ->
unroll_tree t ?head (name, hash) rest
unroll_tree t ?head (`Dir, name, hash) rest
| _ -> assert false )
let no_batch = function
| None -> true
| Some th -> match Lwt.state th with
| Sleep -> true
| Return _ | Fail _ -> false
let ( >>? ) = Lwt_result.bind
let set t key contents =
let set ~and_push t key contents =
let segs = Mirage_kv.Key.segments key in
let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in
match segs with
@ -497,14 +500,20 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct
let name = List.hd rpath in
let open Lwt_result.Infix in
Store.write t.store (Git.Value.Blob blob) >>= fun (hash, _) ->
unroll_tree t ?head:t.head (name, hash) (List.tl rpath) >>= fun tree_root_hash ->
unroll_tree t ?head:t.head (`Normal, name, hash) (List.tl rpath) >>= fun tree_root_hash ->
let committer = author ~now in
let author = author ~now in
let parents = Option.value ~default:[] (Option.map (fun head -> [ head ]) t.head) in
let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer
~parents (Some "Committed by git-kv") in
Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) ->
Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () ->
Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () ->
Lwt.Infix.(if and_push then
Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ]
>|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a"
Git.Reference.pp t.branch Sync.pp_error err))
>>? fun () -> (Store.shallow t.store hash >|= Result.ok)
else Lwt.return_ok ()) >>= fun () ->
t.head <- Some hash ; Lwt.return_ok ()
let to_write_error (error : Store.error) = match error with
@ -513,23 +522,23 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct
| `Msg err -> `Msg err
| err -> Rresult.R.msgf "%a" Store.pp_error err
let set t key contents =
let set ?(and_push= true) t key contents =
let open Lwt.Infix in
set t key contents >|= Rresult.R.reword_error to_write_error
let and_push = no_batch t.batch && and_push in
set ~and_push t key contents >|= Rresult.R.reword_error to_write_error
let set_partial t key ~offset chunk =
let set_partial ?(and_push= true) t key ~offset chunk =
let open Lwt_result.Infix in
let and_push = no_batch t.batch && and_push in
get t key >>= fun contents ->
let len = String.length contents in
let add = String.length chunk in
let res = Bytes.make (max len (offset + add)) '\000' in
Bytes.blit_string contents 0 res 0 len ;
Bytes.blit_string chunk 0 res offset add ;
set t key (Bytes.unsafe_to_string res)
set ~and_push t key (Bytes.unsafe_to_string res)
let batch t ?retries:_ f = f t
let remove t key =
let remove ~and_push t key =
let segs = Mirage_kv.Key.segments key in
let now () = Int64.of_float (Ptime.to_float_s (Ptime.v (Pclock.now_d_ps ()))) in
match List.rev segs, t.head with
@ -548,7 +557,13 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct
let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer
~parents:[ head ] (Some "Committed by git-kv") in
Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) ->
Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () ->
Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () ->
Lwt.Infix.(if and_push then
Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ]
>|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a"
Git.Reference.pp t.branch Sync.pp_error err))
>>? fun () -> Store.shallow t.store hash >|= Result.ok
else Lwt.return_ok ()) >>= fun () ->
t.head <- Some hash ; Lwt.return_ok ()
| name :: pred_name :: rest, Some head ->
let open Lwt.Infix in
@ -559,24 +574,61 @@ module Make (Pclock : Mirage_clock.PCLOCK) = struct
let tree = Git.Tree.remove ~name tree in
let open Lwt_result.Infix in
Store.write t.store (Git.Value.Tree tree) >>= fun (pred_hash, _) ->
unroll_tree t ~head (pred_name, pred_hash) rest >>= fun tree_root_hash ->
unroll_tree t ~head (`Dir, pred_name, pred_hash) rest >>= fun tree_root_hash ->
let committer = author ~now in
let author = author ~now in
let commit = Store.Value.Commit.make ~tree:tree_root_hash ~author ~committer
~parents:[ head ] (Some "Committed by git-kv") in
Store.write t.store (Git.Value.Commit commit) >>= fun (hash, _) ->
Lwt.Infix.(Store.shallow t.store hash >|= Result.ok) >>= fun () ->
Store.Ref.write t.store t.branch (Git.Reference.uid hash) >>= fun () ->
Lwt.Infix.(if and_push then
Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ]
>|= Result.map_error (fun err -> `Msg (Fmt.str "error pushing branch %a: %a"
Git.Reference.pp t.branch Sync.pp_error err))
>>? fun () -> Store.shallow t.store hash >|= Result.ok
else Lwt.return_ok ()) >>= fun () ->
t.head <- Some hash ; Lwt.return_ok ()
| _ -> Lwt.return_ok ()
let remove t key =
let remove ?(and_push= true) t key =
let open Lwt.Infix in
remove t key >|= Rresult.R.reword_error to_write_error
let and_push = no_batch t.batch && and_push in
remove ~and_push t key >|= Rresult.R.reword_error to_write_error
let rename t ~source ~dest =
let rename ?(and_push= true) t ~source ~dest =
(* TODO(dinosaure): optimize it! It was done on the naive way. *)
let open Lwt_result.Infix in
get t source >>= fun contents ->
remove t source >>= fun () ->
set t dest contents
remove ~and_push t source >>= fun () ->
set ~and_push t dest contents
let batch t ?retries:_ f =
let open Lwt.Infix in
( match t.batch with
| None -> Lwt.return_unit
| Some th -> th ) >>= fun () ->
let th, wk = Lwt.wait () in
t.batch <- Some th ;
f t >>= fun res ->
( Sync.push ~capabilities ~ctx:t.ctx t.edn t.store [ `Update (t.branch, t.branch) ] >>= function
| Ok () ->
( match t.head with
| None -> Lwt.return_unit
| Some hash -> Store.shallow t.store hash )
| Error err ->
Fmt.failwith "error pushing branch %a: %a" Git.Reference.pp t.branch Sync.pp_error err ) >>= fun () ->
Lwt.wakeup_later wk () ;
Lwt.return res
module Local = struct
let set_partial t k ~offset v = set_partial ~and_push:false t k ~offset v
let set t k v = set ~and_push:false t k v
let remove t k = remove ~and_push:false t k
let rename t ~source ~dest = rename ~and_push:false t ~source ~dest
end
let set_partial t k ~offset v = set_partial ~and_push:true t k ~offset v
let set t k v = set ~and_push:true t k v
let remove t k = remove ~and_push:true t k
let rename t ~source ~dest = rename ~and_push:true t ~source ~dest
end

View file

@ -3,7 +3,7 @@
This module implements the ability to manipulate a Git repository as a
Key-Value store. It allows you to create a local (in-memory) Git repository
that can come from either:
- a remote Git repository
- a remote Git repository (with {!val:connect})
- a state serialized by the {!val:to_octets} function
The first case is interesting if you want to be synchronised with the
@ -13,20 +13,45 @@
In the second case, the synchronisation can be done later with {!val:pull}.
As far as {!val:push} is concerned, a synchronisation with the remote
repository is necessary before {b changing} and sending the new information
(a use of {!val:Make.set}/{!val:Make.rename} should be preceded by a
{!val:pull}). This is because we do not handle conflicts that may exist
between your local repository and the remote repository - in other words,
if you want to ensure consistency between reading ({!val:pull}) and writing
({!val:push}) to a remote repository, the instance that uses this code
should be the only one to handle said remote repository. *)
{2: Pushing and synchronisation.}
The user can modify the repository (add files, remove files, etc.). They
can do this locally (with the {!module:Make.Local} module) and thus assume
a possible desynchronisation between the remote repository and what exists
locally or they can share these changes with the remote repository (default
behavior).
In the latter case, the notion of {i merge} and conflicts is not handled by
our implementation. This means that if the remote repository is manipulated
by another instance than yours, it can result in conflicts that will make
the above functions fail.
The only check done by the remote Git repository when you want to submit
your change is the ability to find a path between a commit available
remotely, the commit-graph given by the transmission, and your last commit.
In that way, our [push] is most similar to a [git push --force]!
To save I/O, the {!val:Make.batch} operation allows you to do some change
into a closure and the implementation will push only at the end of this
closure. By this way, you can {!val:Make.set}, {!val:Make.rename} or
{!val:Make.remove} without a systematic [push] on these actions. Only one
will be done at the end of your closure.
{2: Serialization of the Git repository.}
Finally, the KV-store tries to keep the minimal set of commits required
between you and the remote repository. In other words, only {i un}pushed
changes are kept by the KV-store. However, if these changes are not pushed,
they will be stored into the final state produced by {!val:to_octets}. In
other words, the more changes you make out of sync with the remote
repository (without pushing them), the bigger the state serialization will
be. *)
type t
(** The type of the Git store. *)
val connect : Mimic.ctx -> string -> t Lwt.t
(** [connect ctx remote] creates a new Git store which synchronizes
(** [connect ctx remote] creates a new Git store which synchronises
with [remote] {i via} protocols available into the given [ctx].
@raise [Invalid_argument _] if we can not initialize the store, or if
@ -47,20 +72,10 @@ type change = [ `Add of Mirage_kv.Key.t
| `Change of Mirage_kv.Key.t ]
val pull : t -> (change list, [> `Msg of string ]) result Lwt.t
(** [pull store] tries to synchronize the remote Git repository with your local
(** [pull store] tries to synchronise the remote Git repository with your local
[store] Git repository. It returns a list of changes between the old state
of your store and what you have remotely. *)
val push : t -> (unit, [> `Msg of string ]) result Lwt.t
(** [push store] tries to push any changes from your local Git repository
[store] to the remoe Git repository. The [push] function can fails for many
reasons. Currently, we don't handle merge politics and how we can resolve
conflicts between local and remote Git repositories. That mostly means that
if you are the only one who push to the Git repository (into a specific
branch), everything should be fine. But, if someone else push into the same
remote Git repository, your change can be discarded by the remote server
(due to conflicts). *)
module Make (Pclock : Mirage_clock.PCLOCK) : sig
include Mirage_kv.RW
with type t = t
@ -68,4 +83,11 @@ module Make (Pclock : Mirage_clock.PCLOCK) : sig
| `Hash_not_found of Digestif.SHA1.t
| `Reference_not_found of Git.Reference.t
| Mirage_kv.write_error ]
module Local : sig
val set : t -> key -> string -> (unit, write_error) result Lwt.t
val remove : t -> key -> (unit, write_error) result Lwt.t
val rename : t -> source:key -> dest:key -> (unit, write_error) result Lwt.t
val set_partial : t -> key -> offset:int -> string -> (unit, write_error) result Lwt.t
end
end