From 5a90af0f1a2616d766f04517791928ca78eebbef Mon Sep 17 00:00:00 2001 From: Calascibetta Romain Date: Fri, 8 Nov 2024 12:40:10 +0100 Subject: [PATCH] Continue to improve --- cachet.opam | 1 - lib/cachet.ml | 31 +++++++++++++++++++++ lib/cachet.mli | 74 ++++++++++++++++++++++++++++++++++++++++++++------ test/dune | 2 +- test/test.ml | 16 +++++------ 5 files changed, 105 insertions(+), 19 deletions(-) diff --git a/cachet.opam b/cachet.opam index ddbef78..694ffce 100644 --- a/cachet.opam +++ b/cachet.opam @@ -18,6 +18,5 @@ run-test: [ "dune" "runtest" "-p" name "-j" jobs ] depends: [ "ocaml" {>= "4.14.0"} "dune" {>= "3.5.0"} - "bigstringaf" {with-test & >= "0.9.0"} "alcotest" {with-test & >= "1.7.0"} ] diff --git a/lib/cachet.ml b/lib/cachet.ml index 4249c8f..f531b75 100644 --- a/lib/cachet.ml +++ b/lib/cachet.ml @@ -82,6 +82,37 @@ module Bstr = struct let to_string bstr = sub_string bstr ~off:0 ~len:(length bstr) let is_empty bstr = length bstr == 0 + + let is_prefix ~affix bstr = + let len_affix = String.length affix in + let len_bstr = length bstr in + if len_affix > len_bstr then false + else + let max_idx_affix = len_affix - 1 in + let rec go idx = + if idx > max_idx_affix then true + else if affix.[idx] != bstr.{idx} then false + else go (succ idx) + in + go 0 + + let is_infix ~affix bstr = + let len_affix = String.length affix in + let len_bstr = length bstr in + if len_affix > len_bstr then false + else + let max_idx_affix = len_affix - 1 in + let max_idx_bstr = len_bstr - len_affix in + let rec go idx k = + if idx > max_idx_bstr then false + else if k > max_idx_affix then true + else if k > 0 then + if affix.[k] == bstr.{idx + k} then go idx (succ k) + else go (succ idx) 0 + else if affix.[0] = bstr.{idx} then go idx 1 + else go (succ idx) 0 + in + go 0 0 end external hash : (int32[@unboxed]) -> int -> (int32[@unboxed]) diff --git a/lib/cachet.mli b/lib/cachet.mli index 6ea0c96..fdeb50b 100644 --- a/lib/cachet.mli +++ b/lib/cachet.mli @@ -21,30 +21,64 @@ module Bstr : sig @raise Invalid_argument if [i] is not an index of [bstr]. *) val get_int8 : t -> int -> int + (** [get_int8 bstr i] is [bstr]'s signed 8-bit integer starting at byte index + [i]. *) + val get_uint8 : t -> int -> int + (** [get_uint8 bstr i] is [bstr]'s unsigned 8-bit integer starting at byte + index [i]. *) + val get_int16_ne : t -> int -> int + (** [get_int16_ne bstr i] is [bstr]'s native-endian signed 16-bit integer + starting at byte index [i]. *) + val get_int16_le : t -> int -> int + (** [get_int16_le bstr i] is [bstr]'s little-endian signed 16-bit integer + starting at byte index [i]. *) + val get_int16_be : t -> int -> int + (** [get_int16_be bstr i] is [bstr]'s big-endian signed 16-bit integer + starting at byte index [i]. *) + val get_int32_ne : t -> int -> int32 + (** [get_int32_ne bstr i] is [bstr]'s native-endian 32-bit integer starting + at byte index [i]. *) + val get_int32_le : t -> int -> int32 + (** [get_int32_le bstr i] is [bstr]'s little-endian 32-bit integer starting + at byte index [i]. *) + val get_int32_be : t -> int -> int32 + (** [get_int32_be bstr i] is [bstr]'s big-endian 32-bit integer starting at + byte index [i]. *) + val get_int64_ne : t -> int -> int64 + (** [get_int64_ne bstr i] is [bstr]'s native-endian 64-bit integer starting + at byte index [i]. *) + val get_int64_le : t -> int -> int64 + (** [get_int64_le bstr i] is [bstr]'s little-endian 64-bit integer starting + at byte index [i]. *) + val get_int64_be : t -> int -> int64 + (** [get_int64_be bstr i] is [bstr]'s big-endian 64-bit integer starting at + byte index [i]. *) val sub : t -> off:int -> len:int -> t - (** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a new + (** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a + new view into [bstr] starting at [off], and with length [len]. {b Note} that this does not allocate a new buffer, but instead shares the buffer of [bstr] with the newly-returned bigstring. *) val sub_string : t -> off:int -> len:int -> string - (** [sub_string bstr ~off ~len] returns a string of length [len] containing the - bytes of [t] starting at [off]. *) + (** [sub_string bstr ~off ~len] returns a string of length [len] containing + the bytes of [t] starting at [off]. *) val to_string : t -> string - (** [to_string bstr] is equivalent to [sub_string bstr ~off:0 ~len:(length bstr)]. *) + (** [to_string bstr] is equivalent to [sub_string bstr ~off:0 ~len:(length + bstr)]. *) val blit_to_bytes : t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit @@ -57,10 +91,9 @@ module Bstr : sig of [dst]. *) val is_empty : t -> bool - - (* val is_prefix : affix:string -> t -> bool val is_infix : affix:string -> t -> bool + (* val is_suffix : affix:string -> t -> bool val for_all : (char -> bool) -> t -> bool val exists : (char -> bool) -> t -> bool @@ -105,7 +138,30 @@ type 'fd map = 'fd -> pos:int -> int -> bigstring {!val:load} or the {{!user_friendly} user-friendly functions}. These functions can read one or more pages. {!val:load} reads one page at - most. *) + most. + + {2 Note about large file and [Cachet].} + + For performance reasons, Cachet has chosen to use an [int] rather than an + [int64] for the offset (the logical address). On a 64-bit architecture, + addressing in the block device should not be a problem and Cachet is able + to manage large block devices. However, on a 32-bit architecture, Cachet + should only be able to handle ~2 GB files. + + We consider that it is up to the developer to check this: + {[ + let _max_int31 = 2147483647L (* (1 lsl 31) - 1 *) + + let () = + let fd = Unix.openfile "disk.img" Unix.[ O_RDONLY ] 0o644 in + let stat = Unix.LargeFile.fstat fd in + if Sys.word_size = 32 && stat.Unix.LargeFile.st_size > _max_int31 + then failwith "Too big block-device"; + ... + ]} + + So that, as soon as possible, the user can find out whether or not the + program can handle large block-devices. *) type 'fd t @@ -162,8 +218,8 @@ val get_int8 : 'fd t -> int -> int index [logical_address]. *) val get_uint8 : 'fd t -> int -> int -(** [get_uint8 t logical_address] is [t]'s unsigned 8-bit integer starting at byte - index [logical_address]. *) +(** [get_uint8 t logical_address] is [t]'s unsigned 8-bit integer starting at + byte index [logical_address]. *) val get_uint16_ne : 'fd t -> int -> int val get_uint16_le : 'fd t -> int -> int diff --git a/test/dune b/test/dune index d62c811..9be7c5d 100644 --- a/test/dune +++ b/test/dune @@ -1,3 +1,3 @@ (test (name test) - (libraries cachet bigstringaf alcotest)) + (libraries cachet alcotest)) diff --git a/test/test.ml b/test/test.ml index c832f4f..081daab 100644 --- a/test/test.ml +++ b/test/test.ml @@ -1,12 +1,12 @@ let random ?g len = - let bstr = Bigstringaf.create len in + let bstr = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len in for i = 0 to len - 1 do let chr = match g with | Some g -> Char.unsafe_chr (Random.State.bits g land 0xff) | None -> Char.unsafe_chr (Random.bits () land 0xff) in - Bigstringaf.set bstr i chr + bstr.{i} <- chr done; bstr @@ -18,24 +18,24 @@ let make ?cachesize ?pagesize ?g len = let len' = Int.min (Bigarray.Array1.dim bstr - pos) len in Bigarray.Array1.sub bstr pos len' in - (Cachet.make ?cachesize ?pagesize ~map (), bstr) + (Cachet.make ?cachesize ?pagesize ~map (), Cachet.Bstr.of_bigstring bstr) let test01 = Alcotest.test_case "test01" `Quick @@ fun () -> let t, oracle = make ~cachesize:0x100 ~pagesize:0x100 0xe000 in let a = Cachet.get_uint8 t 0xdead in - let b = Char.code (Bigstringaf.get oracle 0xdead) in + let b = Char.code (Cachet.Bstr.get oracle 0xdead) in Alcotest.(check int) "0xdead" a b; let a = Cachet.get_string t 0xdead ~len:10 in - let b = Bigstringaf.substring oracle ~off:0xdead ~len:10 in + let b = Cachet.Bstr.sub_string oracle ~off:0xdead ~len:10 in Alcotest.(check string) "0xdead" a b; let a = Cachet.get_string t 0xdea0 ~len:10 in - let b = Bigstringaf.substring oracle ~off:0xdea0 ~len:10 in + let b = Cachet.Bstr.sub_string oracle ~off:0xdea0 ~len:10 in Alcotest.(check string) "0xdea0" a b; let a = Cachet.get_seq t 0 in - let b = Bigstringaf.to_string oracle in + let b = Cachet.Bstr.to_string oracle in let a = List.of_seq a in let a = String.concat "" a in Alcotest.(check string) "all" a b -let () = Alcotest.run "chat" [ ("simple", [ test01 ]) ] +let () = Alcotest.run "cachet" [ ("simple", [ test01 ]) ]