Continue to improve
This commit is contained in:
parent
0615657525
commit
5a90af0f1a
5 changed files with 105 additions and 19 deletions
|
@ -18,6 +18,5 @@ run-test: [ "dune" "runtest" "-p" name "-j" jobs ]
|
||||||
depends: [
|
depends: [
|
||||||
"ocaml" {>= "4.14.0"}
|
"ocaml" {>= "4.14.0"}
|
||||||
"dune" {>= "3.5.0"}
|
"dune" {>= "3.5.0"}
|
||||||
"bigstringaf" {with-test & >= "0.9.0"}
|
|
||||||
"alcotest" {with-test & >= "1.7.0"}
|
"alcotest" {with-test & >= "1.7.0"}
|
||||||
]
|
]
|
||||||
|
|
|
@ -82,6 +82,37 @@ module Bstr = struct
|
||||||
|
|
||||||
let to_string bstr = sub_string bstr ~off:0 ~len:(length bstr)
|
let to_string bstr = sub_string bstr ~off:0 ~len:(length bstr)
|
||||||
let is_empty bstr = length bstr == 0
|
let is_empty bstr = length bstr == 0
|
||||||
|
|
||||||
|
let is_prefix ~affix bstr =
|
||||||
|
let len_affix = String.length affix in
|
||||||
|
let len_bstr = length bstr in
|
||||||
|
if len_affix > len_bstr then false
|
||||||
|
else
|
||||||
|
let max_idx_affix = len_affix - 1 in
|
||||||
|
let rec go idx =
|
||||||
|
if idx > max_idx_affix then true
|
||||||
|
else if affix.[idx] != bstr.{idx} then false
|
||||||
|
else go (succ idx)
|
||||||
|
in
|
||||||
|
go 0
|
||||||
|
|
||||||
|
let is_infix ~affix bstr =
|
||||||
|
let len_affix = String.length affix in
|
||||||
|
let len_bstr = length bstr in
|
||||||
|
if len_affix > len_bstr then false
|
||||||
|
else
|
||||||
|
let max_idx_affix = len_affix - 1 in
|
||||||
|
let max_idx_bstr = len_bstr - len_affix in
|
||||||
|
let rec go idx k =
|
||||||
|
if idx > max_idx_bstr then false
|
||||||
|
else if k > max_idx_affix then true
|
||||||
|
else if k > 0 then
|
||||||
|
if affix.[k] == bstr.{idx + k} then go idx (succ k)
|
||||||
|
else go (succ idx) 0
|
||||||
|
else if affix.[0] = bstr.{idx} then go idx 1
|
||||||
|
else go (succ idx) 0
|
||||||
|
in
|
||||||
|
go 0 0
|
||||||
end
|
end
|
||||||
|
|
||||||
external hash : (int32[@unboxed]) -> int -> (int32[@unboxed])
|
external hash : (int32[@unboxed]) -> int -> (int32[@unboxed])
|
||||||
|
|
|
@ -21,30 +21,64 @@ module Bstr : sig
|
||||||
@raise Invalid_argument if [i] is not an index of [bstr]. *)
|
@raise Invalid_argument if [i] is not an index of [bstr]. *)
|
||||||
|
|
||||||
val get_int8 : t -> int -> int
|
val get_int8 : t -> int -> int
|
||||||
|
(** [get_int8 bstr i] is [bstr]'s signed 8-bit integer starting at byte index
|
||||||
|
[i]. *)
|
||||||
|
|
||||||
val get_uint8 : t -> int -> int
|
val get_uint8 : t -> int -> int
|
||||||
|
(** [get_uint8 bstr i] is [bstr]'s unsigned 8-bit integer starting at byte
|
||||||
|
index [i]. *)
|
||||||
|
|
||||||
val get_int16_ne : t -> int -> int
|
val get_int16_ne : t -> int -> int
|
||||||
|
(** [get_int16_ne bstr i] is [bstr]'s native-endian signed 16-bit integer
|
||||||
|
starting at byte index [i]. *)
|
||||||
|
|
||||||
val get_int16_le : t -> int -> int
|
val get_int16_le : t -> int -> int
|
||||||
|
(** [get_int16_le bstr i] is [bstr]'s little-endian signed 16-bit integer
|
||||||
|
starting at byte index [i]. *)
|
||||||
|
|
||||||
val get_int16_be : t -> int -> int
|
val get_int16_be : t -> int -> int
|
||||||
|
(** [get_int16_be bstr i] is [bstr]'s big-endian signed 16-bit integer
|
||||||
|
starting at byte index [i]. *)
|
||||||
|
|
||||||
val get_int32_ne : t -> int -> int32
|
val get_int32_ne : t -> int -> int32
|
||||||
|
(** [get_int32_ne bstr i] is [bstr]'s native-endian 32-bit integer starting
|
||||||
|
at byte index [i]. *)
|
||||||
|
|
||||||
val get_int32_le : t -> int -> int32
|
val get_int32_le : t -> int -> int32
|
||||||
|
(** [get_int32_le bstr i] is [bstr]'s little-endian 32-bit integer starting
|
||||||
|
at byte index [i]. *)
|
||||||
|
|
||||||
val get_int32_be : t -> int -> int32
|
val get_int32_be : t -> int -> int32
|
||||||
|
(** [get_int32_be bstr i] is [bstr]'s big-endian 32-bit integer starting at
|
||||||
|
byte index [i]. *)
|
||||||
|
|
||||||
val get_int64_ne : t -> int -> int64
|
val get_int64_ne : t -> int -> int64
|
||||||
|
(** [get_int64_ne bstr i] is [bstr]'s native-endian 64-bit integer starting
|
||||||
|
at byte index [i]. *)
|
||||||
|
|
||||||
val get_int64_le : t -> int -> int64
|
val get_int64_le : t -> int -> int64
|
||||||
|
(** [get_int64_le bstr i] is [bstr]'s little-endian 64-bit integer starting
|
||||||
|
at byte index [i]. *)
|
||||||
|
|
||||||
val get_int64_be : t -> int -> int64
|
val get_int64_be : t -> int -> int64
|
||||||
|
(** [get_int64_be bstr i] is [bstr]'s big-endian 64-bit integer starting at
|
||||||
|
byte index [i]. *)
|
||||||
|
|
||||||
val sub : t -> off:int -> len:int -> t
|
val sub : t -> off:int -> len:int -> t
|
||||||
(** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a new
|
(** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a
|
||||||
|
new
|
||||||
view into [bstr] starting at [off], and with length [len].
|
view into [bstr] starting at [off], and with length [len].
|
||||||
|
|
||||||
{b Note} that this does not allocate a new buffer, but instead shares the
|
{b Note} that this does not allocate a new buffer, but instead shares the
|
||||||
buffer of [bstr] with the newly-returned bigstring. *)
|
buffer of [bstr] with the newly-returned bigstring. *)
|
||||||
|
|
||||||
val sub_string : t -> off:int -> len:int -> string
|
val sub_string : t -> off:int -> len:int -> string
|
||||||
(** [sub_string bstr ~off ~len] returns a string of length [len] containing the
|
(** [sub_string bstr ~off ~len] returns a string of length [len] containing
|
||||||
bytes of [t] starting at [off]. *)
|
the bytes of [t] starting at [off]. *)
|
||||||
|
|
||||||
val to_string : t -> string
|
val to_string : t -> string
|
||||||
(** [to_string bstr] is equivalent to [sub_string bstr ~off:0 ~len:(length bstr)]. *)
|
(** [to_string bstr] is equivalent to [sub_string bstr ~off:0 ~len:(length
|
||||||
|
bstr)]. *)
|
||||||
|
|
||||||
val blit_to_bytes :
|
val blit_to_bytes :
|
||||||
t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit
|
t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit
|
||||||
|
@ -57,10 +91,9 @@ module Bstr : sig
|
||||||
of [dst]. *)
|
of [dst]. *)
|
||||||
|
|
||||||
val is_empty : t -> bool
|
val is_empty : t -> bool
|
||||||
|
|
||||||
(*
|
|
||||||
val is_prefix : affix:string -> t -> bool
|
val is_prefix : affix:string -> t -> bool
|
||||||
val is_infix : affix:string -> t -> bool
|
val is_infix : affix:string -> t -> bool
|
||||||
|
(*
|
||||||
val is_suffix : affix:string -> t -> bool
|
val is_suffix : affix:string -> t -> bool
|
||||||
val for_all : (char -> bool) -> t -> bool
|
val for_all : (char -> bool) -> t -> bool
|
||||||
val exists : (char -> bool) -> t -> bool
|
val exists : (char -> bool) -> t -> bool
|
||||||
|
@ -105,7 +138,30 @@ type 'fd map = 'fd -> pos:int -> int -> bigstring
|
||||||
{!val:load} or the {{!user_friendly} user-friendly functions}.
|
{!val:load} or the {{!user_friendly} user-friendly functions}.
|
||||||
|
|
||||||
These functions can read one or more pages. {!val:load} reads one page at
|
These functions can read one or more pages. {!val:load} reads one page at
|
||||||
most. *)
|
most.
|
||||||
|
|
||||||
|
{2 Note about large file and [Cachet].}
|
||||||
|
|
||||||
|
For performance reasons, Cachet has chosen to use an [int] rather than an
|
||||||
|
[int64] for the offset (the logical address). On a 64-bit architecture,
|
||||||
|
addressing in the block device should not be a problem and Cachet is able
|
||||||
|
to manage large block devices. However, on a 32-bit architecture, Cachet
|
||||||
|
should only be able to handle ~2 GB files.
|
||||||
|
|
||||||
|
We consider that it is up to the developer to check this:
|
||||||
|
{[
|
||||||
|
let _max_int31 = 2147483647L (* (1 lsl 31) - 1 *)
|
||||||
|
|
||||||
|
let () =
|
||||||
|
let fd = Unix.openfile "disk.img" Unix.[ O_RDONLY ] 0o644 in
|
||||||
|
let stat = Unix.LargeFile.fstat fd in
|
||||||
|
if Sys.word_size = 32 && stat.Unix.LargeFile.st_size > _max_int31
|
||||||
|
then failwith "Too big block-device";
|
||||||
|
...
|
||||||
|
]}
|
||||||
|
|
||||||
|
So that, as soon as possible, the user can find out whether or not the
|
||||||
|
program can handle large block-devices. *)
|
||||||
|
|
||||||
type 'fd t
|
type 'fd t
|
||||||
|
|
||||||
|
@ -162,8 +218,8 @@ val get_int8 : 'fd t -> int -> int
|
||||||
index [logical_address]. *)
|
index [logical_address]. *)
|
||||||
|
|
||||||
val get_uint8 : 'fd t -> int -> int
|
val get_uint8 : 'fd t -> int -> int
|
||||||
(** [get_uint8 t logical_address] is [t]'s unsigned 8-bit integer starting at byte
|
(** [get_uint8 t logical_address] is [t]'s unsigned 8-bit integer starting at
|
||||||
index [logical_address]. *)
|
byte index [logical_address]. *)
|
||||||
|
|
||||||
val get_uint16_ne : 'fd t -> int -> int
|
val get_uint16_ne : 'fd t -> int -> int
|
||||||
val get_uint16_le : 'fd t -> int -> int
|
val get_uint16_le : 'fd t -> int -> int
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
(test
|
(test
|
||||||
(name test)
|
(name test)
|
||||||
(libraries cachet bigstringaf alcotest))
|
(libraries cachet alcotest))
|
||||||
|
|
16
test/test.ml
16
test/test.ml
|
@ -1,12 +1,12 @@
|
||||||
let random ?g len =
|
let random ?g len =
|
||||||
let bstr = Bigstringaf.create len in
|
let bstr = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len in
|
||||||
for i = 0 to len - 1 do
|
for i = 0 to len - 1 do
|
||||||
let chr =
|
let chr =
|
||||||
match g with
|
match g with
|
||||||
| Some g -> Char.unsafe_chr (Random.State.bits g land 0xff)
|
| Some g -> Char.unsafe_chr (Random.State.bits g land 0xff)
|
||||||
| None -> Char.unsafe_chr (Random.bits () land 0xff)
|
| None -> Char.unsafe_chr (Random.bits () land 0xff)
|
||||||
in
|
in
|
||||||
Bigstringaf.set bstr i chr
|
bstr.{i} <- chr
|
||||||
done;
|
done;
|
||||||
bstr
|
bstr
|
||||||
|
|
||||||
|
@ -18,24 +18,24 @@ let make ?cachesize ?pagesize ?g len =
|
||||||
let len' = Int.min (Bigarray.Array1.dim bstr - pos) len in
|
let len' = Int.min (Bigarray.Array1.dim bstr - pos) len in
|
||||||
Bigarray.Array1.sub bstr pos len'
|
Bigarray.Array1.sub bstr pos len'
|
||||||
in
|
in
|
||||||
(Cachet.make ?cachesize ?pagesize ~map (), bstr)
|
(Cachet.make ?cachesize ?pagesize ~map (), Cachet.Bstr.of_bigstring bstr)
|
||||||
|
|
||||||
let test01 =
|
let test01 =
|
||||||
Alcotest.test_case "test01" `Quick @@ fun () ->
|
Alcotest.test_case "test01" `Quick @@ fun () ->
|
||||||
let t, oracle = make ~cachesize:0x100 ~pagesize:0x100 0xe000 in
|
let t, oracle = make ~cachesize:0x100 ~pagesize:0x100 0xe000 in
|
||||||
let a = Cachet.get_uint8 t 0xdead in
|
let a = Cachet.get_uint8 t 0xdead in
|
||||||
let b = Char.code (Bigstringaf.get oracle 0xdead) in
|
let b = Char.code (Cachet.Bstr.get oracle 0xdead) in
|
||||||
Alcotest.(check int) "0xdead" a b;
|
Alcotest.(check int) "0xdead" a b;
|
||||||
let a = Cachet.get_string t 0xdead ~len:10 in
|
let a = Cachet.get_string t 0xdead ~len:10 in
|
||||||
let b = Bigstringaf.substring oracle ~off:0xdead ~len:10 in
|
let b = Cachet.Bstr.sub_string oracle ~off:0xdead ~len:10 in
|
||||||
Alcotest.(check string) "0xdead" a b;
|
Alcotest.(check string) "0xdead" a b;
|
||||||
let a = Cachet.get_string t 0xdea0 ~len:10 in
|
let a = Cachet.get_string t 0xdea0 ~len:10 in
|
||||||
let b = Bigstringaf.substring oracle ~off:0xdea0 ~len:10 in
|
let b = Cachet.Bstr.sub_string oracle ~off:0xdea0 ~len:10 in
|
||||||
Alcotest.(check string) "0xdea0" a b;
|
Alcotest.(check string) "0xdea0" a b;
|
||||||
let a = Cachet.get_seq t 0 in
|
let a = Cachet.get_seq t 0 in
|
||||||
let b = Bigstringaf.to_string oracle in
|
let b = Cachet.Bstr.to_string oracle in
|
||||||
let a = List.of_seq a in
|
let a = List.of_seq a in
|
||||||
let a = String.concat "" a in
|
let a = String.concat "" a in
|
||||||
Alcotest.(check string) "all" a b
|
Alcotest.(check string) "all" a b
|
||||||
|
|
||||||
let () = Alcotest.run "chat" [ ("simple", [ test01 ]) ]
|
let () = Alcotest.run "cachet" [ ("simple", [ test01 ]) ]
|
||||||
|
|
Loading…
Reference in a new issue