cachet/lib/cachet.mli

429 lines
17 KiB
OCaml
Raw Normal View History

2024-11-07 19:11:22 +00:00
type bigstring =
(char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
(** A one-dimensional [Bigarray.Array1.t] of [char]s, stored as unsigned 8-bit
integers with the C layout. *)
val memcpy :
bigstring -> src_off:int -> bigstring -> dst_off:int -> len:int -> unit
(** [memcpy src ~src_off dst ~dst_off ~len] presumably copies [len] bytes from
[src], starting at [src_off], to [dst], starting at [dst_off] (the roles of
the two bigstrings are inferred from the labels).
NOTE(review): behaviour on overlapping ranges and on out-of-bounds arguments
is not specified by this signature; confirm against the implementation. *)
val memmove :
bigstring -> src_off:int -> bigstring -> dst_off:int -> len:int -> unit
(** [memmove src ~src_off dst ~dst_off ~len] presumably moves [len] bytes from
[src], starting at [src_off], to [dst], starting at [dst_off] (the roles of
the two bigstrings are inferred from the labels).
NOTE(review): by analogy with C's [memmove] this is expected to be safe when
the two ranges overlap, but this signature does not say so; confirm against
the implementation. *)
2024-11-07 19:11:22 +00:00
module Bstr : sig
(** A read-only bigstring. *)
type t = private bigstring
2024-11-08 10:05:23 +00:00
val empty : t
(** [empty] is an empty bigstring. *)
2024-11-07 19:11:22 +00:00
val of_bigstring : bigstring -> t
(** [of_bigstring bigstring] is [bigstring] seen as a read-only {!t}.
NOTE(review): whether the underlying buffer is shared with [bigstring] or
copied is not visible from this signature; confirm against the
implementation. *)
2024-11-08 10:05:23 +00:00
2024-11-07 19:11:22 +00:00
val length : t -> int
2024-11-08 10:05:23 +00:00
(** [length bstr] is the number of bytes in [bstr]. *)
2024-11-07 19:11:22 +00:00
val get : t -> int -> char
(** [get bstr i] is the byte of [bstr] at index [i]. This is equivalent to
the [bstr.{i}] notation.
2024-11-08 10:05:23 +00:00
@raise Invalid_argument if [i] is not an index of [bstr]. *)
2024-11-07 19:11:22 +00:00
val get_int8 : t -> int -> int
2024-11-08 11:40:10 +00:00
(** [get_int8 bstr i] is [bstr]'s signed 8-bit integer starting at byte index
[i]. *)
2024-11-07 19:11:22 +00:00
val get_uint8 : t -> int -> int
2024-11-08 11:40:10 +00:00
(** [get_uint8 bstr i] is [bstr]'s unsigned 8-bit integer starting at byte
index [i]. *)
2024-11-08 14:21:37 +00:00
val get_uint16_ne : t -> int -> int
(** [get_uint16_ne bstr i] is [bstr]'s native-endian unsigned 16-bit integer
starting at byte index [i]. *)
val get_uint16_le : t -> int -> int
(** [get_uint16_le bstr i] is [bstr]'s little-endian unsigned 16-bit integer
starting at byte index [i]. *)
val get_uint16_be : t -> int -> int
(** [get_uint16_be bstr i] is [bstr]'s big-endian unsigned 16-bit integer
starting at byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int16_ne : t -> int -> int
2024-11-08 11:40:10 +00:00
(** [get_int16_ne bstr i] is [bstr]'s native-endian signed 16-bit integer
starting at byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int16_le : t -> int -> int
2024-11-08 11:40:10 +00:00
(** [get_int16_le bstr i] is [bstr]'s little-endian signed 16-bit integer
starting at byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int16_be : t -> int -> int
2024-11-08 11:40:10 +00:00
(** [get_int16_be bstr i] is [bstr]'s big-endian signed 16-bit integer
starting at byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int32_ne : t -> int -> int32
(** [get_int32_ne bstr i] is [bstr]'s native-endian 32-bit integer starting at
byte index [i]. *)
2024-11-08 11:40:10 +00:00
2024-11-07 19:11:22 +00:00
val get_int32_le : t -> int -> int32
(** [get_int32_le bstr i] is [bstr]'s little-endian 32-bit integer starting at
byte index [i]. *)
2024-11-08 11:40:10 +00:00
2024-11-07 19:11:22 +00:00
val get_int32_be : t -> int -> int32
2024-11-08 11:40:10 +00:00
(** [get_int32_be bstr i] is [bstr]'s big-endian 32-bit integer starting at
byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int64_ne : t -> int -> int64
(** [get_int64_ne bstr i] is [bstr]'s native-endian 64-bit integer starting at
byte index [i]. *)
2024-11-08 11:40:10 +00:00
2024-11-07 19:11:22 +00:00
val get_int64_le : t -> int -> int64
(** [get_int64_le bstr i] is [bstr]'s little-endian 64-bit integer starting at
byte index [i]. *)
2024-11-08 11:40:10 +00:00
2024-11-07 19:11:22 +00:00
val get_int64_be : t -> int -> int64
2024-11-08 11:40:10 +00:00
(** [get_int64_be bstr i] is [bstr]'s big-endian 64-bit integer starting at
byte index [i]. *)
2024-11-08 11:08:05 +00:00
2024-11-07 19:11:22 +00:00
val sub : t -> off:int -> len:int -> t
2024-11-08 11:40:10 +00:00
(** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a
new view into [bstr] starting at [off], and with length [len].
2024-11-08 11:08:05 +00:00
{b Note} that this does not allocate a new buffer, but instead shares the
buffer of [bstr] with the newly-returned bigstring. *)
2024-11-07 19:11:22 +00:00
val sub_string : t -> off:int -> len:int -> string
2024-11-08 11:40:10 +00:00
(** [sub_string bstr ~off ~len] returns a string of length [len] containing
the bytes of [bstr] starting at [off]. *)
2024-11-08 11:08:05 +00:00
2024-11-07 19:11:22 +00:00
val to_string : t -> string
(** [to_string bstr] is equivalent to
[sub_string bstr ~off:0 ~len:(length bstr)]. *)
2024-11-07 19:11:22 +00:00
val blit_to_bytes :
t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit
2024-11-08 11:08:05 +00:00
(** [blit_to_bytes src ~src_off dst ~dst_off ~len] copies [len] bytes from
[src], starting at index [src_off], to byte sequence [dst], starting at
index [dst_off].
@raise Invalid_argument
if [src_off] and [len] do not designate a valid range of [src], or if
[dst_off] and [len] do not designate a valid range of [dst]. *)
2024-11-07 19:11:22 +00:00
val is_empty : t -> bool
2024-11-08 14:21:37 +00:00
(** [is_empty bstr] is [length bstr = 0]. *)
2024-11-07 19:11:22 +00:00
val is_prefix : affix:string -> t -> bool
2024-11-08 14:21:37 +00:00
(** [is_prefix ~affix bstr] is [true] iff [affix.[idx] = bstr.{idx}] for all
indices [idx] of [affix]. *)
2024-11-07 19:11:22 +00:00
val is_infix : affix:string -> t -> bool
2024-11-08 14:21:37 +00:00
(** [is_infix ~affix bstr] is [true] iff there exists an index [j] in [bstr]
such that for all indices [i] of [affix] we have
[affix.[i] = bstr.{j + i}]. *)
2024-11-08 14:21:37 +00:00
2024-11-07 19:11:22 +00:00
val is_suffix : affix:string -> t -> bool
2024-11-08 14:21:37 +00:00
(** [is_suffix ~affix bstr] is [true] iff [affix.[n - idx] = bstr.{m - idx}]
for all indices [idx] of [affix] with [n = String.length affix - 1] and
[m = length bstr - 1]. *)
2024-11-07 19:11:22 +00:00
val for_all : (char -> bool) -> t -> bool
(** [for_all p bstr] is [true] iff for all indices [idx] of [bstr],
[p bstr.{idx} = true]. *)
2024-11-08 14:21:37 +00:00
2024-11-07 19:11:22 +00:00
val exists : (char -> bool) -> t -> bool
2024-11-08 14:21:37 +00:00
(** [exists p bstr] is [true] iff there exists an index [idx] of [bstr] with
[p bstr.{idx} = true]. *)
2024-11-07 19:11:22 +00:00
val equal : t -> t -> bool
2024-11-08 14:21:37 +00:00
(** [equal a b] is [a = b]. *)
2024-11-07 19:11:22 +00:00
val with_range : ?first:int -> ?len:int -> t -> t
2024-11-08 14:21:37 +00:00
(** [with_range ~first ~len bstr] are the consecutive bytes of [bstr] whose
indices exist in the range \[[first];[first + len - 1]\].
[first] defaults to [0] and [len] to [max_int]. Note that [first] can be
any integer and [len] any positive integer. *)
2024-11-07 19:11:22 +00:00
val with_index_range : ?first:int -> ?last:int -> t -> t
2024-11-08 14:21:37 +00:00
(** [with_index_range ~first ~last bstr] are the consecutive bytes of [bstr]
whose indices exist in the range \[[first];[last]\].
[first] defaults to [0] and [last] to [length bstr - 1].
Note that both [first] and [last] can be any integer. If [first > last]
the interval is empty and the empty bigstring is returned. *)
2024-11-07 19:11:22 +00:00
val trim : ?drop:(char -> bool) -> t -> t
(** [trim ~drop bstr] is [bstr] with prefix and suffix bytes satisfying [drop]
in [bstr] removed. [drop] defaults to [fun chr -> chr = ' ']. *)
val span :
?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t * t
2024-11-08 14:21:37 +00:00
(** [span ~rev ~min ~max ~sat bstr] is [(l, r)] where:
- if [rev] is [false] (default), [l] is at least [min] and at most [max]
consecutive [sat] satisfying initial bytes of [bstr] or {!empty} if
there are no such bytes. [r] are the remaining bytes of [bstr].
- if [rev] is [true], [r] is at least [min] and at most [max] consecutive
[sat] satisfying final bytes of [bstr] or {!empty} if there are no such
bytes. [l] are the remaining bytes of [bstr].
2024-11-08 14:21:37 +00:00
If [max] is unspecified the span is unlimited. If [min] is unspecified it
defaults to [0]. If [min > max] the condition can't be satisfied and the
left or right span, depending on [rev], is always empty. [sat] defaults to
[Fun.const true].
@raise Invalid_argument if [max] or [min] is negative. *)
val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t
2024-12-26 14:44:17 +00:00
(** [take ~rev ~min ~max ~sat bstr] is the matching span of {!span} without
the remaining one. In other words:
{[
(if rev then snd else fst) @@ span ~rev ~min ~max ~sat bstr
]} *)
val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t
2024-12-26 14:44:17 +00:00
(** [drop ~rev ~min ~max ~sat bstr] is the remaining span of {!span} without
the matching span. In other words:
{[
(if rev then fst else snd) @@ span ~rev ~min ~max ~sat bstr
]} *)
2024-11-07 19:11:22 +00:00
end
type slice = private { offset: int; length: int; payload: Bstr.t }
(** A slice is an aligned segment of bytes (according to the [pagesize]
specified by the cache, see {!val:make}) with its absolute position into the
underlying {i block-device} and size. *)
val pp_slice : Format.formatter -> slice -> unit
2024-12-26 14:44:17 +00:00
(** Pretty-printer of {!type:slice}s. *)
2024-11-07 19:11:22 +00:00
val bstr_of_slice : ?logical_address:int -> slice -> Bstr.t
2024-12-26 14:44:17 +00:00
(** [bstr_of_slice ?logical_address slice] returns a read-only {i bigstring}
according to the given [slice] and, optionally, the [logical_address].
@raise Invalid_argument
if the given [logical_address] does not correspond to the given [slice].
*)
2024-11-07 19:11:22 +00:00
type 'fd map = 'fd -> pos:int -> int -> bigstring
(** A value [map : 'fd map], when applied as [map fd ~pos len], reads a
{!type:bigstring} at [pos]. [map] must return as much data as is available,
though never more than [len] bytes. [map] never fails. Instead, an empty
[bigstring] must be returned if e.g. the position is out of range. Depending
on how the cache is configured (see {!val:make}), [map] never reads more than
[pagesize] bytes. *)
2024-11-07 19:11:22 +00:00
2024-11-08 11:08:05 +00:00
(** {2 Note about schedulers and [Cachet].}
2024-11-07 19:11:22 +00:00
[Cachet] assumes that {!type:map} is {b atomic}, in other words: {!type:map}
is a unit of work that is indivisible and guaranteed to be executed as a
single, coherent, and uninterrupted operation.
In this way, the [map] function is considered as a "direct" computation that
does {b not} interact with a scheduler. However, reading a page can take
time. It may therefore be necessary to add a cooperation point after
2024-11-08 11:08:05 +00:00
{!val:load} or the {{!user_friendly} user-friendly functions}.
2024-11-07 19:11:22 +00:00
These functions can read one or more pages. {!val:load} reads one page at
most.
2024-11-08 11:40:10 +00:00
{2 Note about large files and [Cachet].}
For performance reasons, Cachet has chosen to use an [int] rather than an
[int64] for the offset (the logical address). On a 64-bit architecture,
addressing in the block device should not be a problem and Cachet is able to
manage large block devices. However, on a 32-bit architecture, OCaml's [int]
is only 31 bits wide ([max_int] is [(1 lsl 30) - 1]), so Cachet is only able
to handle ~1 GB files.
2024-11-08 11:40:10 +00:00
We consider that it is up to the developer to check this:
{[
let _max_int31 = 1073741823L (* (1 lsl 30) - 1, i.e. max_int on 32-bit *)
let () =
let fd = Unix.openfile "disk.img" Unix.[ O_RDONLY ] 0o644 in
let stat = Unix.LargeFile.fstat fd in
if Sys.word_size = 32 && stat.Unix.LargeFile.st_size > _max_int31
then failwith "Too big block-device";
...
]}
So that, as soon as possible, the user can find out whether or not the
program can handle large block-devices. *)
2024-11-07 19:11:22 +00:00
type 'fd t
2024-12-26 14:44:17 +00:00
(** Type of cachet's values. *)
2024-11-07 19:11:22 +00:00
val fd : 'fd t -> 'fd
2024-12-26 14:44:17 +00:00
(** [fd t] is the abstract {i file-descriptor} used by [t] (and specified on
{!make}). *)
2024-11-26 16:01:50 +00:00
val pagesize : 'fd t -> int
2024-12-26 14:44:17 +00:00
(** [pagesize t] is the {i page-size} used by [t] (and specified on {!make}). *)
2024-11-07 19:11:22 +00:00
val map : 'fd t -> pos:int -> int -> Bstr.t
(** [map t ~pos len] returns a {!type:Bstr.t} which corresponds to a slice of
the {i block-device}. If this slice is smaller than or equal to a
{!val:pagesize}, the cache system is used to obtain the page and apply
{!val:Bstr.sub} to it (in other words, only a small allocation is made).
Otherwise, the {i syscall} {!type:map} is used.
Regardless of the expected position [pos] or size [len], this function will,
in the final analysis, call the {i syscall} {!type:map} with a position
aligned with the {!val:pagesize} and a size aligned with the
{!val:pagesize}. *)
2024-11-07 19:11:22 +00:00
val cache_hit : 'fd t -> int
(** [cache_hit t] is the number of times a load hit the cache. *)
val cache_miss : 'fd t -> int
(** [cache_miss t] is the number of times a load didn't hit the cache. *)
val copy : 'fd t -> 'fd t
(** [copy t] creates a new, empty cache using the same [map] function. *)
val make : ?cachesize:int -> ?pagesize:int -> map:'fd map -> 'fd -> 'fd t
(** [make ~cachesize ~pagesize ~map fd] creates a new, empty cache using [map]
and [fd] for reading [pagesize] bytes. The size of the cache is [cachesize].
@raise Invalid_argument
if either [cachesize] or [pagesize] is not a power of two. *)
2024-11-07 19:11:22 +00:00
val load : 'fd t -> ?len:int -> int -> slice option
(** [load t ~len logical_address] loads a page at the given [logical_address]
and returns a {!type:slice}. [len] (defaults to [1]) is the expected minimum
number of bytes returned.
2024-11-07 19:11:22 +00:00
If the slice does not contain at least [len] bytes, [load] returns
[None]. [load t ~len:0 logical_address] always returns an empty slice. *)
2024-11-07 19:11:22 +00:00
val invalidate : 'fd t -> off:int -> len:int -> unit
(** [invalidate t ~off ~len] invalidates the cache on [len] bytes from [off]. *)
2024-11-26 16:01:50 +00:00
val is_cached : 'fd t -> int -> bool
(** [is_cached t logical_address] returns [true] if the requested
[logical_address] is available in the cache, otherwise [false]. *)
2024-11-08 11:08:05 +00:00
(** {2:user_friendly User friendly functions.} *)
2024-11-07 19:11:22 +00:00
(** {3 Binary decoding of integers.}
The functions in this section binary decode integers from byte sequences.
All following functions raise {!exception:Out_of_bounds} if the space needed
at index [i] to decode the integer is not available.
Little-endian (resp. big-endian) encoding means that least (resp. most)
significant bytes are stored first. Big-endian is also known as network byte
order. Native-endian encoding is either little-endian or big-endian
depending on {!Sys.big_endian}.
8-bit and 16-bit integers are represented by the [int] type, which has more
bits than the binary encoding. Functions that decode signed (resp. unsigned)
8-bit or 16-bit integers represented by [int] values sign-extend (resp.
zero-extend) their result. *)
exception Out_of_bounds of int
(** If Cachet tries to retrieve a byte outside the block device, this exception
is raised. *)
2024-11-07 19:11:22 +00:00
val get_int8 : 'fd t -> int -> int
2024-11-08 10:05:23 +00:00
(** [get_int8 t logical_address] is [t]'s signed 8-bit integer starting at byte
index [logical_address].
@raise Out_of_bounds if [logical_address] is not accessible. *)
2024-11-08 10:05:23 +00:00
2024-11-07 19:11:22 +00:00
val get_uint8 : 'fd t -> int -> int
2024-11-08 11:40:10 +00:00
(** [get_uint8 t logical_address] is [t]'s unsigned 8-bit integer starting at
byte index [logical_address].
@raise Out_of_bounds if [logical_address] is not accessible. *)
2024-11-08 10:05:23 +00:00
2024-11-07 19:11:22 +00:00
val get_uint16_ne : 'fd t -> int -> int
2024-12-26 14:44:17 +00:00
(** [get_uint16_ne t i] is [t]'s native-endian unsigned 16-bit integer starting
at byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_uint16_le : 'fd t -> int -> int
2024-12-26 14:44:17 +00:00
(** [get_uint16_le t i] is [t]'s little-endian unsigned 16-bit integer starting
at byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_uint16_be : 'fd t -> int -> int
2024-12-26 14:44:17 +00:00
(** [get_uint16_be t i] is [t]'s big-endian unsigned 16-bit integer starting at
byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int16_ne : 'fd t -> int -> int
2024-12-26 14:44:17 +00:00
(** [get_int16_ne t i] is [t]'s native-endian signed 16-bit integer starting at
byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int16_le : 'fd t -> int -> int
2024-12-26 14:44:17 +00:00
(** [get_int16_le t i] is [t]'s little-endian signed 16-bit integer starting at
byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int16_be : 'fd t -> int -> int
2024-12-26 14:44:17 +00:00
(** [get_int16_be t i] is [t]'s big-endian signed 16-bit integer starting at
byte index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int32_ne : 'fd t -> int -> int32
2024-12-26 14:44:17 +00:00
(** [get_int32_ne t i] is [t]'s native-endian 32-bit integer starting at byte
index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int32_le : 'fd t -> int -> int32
2024-12-26 14:44:17 +00:00
(** [get_int32_le t i] is [t]'s little-endian 32-bit integer starting at byte
index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int32_be : 'fd t -> int -> int32
2024-12-26 14:44:17 +00:00
(** [get_int32_be t i] is [t]'s big-endian 32-bit integer starting at byte index
[i]. *)
2024-11-07 19:11:22 +00:00
val get_int64_ne : 'fd t -> int -> int64
2024-12-26 14:44:17 +00:00
(** [get_int64_ne t i] is [t]'s native-endian 64-bit integer starting at byte
index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int64_le : 'fd t -> int -> int64
2024-12-26 14:44:17 +00:00
(** [get_int64_le t i] is [t]'s little-endian 64-bit integer starting at byte
index [i]. *)
2024-11-07 19:11:22 +00:00
val get_int64_be : 'fd t -> int -> int64
2024-12-26 14:44:17 +00:00
(** [get_int64_be t i] is [t]'s big-endian 64-bit integer starting at byte index
[i]. *)
2024-11-08 11:08:05 +00:00
2024-11-07 19:11:22 +00:00
val get_string : 'fd t -> len:int -> int -> string
2024-11-08 11:08:05 +00:00
(** [get_string t ~len logical_address] loads the various pages needed from the
cache or using [map] to copy [len] bytes available at [logical_address].
You can use {!val:syscalls} to find out how many times [get_string] can call
[map] at most.
2024-11-08 11:08:05 +00:00
@raise Out_of_bounds
if [logical_address] and [len] byte(s) are not accessible. *)
2024-11-08 11:08:05 +00:00
2024-11-07 19:11:22 +00:00
val get_seq : 'fd t -> int -> string Seq.t
2024-12-26 14:44:17 +00:00
(** [get_seq t off] returns a [string Seq.t] which loads various pages until the
end of the underlying {i block-device} and starting at [off]. *)
2024-11-07 19:11:22 +00:00
val next : 'fd t -> slice -> slice option
2024-12-26 14:44:17 +00:00
(** [next t slice] returns the next slice from the {i block-device} after the
given one [slice]. *)
2024-11-07 19:11:22 +00:00
val iter : 'fd t -> ?len:int -> fn:(int -> unit) -> int -> unit
2024-12-26 14:44:17 +00:00
(** [iter t ?len ~fn off] iterates over each byte, starting at [off], until
[len] (or until the end of the {i block-device} if [len] is not specified). *)
2024-11-07 19:11:22 +00:00
val blit_to_bytes :
'fd t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit
(** [blit_to_bytes t ~src_off dst ~dst_off ~len] copies [len] bytes from the
cached {i block-device} represented by [t], starting at index [src_off] as
the logical address, to byte sequence [dst], starting at index [dst_off].
2024-11-08 11:08:05 +00:00
This function can read several pages depending on the size of the [dst]
buffer.
@raise Invalid_argument
if [src_off] and [len] do not designate a valid range of the
{i block-device}, or if [dst_off] and [len] do not designate a valid range
of [dst]. *)
2024-11-08 11:08:05 +00:00
val syscalls : 'fd t -> logical_address:int -> len:int -> int
(** [syscalls t ~logical_address ~len] returns the maximum number (if the cache
is empty) of calls to [map] to load a segment of the block-device according
to the [logical_address] and the size [len] (in bytes) of the segment. *)