(** A small library for manipulating bigstrings.

    To clarify the use of bigstrings in OCaml, we advise you to read the
    overview of bigstrings and how they differ from bytes. After this
    theoretical reading, this module offers a whole host of useful functions
    for manipulating bigstrings.

    {1:overview Overview.}

    A bigstring is a special kind of memory area in the OCaml world. Unlike
    bytes, bigstrings are allocated via [malloc()] or are available via
    [Unix.map_file]. They therefore exist outside the space normally allocated
    for OCaml with regard to all its values.

    So there are some particularities to the use of bigstrings.

    The first thing to understand about bigstrings is that allocating them can
    take time. Since a bigstring is obtained either by [malloc()] or by
    [Unix.map_file], the former pays the cost of the [malloc()] in use (which
    also depends on the fragmentation of the C heap) and the latter is a
    system call that can interact with your file system.

    By way of comparison, a bytes value of less than 2048 bytes requires only
    3 processor instructions to exist and be available — beyond that, the
    bytes value is allocated in the major heap.

    It is therefore advisable to allocate just a few bigstrings and reuse them
    throughout your application. It's even advisable to allocate large
    bigstrings.

    A particularity of bigstrings is that they cannot be moved by the Garbage
    Collector. Existing in a space other than that of OCaml (the C heap), they
    don't move. With this advantage in mind, we can imagine several situations
    where we'd like a memory zone that doesn't move:
    - a bigstring can be manipulated by several threads/domains. Of course,
      parallel accesses must be protected, but you can be sure that the
      bigstring will not move throughout the process. Thus, its location in
      memory can be shared by several computing units.
    - it may be necessary, in system programming, to write to a particular
      zone in order to interact with a device. In this case, the bigstring can
      be found as an OCaml value bridging a special memory area (such as the
      framebuffer).

    A final feature of bigstrings is that they can be seen as slices. You can
    have another, possibly smaller, view of a bigstring. For example, the
    {!val:sub} operation in particular doesn't copy your bigstring, but offers
    you a "proxy" accessing the same memory area as the original bigstring.

    {1:pkt Encode & Decode packets.}

    In order to encode or decode packets (such as ARP or DNS packets), Bstr
    offers a small API for converting a slice of bytes from a {!type:t} to a
    user-defined variant or record. *)

type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t

val memcpy : t -> src_off:int -> t -> dst_off:int -> len:int -> unit
(** [memcpy src ~src_off dst ~dst_off ~len] copies [len] bytes from [src] to
    [dst]. [src] must not overlap [dst]. Use {!val:memmove} if [src] & [dst]
    do overlap. *)

val memmove : t -> src_off:int -> t -> dst_off:int -> len:int -> unit
(** [memmove src ~src_off dst ~dst_off ~len] copies [len] bytes from [src] to
    [dst]. [src] and [dst] may overlap: copying takes place as though the
    bytes in [src] are first copied into a temporary array that does not
    overlap [src] or [dst], and the bytes are then copied from the temporary
    array to [dst]. *)

(* NOTE(review): [memcmp], [memchr] and [memset] are undocumented. Their names
   suggest the C functions of the same name; confirm against the
   implementation, in particular the sign convention of [memcmp] and what
   [memchr] returns when the byte is absent from the given range. *)

val memcmp : t -> src_off:int -> t -> dst_off:int -> len:int -> int
val memchr : t -> off:int -> len:int -> char -> int
val memset : t -> off:int -> len:int -> char -> unit

val empty : t
(** [empty] is an empty bigstring. *)

val length : t -> int
(** [length bstr] is the number of bytes in [bstr]. *)

val get : t -> int -> char
(** [get bstr i] is the byte of [bstr] at index [i]. This is equivalent to the
    [bstr.{i}] notation.

    @raise Invalid_argument if [i] is not an index of [bstr]. *)

(* NOTE(review): the constructors and blitting functions below are
   undocumented. Presumably [create len] allocates a bigstring of [len] bytes,
   [make len chr] additionally fills it with [chr], and the [blit*] functions
   copy [len] bytes from the first buffer (at [src_off]) into the second (at
   [dst_off]); confirm initial contents, overlap handling and raised
   exceptions against the implementation. *)

val create : int -> t
val make : int -> char -> t
val of_string : string -> t
val fill : t -> off:int -> len:int -> char -> unit
val blit : t -> src_off:int -> t -> dst_off:int -> len:int -> unit

val blit_from_string :
  string -> src_off:int -> t -> dst_off:int -> len:int -> unit

val blit_from_bytes :
  bytes -> src_off:int -> t -> dst_off:int -> len:int -> unit

(* The declarations below are commented out and therefore not part of the
   interface. Note that [overlap] appears in this list but is also declared
   for real further down.

   val init : int -> (int -> char) -> t
   val copy : t -> t
   val extend : t -> int -> int -> t
   val concat : t -> t list -> t
   val cat : t -> t -> t
   val iter : (char -> unit) -> t -> unit
   val iteri : (int -> char -> unit) -> t -> unit
   val map : (char -> char) -> t -> t
   val mapi : (int -> char -> char) -> t -> t
   val fold_left : ('acc -> char -> 'acc) -> 'acc -> t -> 'acc
   val fold_right : (char -> 'acc -> 'acc) -> t -> 'acc -> 'acc
   val index : t -> ?rev:bool -> ?from:int -> char -> int
   val contains : t -> ?rev:bool -> ?from:int -> char -> bool
   val compare : t -> t -> int
   val starts_with : prefix:string -> t -> bool
   val ends_with : suffix:string -> t -> bool
   val overlap : t -> t -> (int * int * int) option
*)

val get_int8 : t -> int -> int
(** [get_int8 bstr i] is [bstr]'s signed 8-bit integer starting at byte index
    [i]. *)

val get_uint8 : t -> int -> int
(** [get_uint8 bstr i] is [bstr]'s unsigned 8-bit integer starting at byte
    index [i]. *)

val get_uint16_ne : t -> int -> int
(** [get_uint16_ne bstr i] is [bstr]'s native-endian unsigned 16-bit integer
    starting at byte index [i]. *)

val get_uint16_le : t -> int -> int
(** [get_uint16_le bstr i] is [bstr]'s little-endian unsigned 16-bit integer
    starting at byte index [i]. *)

val get_uint16_be : t -> int -> int
(** [get_uint16_be bstr i] is [bstr]'s big-endian unsigned 16-bit integer
    starting at byte index [i]. *)

val get_int16_ne : t -> int -> int
(** [get_int16_ne bstr i] is [bstr]'s native-endian signed 16-bit integer
    starting at byte index [i]. *)

val get_int16_le : t -> int -> int
(** [get_int16_le bstr i] is [bstr]'s little-endian signed 16-bit integer
    starting at byte index [i]. *)

val get_int16_be : t -> int -> int
(** [get_int16_be bstr i] is [bstr]'s big-endian signed 16-bit integer
    starting at byte index [i]. *)

val get_int32_ne : t -> int -> int32
(** [get_int32_ne bstr i] is [bstr]'s native-endian 32-bit integer starting at
    byte index [i]. *)

val get_int32_le : t -> int -> int32
(** [get_int32_le bstr i] is [bstr]'s little-endian 32-bit integer starting at
    byte index [i]. *)

val get_int32_be : t -> int -> int32
(** [get_int32_be bstr i] is [bstr]'s big-endian 32-bit integer starting at
    byte index [i]. *)

val get_int64_ne : t -> int -> int64
(** [get_int64_ne bstr i] is [bstr]'s native-endian 64-bit integer starting at
    byte index [i]. *)

val get_int64_le : t -> int -> int64
(** [get_int64_le bstr i] is [bstr]'s little-endian 64-bit integer starting at
    byte index [i]. *)

val get_int64_be : t -> int -> int64
(** [get_int64_be bstr i] is [bstr]'s big-endian 64-bit integer starting at
    byte index [i]. *)

(* NOTE(review): the setters below are undocumented. Their names and types
   mirror the getters above, so [set_* bstr i v] presumably stores [v] at byte
   index [i] with the width and endianness given by the suffix; confirm
   bounds checking and how out-of-range integers are truncated. [unsafe_set]
   presumably skips the bounds check performed by [set]; confirm. *)

val set : t -> int -> char -> unit
val set_int8 : t -> int -> int -> unit
val set_uint8 : t -> int -> int -> unit
val set_uint16_ne : t -> int -> int -> unit
val set_uint16_le : t -> int -> int -> unit
val set_uint16_be : t -> int -> int -> unit
val set_int16_ne : t -> int -> int -> unit
val set_int16_le : t -> int -> int -> unit
val set_int16_be : t -> int -> int -> unit
val set_int32_ne : t -> int -> int32 -> unit
val set_int32_le : t -> int -> int32 -> unit
val set_int32_be : t -> int -> int32 -> unit
val set_int64_ne : t -> int -> int64 -> unit
val set_int64_le : t -> int -> int64 -> unit
val set_int64_be : t -> int -> int64 -> unit
val unsafe_set : t -> int -> char -> unit

val sub : t -> off:int -> len:int -> t
(** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a
    new view into [bstr] starting at [off], and with length [len].

    {b Note} that this does not allocate a new buffer, but instead shares the
    buffer of [bstr] with the newly-returned bigstring. *)

(* NOTE(review): [overlap] is undocumented; the meaning of the three integers
   in the result cannot be determined from this interface alone. Document it
   from the implementation. *)

val overlap : t -> t -> (int * int * int) option

val sub_string : t -> off:int -> len:int -> string
(** [sub_string bstr ~off ~len] returns a string of length [len] containing
    the bytes of [bstr] starting at [off]. *)

val to_string : t -> string
(** [to_string bstr] is equivalent to
    [sub_string bstr ~off:0 ~len:(length bstr)]. *)

val blit_to_bytes :
  t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit
(** [blit_to_bytes src ~src_off dst ~dst_off ~len] copies [len] bytes from
    [src], starting at index [src_off], to byte sequence [dst], starting at
    index [dst_off].

    @raise Invalid_argument
      if [src_off] and [len] do not designate a valid range of [src], or if
      [dst_off] and [len] do not designate a valid range of [dst]. *)

val is_empty : t -> bool
(** [is_empty bstr] is [length bstr = 0]. *)

val is_prefix : affix:string -> t -> bool
(** [is_prefix ~affix bstr] is [true] iff [affix.[idx] = bstr.{idx}] for all
    indices [idx] of [affix]. *)

val is_infix : affix:string -> t -> bool
(** [is_infix ~affix bstr] is [true] iff there exists an index [j] in [bstr]
    such that for all indices [i] of [affix] we have
    [affix.[i] = bstr.{j + i}]. *)

val is_suffix : affix:string -> t -> bool
(** [is_suffix ~affix bstr] is [true] iff [affix.[n - idx] = bstr.{m - idx}]
    for all indices [idx] of [affix] with [n = String.length affix - 1] and
    [m = length bstr - 1]. *)

val for_all : (char -> bool) -> t -> bool
(** [for_all p bstr] is [true] iff for all indices [idx] of [bstr],
    [p bstr.{idx} = true]. *)

val exists : (char -> bool) -> t -> bool
(** [exists p bstr] is [true] iff there exists an index [idx] of [bstr] with
    [p bstr.{idx} = true]. *)

val equal : t -> t -> bool
(** [equal a b] is [a = b]. *)

val with_range : ?first:int -> ?len:int -> t -> t
(** [with_range ~first ~len bstr] are the consecutive bytes of [bstr] whose
    indices exist in the range \[[first];[first + len - 1]\].

    [first] defaults to [0] and [len] to [max_int]. Note that [first] can be
    any integer and [len] any positive integer. *)

val with_index_range : ?first:int -> ?last:int -> t -> t
(** [with_index_range ~first ~last bstr] are the consecutive bytes of [bstr]
    whose indices exist in the range \[[first];[last]\].

    [first] defaults to [0] and [last] to [length bstr - 1].

    Note that both [first] and [last] can be any integer. If [first > last]
    the interval is empty and the empty bigstring is returned. *)

val trim : ?drop:(char -> bool) -> t -> t
(** [trim ~drop bstr] is [bstr] with prefix and suffix bytes satisfying [drop]
    in [bstr] removed. [drop] defaults to [fun chr -> chr = ' ']. *)

val span :
  ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t * t
(** [span ~rev ~min ~max ~sat bstr] is [(l, r)] where:
    - if [rev] is [false] (default), [l] is at least [min] and at most [max]
      consecutive [sat] satisfying initial bytes of [bstr] or {!empty} if
      there are no such bytes. [r] are the remaining bytes of [bstr].
    - if [rev] is [true], [r] is at least [min] and at most [max] consecutive
      [sat] satisfying final bytes of [bstr] or {!empty} if there are no such
      bytes. [l] are the remaining bytes of [bstr].

    If [max] is unspecified the span is unlimited. If [min] is unspecified it
    defaults to [0]. If [min > max] the condition can't be satisfied and the
    left or right span, depending on [rev], is always empty. [sat] defaults to
    [Fun.const true].

    @raise Invalid_argument if [max] or [min] is negative. *)

val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t
(** [take ~rev ~min ~max ~sat bstr] is the matching span of {!span} without
    the remaining one. In other words:

    {[
      (if rev then snd else fst) (span ~rev ~min ~max ~sat bstr)
    ]} *)

val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t
(** [drop ~rev ~min ~max ~sat bstr] is the remaining span of {!span} without
    the matching span. In other words:

    {[
      (if rev then fst else snd) (span ~rev ~min ~max ~sat bstr)
    ]} *)

val shift : t -> int -> t
(** [shift bstr n] is [sub bstr ~off:n ~len:(length bstr - n)]. *)

(* NOTE(review): [split_on_char] and the [Seq] conversions below are
   undocumented. Presumably they follow the conventions of their [String] /
   [Bytes] namesakes in the standard library; confirm, and in particular
   whether the pieces returned by [split_on_char] share memory with the
   input. *)

val split_on_char : char -> t -> t list
val to_seq : t -> char Seq.t
val to_seqi : t -> (int * char) Seq.t
val of_seq : char Seq.t -> t

module Pkt : sig
  type bigstring = t
  (** Alias for the enclosing module's bigstring type, which is shadowed by
      the type [t] declared just below. *)

  type 'a t
  (** The type for descriptions of packet contents: a ['a t] can be given to
      {!val:decode} to obtain a value of type ['a] from a {!bigstring}. *)

  (* NOTE(review): the primitive descriptions below are undocumented. The
     [be] / [le] / [ne] prefixes presumably select big-, little- and
     native-endian encodings, as in the [get_*] accessors of the enclosing
     module. The exact wire formats of [varint31] / [varint63], the meaning
     of the [int] given to [bytes], and the terminators used by [cstring] and
     [until] must be confirmed against the implementation. *)

  val char : char t
  val uint8 : int t
  val int8 : int t
  val beuint16 : int t
  val leuint16 : int t
  val neuint16 : int t
  val beint16 : int t
  val leint16 : int t
  val neint16 : int t
  val beint32 : int32 t
  val leint32 : int32 t
  val neint32 : int32 t
  val beint64 : int64 t
  val leint64 : int64 t
  val neint64 : int64 t
  val varint31 : int t
  val varint63 : int t
  val bytes : int -> string t
  val cstring : string t
  val until : char -> string t

  (** {2:records Records.}

      {[
        type header = { version : int32; number : int32 }

        let _PACK = 0x5041434bl

        let header =
          record (fun pack version number ->
              if pack <> _PACK then invalid_arg "Invalid PACK file";
              { version; number })
          |+ field beint32 (fun _ -> _PACK)
          |+ field beint32 (fun t -> t.version)
          |+ field beint32 (fun t -> t.number)
          |> sealr
      ]} *)

  type ('a, 'b, 'c) open_record
  (** The type for representing open records of type ['a] with a constructor
      of ['b]. ['c] represents the remaining fields to be described using the
      {!val:(|+)} operator. An open record initially satisfies ['c = 'b] and
      can be {{!val:sealr} sealed} once ['c = 'a]. *)

  val record : 'b -> ('a, 'b, 'b) open_record
  (** [record f] is an incomplete representation of the record of type ['a]
      with constructor [f]. To complete the representation, add fields with
      {!val:(|+)} and then seal the record with {!val:sealr}. *)

  type ('a, 'b) field
  (** The type for fields holding values of type ['b] and belonging to a
      record of type ['a]. *)

  val field : 'a t -> ('b -> 'a) -> ('b, 'a) field
  (** [field t g] is the representation of a field of type [t] with getter
      [g].

      For instance:

      {[
        type t = { foo : string }

        let foo = field cstring (fun t -> t.foo)
      ]} *)

  val ( |+ ) :
    ('a, 'b, 'c -> 'd) open_record -> ('a, 'c) field -> ('a, 'b, 'd) open_record
  (** [r |+ f] is the open record [r] augmented with the field [f]. *)

  val sealr : ('a, 'b, 'a) open_record -> 'a t
  (** [sealr r] seals the open record [r]. *)

  (** {2:variants Variants.}

      {[
        type t = Foo | Bar of string

        let t =
          variant (fun foo bar -> function Foo -> foo | Bar s -> bar s)
          |~ case0 Foo
          |~ case1 cstring (fun x -> Bar x)
          |> sealv
      ]} *)

  type ('a, 'b, 'c) open_variant
  (** The type for representing open variants of type ['a] with
      pattern-matching of type ['b]. ['c] represents the remaining
      constructors to be described using the {!val:(|~)} operator. An open
      variant initially satisfies ['c = 'b] and can be {{!val:sealv} sealed}
      once ['c = 'a -> 'a case_p]. *)

  val variant : 'b -> ('a, 'b, 'b) open_variant
  (** [variant p] is an incomplete representation of the variant type ['a]
      using [p] to deconstruct values. To complete the representation, add
      cases with {!val:(|~)} and then seal the variant with {!val:sealv}. *)

  type ('a, 'b) case
  (** The type for representing variant cases of type ['a] with patterns of
      type ['b]. *)

  type 'a case_p
  (** The type for representing patterns for a variant of type ['a]. *)

  val case0 : 'a -> ('a, 'a case_p) case
  (** [case0 v] is a representation of a variant constructor [v] with no
      arguments.

      For instance:

      {[
        type t = Foo

        let foo = case0 Foo
      ]} *)

  val case1 : 'b t -> ('b -> 'a) -> ('a, 'b -> 'a case_p) case
  (** [case1 t c] is a representation of a variant constructor [c] with an
      argument of type [t].

      For instance:

      {[
        type t = Foo of string

        let foo = case1 cstring (fun s -> Foo s)
      ]} *)

  val ( |~ ) :
    ('a, 'b, 'c -> 'd) open_variant ->
    ('a, 'c) case ->
    ('a, 'b, 'd) open_variant
  (** [v |~ c] is the open variant [v] augmented with the case [c]. *)

  val sealv : ('a, 'b, 'a -> 'a case_p) open_variant -> 'a t
  (** [sealv v] seals the open variant [v]. *)

  (** {2:decoder Decoder.} *)

  (* NOTE(review): [decode] is undocumented. The [int ref] is presumably the
     current read offset into the bigstring, advanced as bytes are consumed;
     confirm this and the failure behaviour on truncated input against the
     implementation. *)

  val decode : 'a t -> bigstring -> int ref -> 'a
end