First commit

This commit is contained in:
Calascibetta Romain 2025-01-15 16:43:37 +01:00
commit 158be8641a
12 changed files with 1936 additions and 0 deletions

20
LICENSE.md Normal file
View file

@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2024 Romain Calascibetta
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

1
README.md Normal file
View file

@ -0,0 +1 @@
# Bstr, the new `Cstruct`

18
bstr.opam Normal file
View file

@ -0,0 +1,18 @@
opam-version: "2.0"
name: "bstr"
maintainer: [ "Romain Calascibetta <romain.calascibetta@gmail.com>" ]
authors: [ "Romain Calascibetta <romain.calascibetta@gmail.com>" ]
homepage: "https://git.robur.coop/robur/bstr"
bug-reports: "https://git.robur.coop/robur/bstr"
dev-repo: "git+https://github.com/robur-coop/bstr"
doc: "https://robur-coop.github.io/bstr/"
license: "MIT"
build: [ "dune" "build" "-p" name "-j" jobs ]
run-test: [ "dune" "runtest" "-p" name "-j" jobs ]
depends: [
"ocaml" {>= "4.14.0"}
"dune" {>= "3.5.0"}
]
x-maintenance-intent: [ "(latest)" ]

2
dune-project Normal file
View file

@ -0,0 +1,2 @@
(lang dune 2.0)
(name bstr)

141
lib/bstr.c Normal file
View file

@ -0,0 +1,141 @@
#include <caml/alloc.h>
#include <caml/bigarray.h>
#include <caml/m.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <string.h>
CAMLprim value bstr_bytecode_ptr(value va) {
CAMLparam1(va);
CAMLlocal1(res);
struct caml_ba_array *a = Caml_ba_array_val(va);
void *src_a = a->data;
res = caml_copy_nativeint((intnat)src_a);
CAMLreturn(res);
}
intnat bstr_native_ptr(value va) {
struct caml_ba_array *a = Caml_ba_array_val(va);
return ((intnat)a->data);
}
#define bstr_uint8_off(ba, off) ((uint8_t *)Caml_ba_data_val(ba) + off)
void bstr_native_memcpy(value src, intnat src_off, value dst, intnat dst_off,
intnat len) {
memcpy(bstr_uint8_off(src, src_off), bstr_uint8_off(dst, dst_off), len);
}
CAMLprim value bstr_bytecode_memcpy(value src, value src_off, value dst,
value dst_off, value len) {
CAMLparam5(src, src_off, dst, dst_off, len);
bstr_native_memcpy(src, Unsigned_long_val(src_off), dst,
Unsigned_long_val(dst_off), Unsigned_long_val(len));
CAMLreturn(Val_unit);
}
void bstr_native_memmove(value src, intnat src_off, value dst, intnat dst_off,
intnat len) {
memmove(bstr_uint8_off(src, src_off), bstr_uint8_off(dst, dst_off), len);
}
CAMLprim value bstr_bytecode_memmove(value src, value src_off, value dst,
value dst_off, value len) {
CAMLparam5(src, src_off, dst, dst_off, len);
bstr_native_memmove(src, Unsigned_long_val(src_off), dst,
Unsigned_long_val(dst_off), Unsigned_long_val(len));
CAMLreturn(Val_unit);
}
intnat bstr_native_memcmp(value src, intnat src_off, value dst, intnat dst_off,
intnat len) {
intnat res;
res = memcmp(bstr_uint8_off(src, src_off), bstr_uint8_off(dst, dst_off), len);
return (res);
}
CAMLprim value bstr_bytecode_memcmp(value src, value src_off, value dst,
value dst_off, value len) {
CAMLparam5(src, src_off, dst, dst_off, len);
intnat res;
res = bstr_native_memcmp(src, Unsigned_long_val(src_off), dst,
Unsigned_long_val(dst_off), Unsigned_long_val(len));
CAMLreturn(Val_long(res));
}
#define __MEM1(name) \
intnat bstr_native_##name(value src, intnat src_off, intnat src_len, \
intnat va) { \
void *res = name(bstr_uint8_off(src, src_off), va, src_len); \
if (res == NULL) \
return (-1); \
\
return ((intnat)(res - src)); \
} \
\
CAMLprim value bstr_bytecode_##name(value src, value src_off, value src_len, \
value va) { \
CAMLparam4(src, src_off, src_len, va); \
intnat res; \
res = \
bstr_native_##name(src, Unsigned_long_val(src_off), \
Unsigned_long_val(src_len), Unsigned_long_val(va)); \
CAMLreturn(Val_long(res)); \
}
__MEM1(memset)
__MEM1(memchr)
/* This function is **only** useful when accessing to a bigstring for an
* architecture requiring alignment **and** for an OCaml executable in
* bytecode. It concerns only 32-bits architectures.
*/
uint64_t bstr_native_get64u(value va, intnat off) {
#ifdef ARCH_ALIGN_INT64
char b0, b1, b2, b3, b4, b5, b6, b7;
#endif
struct caml_ba_array *a = Caml_ba_array_val(va);
void *addr = &((unsigned char *)a->data)[off];
uint64_t res;
#ifdef ARCH_ALIGN_INT64
if (!((size_t)addr) & 0x7)
res = *((uint64_t *)addr);
else {
b0 = ((unsigned char *)a->data)[off];
b1 = ((unsigned char *)a->data)[off + 1];
b2 = ((unsigned char *)a->data)[off + 2];
b3 = ((unsigned char *)a->data)[off + 3];
b4 = ((unsigned char *)a->data)[off + 4];
b5 = ((unsigned char *)a->data)[off + 5];
b6 = ((unsigned char *)a->data)[off + 6];
b7 = ((unsigned char *)a->data)[off + 7];
#ifdef ARCH_BIG_ENDIAN
res = (uint64_t)b0 << 56 | (uint64_t)b1 << 48 | (uint64_t)b2 << 40 |
(uint64_t)b3 << 32 | (uint64_t)b4 << 24 | (uint64_t)b5 << 16 |
(uint64_t)b6 << 8 | (uint64_t)b7;
#else
res = (uint64_t)b7 << 56 | (uint64_t)b6 << 48 | (uint64_t)b5 << 40 |
(uint64_t)b4 << 32 | (uint64_t)b3 << 24 | (uint64_t)b2 << 16 |
(uint64_t)b1 << 8 | (uint64_t)b0;
#endif
}
#else
res = *((uint64_t *)addr);
#endif
return (res);
}
CAMLprim value bstr_bytecode_get64u(value va, value off) {
CAMLparam2(va, off);
CAMLlocal1(res);
uint64_t val = bstr_native_get64u(va, Unsigned_long_val(off));
res = caml_copy_int64(val);
CAMLreturn(res);
}

948
lib/bstr.ml Normal file
View file

@ -0,0 +1,948 @@
type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
module Bytes = struct
include Bytes
external unsafe_get_uint8 : bytes -> int -> int = "%bytes_unsafe_get"
external unsafe_set_uint8 : bytes -> int -> int -> unit = "%bytes_unsafe_set"
external unsafe_get_int32_ne : bytes -> int -> int32 = "%caml_bytes_get32u"
external unsafe_set_int32_ne : bytes -> int -> int32 -> unit
= "%caml_bytes_set32u"
end
external swap16 : int -> int = "%bswap16"
external swap32 : int32 -> int32 = "%bswap_int32"
external swap64 : int64 -> int64 = "%bswap_int64"
external get_uint8 : t -> int -> int = "%caml_ba_ref_1"
external set_uint8 : t -> int -> int -> unit = "%caml_ba_set_1"
external get_uint16_ne : t -> int -> int = "%caml_bigstring_get16"
external set_int16_ne : t -> int -> int -> unit = "%caml_bigstring_set16"
external get_int32_ne : t -> int -> int32 = "%caml_bigstring_get32"
external set_int32_ne : t -> int -> int32 -> unit = "%caml_bigstring_set32"
external set_int64_ne : t -> int -> int64 -> unit = "%caml_bigstring_set64"
external unsafe_get_uint8 : t -> int -> int = "%caml_ba_unsafe_ref_1"
external unsafe_set_uint8 : t -> int -> int -> unit = "%caml_ba_unsafe_set_1"
external unsafe_get_uint16_ne : t -> int -> int = "%caml_bigstring_get16u"
external unsafe_set_uint16_ne : t -> int -> int -> unit
= "%caml_bigstring_set16u"
external unsafe_get_int32_ne : t -> int -> int32 = "%caml_bigstring_get32u"
external unsafe_set_int32_ne : t -> int -> int32 -> unit
= "%caml_bigstring_set32u"
external unsafe_get_int64_ne : t -> (int[@untagged]) -> (int64[@unboxed])
= "bstr_bytecode_get64u" "bstr_native_get64u"
[@@noalloc]
external _unsafe_set_int64_ne : t -> int -> int64 -> unit
= "%caml_bigstring_set64u"
external unsafe_memcmp :
t
-> (int[@untagged])
-> t
-> (int[@untagged])
-> (int[@untagged])
-> (int[@untagged]) = "bstr_bytecode_memcmp" "bstr_native_memcmp"
[@@noalloc]
external unsafe_memcpy :
t -> (int[@untagged]) -> t -> (int[@untagged]) -> (int[@untagged]) -> unit
= "bstr_bytecode_memcpy" "bstr_native_memcpy"
[@@noalloc]
external unsafe_memmove :
t -> (int[@untagged]) -> t -> (int[@untagged]) -> (int[@untagged]) -> unit
= "bstr_bytecode_memmove" "bstr_native_memmove"
[@@noalloc]
external unsafe_memchr :
t
-> (int[@untagged])
-> (int[@untagged])
-> (int[@untagged])
-> (int[@untagged]) = "bstr_bytecode_memchr" "bstr_native_memchr"
[@@noalloc]
external unsafe_memset :
t
-> (int[@untagged])
-> (int[@untagged])
-> (int[@untagged])
-> (int[@untagged]) = "bstr_bytecode_memchr" "bstr_native_memchr"
[@@noalloc]
let memcmp src ~src_off dst ~dst_off ~len =
if
len < 0
|| src_off < 0
|| src_off > Bigarray.Array1.dim src - len
|| dst_off < 0
|| dst_off > Bigarray.Array1.dim dst - len
then invalid_arg "Bstr.memcmp";
unsafe_memcmp src src_off dst dst_off len
let memcpy src ~src_off dst ~dst_off ~len =
if
len < 0
|| src_off < 0
|| src_off > Bigarray.Array1.dim src - len
|| dst_off < 0
|| dst_off > Bigarray.Array1.dim dst - len
then invalid_arg "Bstr.memcpy";
unsafe_memcpy src src_off dst dst_off len
let memmove src ~src_off dst ~dst_off ~len =
if
len < 0
|| src_off < 0
|| src_off > Bigarray.Array1.dim src - len
|| dst_off < 0
|| dst_off > Bigarray.Array1.dim dst - len
then invalid_arg "Bstr.memmove";
unsafe_memmove src src_off dst dst_off len
let memchr src ~off ~len value =
if len < 0 || off < 0 || off > Bigarray.Array1.dim src - len then
invalid_arg "Bstr.memchr";
unsafe_memchr src off len (Char.code value)
let memset src ~off ~len value =
if len < 0 || off < 0 || off > Bigarray.Array1.dim src - len then
invalid_arg "Bstr.memset";
ignore (unsafe_memset src off len (Char.code value))
let empty = Bigarray.Array1.create Bigarray.char Bigarray.c_layout 0
let create len = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len
external length : t -> int = "%caml_ba_dim_1"
external get : t -> int -> char = "%caml_ba_ref_1"
external unsafe_get : t -> int -> char = "%caml_ba_unsafe_ref_1"
external set : t -> int -> char -> unit = "%caml_ba_set_1"
external unsafe_set : t -> int -> char -> unit = "%caml_ba_unsafe_set_1"
let unsafe_fill bstr ~off ~len v =
let nv = Nativeint.of_int v in
let vv = Nativeint.(logor (shift_left nv 8) nv) in
let vvvv = Nativeint.(logor (shift_left vv 16) vv) in
let vvvv = Nativeint.to_int32 vvvv in
let len0 = len land 3 in
let len1 = len lsr 2 in
for i = 0 to len1 - 1 do
let i = i * 4 in
unsafe_set_int32_ne bstr (off + i) vvvv
done;
for i = 0 to len0 - 1 do
let i = (len1 * 4) + i in
unsafe_set_uint8 bstr (off + i) v
done
let fill bstr ~off ~len chr =
if len < 0 || off < 0 || off > length bstr - len then invalid_arg "Bstr.fill";
unsafe_fill bstr ~off ~len (Char.code chr)
let make len chr =
let bstr = create len in
unsafe_fill bstr ~off:0 ~len (Char.code chr);
(* [Obj.magic] instead of [Char.code]? *)
bstr
let get_int64_ne bstr idx =
if idx < 0 || idx > length bstr - 8 then invalid_arg "Bstr.get_int64_ne";
unsafe_get_int64_ne bstr idx
let get_int8 bstr i =
(get_uint8 bstr i lsl (Sys.int_size - 8)) asr (Sys.int_size - 8)
let get_uint16_le bstr i =
if Sys.big_endian then swap16 (get_uint16_ne bstr i) else get_uint16_ne bstr i
let get_uint16_be bstr i =
if not Sys.big_endian then swap16 (get_uint16_ne bstr i)
else get_uint16_ne bstr i
let _unsafe_get_uint16_le bstr i =
(* TODO(dinosaure): for unicode. *)
if Sys.big_endian then swap16 (unsafe_get_uint16_ne bstr i)
else unsafe_get_uint16_ne bstr i
let _unsafe_get_uint16_be bstr i =
(* TODO(dinosaure): for unicode. *)
if not Sys.big_endian then swap16 (unsafe_get_uint16_ne bstr i)
else unsafe_get_uint16_ne bstr i
let get_int16_ne bstr i =
(get_uint16_ne bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
let get_int16_le bstr i =
(get_uint16_le bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
let get_int16_be bstr i =
(get_uint16_be bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
let get_int32_le bstr i =
if Sys.big_endian then swap32 (get_int32_ne bstr i) else get_int32_ne bstr i
let get_int32_be bstr i =
if not Sys.big_endian then swap32 (get_int32_ne bstr i)
else get_int32_ne bstr i
let get_int64_le bstr i =
if Sys.big_endian then swap64 (get_int64_ne bstr i) else get_int64_ne bstr i
let get_int64_be bstr i =
if not Sys.big_endian then swap64 (get_int64_ne bstr i)
else get_int64_ne bstr i
let _unsafe_set_uint16_le bstr i x =
(* TODO(dinosaure): for unicode. *)
if Sys.big_endian then unsafe_set_uint16_ne bstr i (swap16 x)
else unsafe_set_uint16_ne bstr i x
let _unsafe_set_uint16_be bstr i x =
(* TODO(dinosaure): for unicode. *)
if Sys.big_endian then unsafe_set_uint16_ne bstr i x
else unsafe_set_uint16_ne bstr i (swap16 x)
let set_int16_le bstr i x =
if Sys.big_endian then set_int16_ne bstr i (swap16 x)
else set_int16_ne bstr i x
let set_int16_be bstr i x =
if not Sys.big_endian then set_int16_ne bstr i (swap16 x)
else set_int16_ne bstr i x
let set_int32_le bstr i x =
if Sys.big_endian then set_int32_ne bstr i (swap32 x)
else set_int32_ne bstr i x
let set_int32_be bstr i x =
if not Sys.big_endian then set_int32_ne bstr i (swap32 x)
else set_int32_ne bstr i x
let set_int64_le bstr i x =
if Sys.big_endian then set_int64_ne bstr i (swap64 x)
else set_int64_ne bstr i x
let set_int64_be bstr i x =
if not Sys.big_endian then set_int64_ne bstr i (swap64 x)
else set_int64_ne bstr i x
let set_int8 = set_uint8
let set_uint16_ne = set_int16_ne
let set_uint16_be = set_int16_be
let set_uint16_le = set_int16_le
external sub : t -> off:int -> len:int -> t = "caml_ba_sub"
let unsafe_blit src ~src_off dst ~dst_off ~len =
let len0 = len land 3 in
let len1 = len lsr 2 in
for i = 0 to len1 - 1 do
let i = i * 4 in
let v = unsafe_get_int32_ne src (src_off + i) in
unsafe_set_int32_ne dst (dst_off + i) v
done;
for i = 0 to len0 - 1 do
let i = (len1 * 4) + i in
let v = unsafe_get_uint8 src (src_off + i) in
unsafe_set_uint8 dst (dst_off + i) v
done
let blit src ~src_off dst ~dst_off ~len =
if
len < 0
|| src_off < 0
|| src_off > length src - len
|| dst_off < 0
|| dst_off > length dst - len
then invalid_arg "Bstr.blit";
unsafe_blit src ~src_off dst ~dst_off ~len
let unsafe_blit_to_bytes bstr ~src_off dst ~dst_off ~len =
let len0 = len land 3 in
let len1 = len lsr 2 in
for i = 0 to len1 - 1 do
let i = i * 4 in
let v = unsafe_get_int32_ne bstr (src_off + i) in
Bytes.unsafe_set_int32_ne dst (dst_off + i) v
done;
for i = 0 to len0 - 1 do
let i = (len1 * 4) + i in
let v = unsafe_get_uint8 bstr (src_off + i) in
Bytes.unsafe_set_uint8 dst (dst_off + i) v
done
let unsafe_blit_from_bytes src ~src_off bstr ~dst_off ~len =
let len0 = len land 3 in
let len1 = len lsr 2 in
for i = 0 to len1 - 1 do
let i = i * 4 in
let v = Bytes.unsafe_get_int32_ne src (src_off + i) in
unsafe_set_int32_ne bstr (dst_off + i) v
done;
for i = 0 to len0 - 1 do
let i = (len1 * 4) + i in
let v = Bytes.unsafe_get_uint8 src (src_off + i) in
unsafe_set_uint8 bstr (dst_off + i) v
done
let blit_from_bytes src ~src_off bstr ~dst_off ~len =
if
len < 0
|| src_off < 0
|| src_off > Bytes.length src - len
|| dst_off < 0
|| dst_off > length bstr - len
then invalid_arg "Bstr.blit_from_bytes";
unsafe_blit_from_bytes src ~src_off bstr ~dst_off ~len
let blit_from_string src ~src_off bstr ~dst_off ~len =
blit_from_bytes (Bytes.unsafe_of_string src) ~src_off bstr ~dst_off ~len
let blit_to_bytes bstr ~src_off dst ~dst_off ~len =
if
len < 0
|| src_off < 0
|| src_off > length bstr - len
|| dst_off < 0
|| dst_off > Bytes.length dst - len
then invalid_arg "Bstr.blit_to_bytes";
unsafe_blit_to_bytes bstr ~src_off dst ~dst_off ~len
let of_string str =
let len = String.length str in
let bstr = create len in
unsafe_blit_from_bytes
(Bytes.unsafe_of_string str)
~src_off:0 bstr ~dst_off:0 ~len;
bstr
let unsafe_sub_string bstr src_off len =
let buf = Bytes.create len in
unsafe_blit_to_bytes bstr ~src_off buf ~dst_off:0 ~len;
Bytes.unsafe_to_string buf
let sub_string bstr ~off ~len =
if len < 0 || off < 0 || off > length bstr - len then
invalid_arg "Bstr.sub_string";
unsafe_sub_string bstr off len
let to_string bstr = unsafe_sub_string bstr 0 (length bstr)
let is_empty bstr = length bstr == 0
let is_prefix ~affix bstr =
let len_affix = String.length affix in
let len_bstr = length bstr in
if len_affix > len_bstr then false
else
let max_idx_affix = len_affix - 1 in
let rec go idx =
if idx > max_idx_affix then true
else if affix.[idx] != bstr.{idx} then false
else go (succ idx)
in
go 0
let is_infix ~affix bstr =
let len_affix = String.length affix in
let len_bstr = length bstr in
if len_affix > len_bstr then false
else
let max_idx_affix = len_affix - 1 in
let max_idx_bstr = len_bstr - len_affix in
let rec go idx k =
if idx > max_idx_bstr then false
else if k > max_idx_affix then true
else if k > 0 then
if affix.[k] == bstr.{idx + k} then go idx (succ k) else go (succ idx) 0
else if affix.[0] = bstr.{idx} then go idx 1
else go (succ idx) 0
in
go 0 0
let is_suffix ~affix bstr =
let max_idx_affix = String.length affix - 1 in
let max_idx_bstr = length bstr - 1 in
if max_idx_affix > max_idx_bstr then false
else
let rec go idx =
if idx > max_idx_affix then true
else if affix.[max_idx_affix - idx] != bstr.{max_idx_bstr - idx} then
false
else go (succ idx)
in
go 0
exception Break
let for_all sat bstr =
try
for idx = 0 to length bstr - 1 do
if sat (unsafe_get bstr idx) == false then raise_notrace Break
done;
true
with Break -> false
let exists sat bstr =
try
for idx = 0 to length bstr - 1 do
if sat (unsafe_get bstr idx) then raise_notrace Break
done;
false
with Break -> true
let equal a b =
if length a == length b then
try
let len = length a in
let len0 = len land 3 in
let len1 = len lsr 2 in
for i = 0 to len1 - 1 do
let i = i * 4 in
if unsafe_get_int32_ne a i <> unsafe_get_int32_ne b i then
raise_notrace Break
done;
for i = 0 to len0 - 1 do
let i = (len1 * 4) + i in
if unsafe_get_uint8 a i != unsafe_get_uint8 b i then raise_notrace Break
done;
true
with Break -> false
else false
let with_range ?(first = 0) ?(len = max_int) bstr =
if len < 0 then invalid_arg "Bstr.with_range";
if len == 0 then empty
else
let bstr_len = length bstr in
let max_idx = bstr_len - 1 in
let last =
match len with
| len when len = max_int -> max_idx
| len ->
let last = first + len - 1 in
if last > max_idx then max_idx else last
in
let first = if first < 0 then 0 else first in
if first = 0 && last = max_idx then bstr
else sub bstr ~off:first ~len:(last + 1 - first)
let with_index_range ?(first = 0) ?last bstr =
let bstr_len = length bstr in
let max_idx = bstr_len - 1 in
let last =
match last with
| None -> max_idx
| Some last -> if last > max_idx then max_idx else last
in
let first = if first < 0 then 0 else first in
if first > max_idx || last < 0 || first > last then empty
else if first == 0 && last = max_idx then bstr
else sub bstr ~off:first ~len:(last + 1 - first)
let is_white chr = chr == ' '
let trim ?(drop = is_white) bstr =
let len = length bstr in
if len == 0 then bstr
else
let max_idx = len - 1 in
let rec left_pos idx =
if idx > max_idx then len
else if drop bstr.{idx} then left_pos (succ idx)
else idx
in
let rec right_pos idx =
if idx < 0 then 0
else if drop bstr.{idx} then right_pos (pred idx)
else succ idx
in
let left = left_pos 0 in
if left = len then empty
else
let right = right_pos max_idx in
if left == 0 && right == len then bstr
else sub bstr ~off:left ~len:(right - left)
let fspan ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
if min < 0 then invalid_arg "Bstr.fspan";
if max < 0 then invalid_arg "Bstr.fspan";
if min > max || max == 0 then (empty, bstr)
else
let len = length bstr in
let max_idx = len - 1 in
let max_idx =
let k = max - 1 in
if k > max_idx then max_idx else k
in
let need_idx = min in
let rec go idx =
if idx <= max_idx && sat bstr.{idx} then go (succ idx)
else if idx < need_idx || idx == 0 then (empty, bstr)
else if idx == len then (bstr, empty)
else (sub bstr ~off:0 ~len:idx, sub bstr ~off:idx ~len:(len - idx))
in
go 0
let rspan ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
if min < 0 then invalid_arg "Bstr.rspan";
if max < 0 then invalid_arg "Bstr.rspan";
if min > max || max == 0 then (bstr, empty)
else
let len = length bstr in
let max_idx = len - 1 in
let min_idx =
let k = len - max in
if k < 0 then 0 else k
in
let need_idx = max_idx - min in
let rec go idx =
if idx >= min_idx && sat bstr.{idx} then go (pred idx)
else if idx > need_idx || idx == max_idx then (bstr, empty)
else if idx == -1 then (empty, bstr)
else
let cut = idx + 1 in
(sub bstr ~off:0 ~len:cut, sub bstr ~off:cut ~len:(len - cut))
in
go 0
let span ?(rev = false) ?min ?max ?sat bstr =
match rev with
| true -> rspan ?min ?max ?sat bstr
| false -> fspan ?min ?max ?sat bstr
let ftake ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
if min < 0 then invalid_arg "Bstr.ftake";
if max < 0 then invalid_arg "Bstr.ftake";
if min > max || max == 0 then empty
else
let len = length bstr in
let max_idx = len - 1 in
let max_idx =
let k = max - 1 in
if k > max_idx then max_idx else k
in
let need_idx = min in
let rec go idx =
if idx <= max_idx && sat bstr.{idx} then go (succ idx)
else if idx < need_idx || idx == 0 then empty
else if idx == len then bstr
else sub bstr ~off:0 ~len:idx
in
go 0
let rtake ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
if min < 0 then invalid_arg "Bstr.rtake";
if max < 0 then invalid_arg "Bstr.rtake";
if min > max || max == 0 then empty
else
let len = length bstr in
let max_idx = len - 1 in
let min_idx =
let k = len - max in
if k < 0 then 0 else k
in
let need_idx = max_idx - min in
let rec go idx =
if idx >= min_idx && sat bstr.{idx} then go (pred idx)
else if idx > need_idx || idx == max_idx then empty
else if idx == -1 then bstr
else
let cut = idx + 1 in
sub bstr ~off:cut ~len:(len - cut)
in
go 0
let take ?(rev = false) ?min ?max ?sat bstr =
match rev with
| true -> rtake ?min ?max ?sat bstr
| false -> ftake ?min ?max ?sat bstr
let fdrop ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
if min < 0 then invalid_arg "Bstr.fdrop";
if max < 0 then invalid_arg "Bstr.fdrop";
if min > max || max == 0 then bstr
else
let len = length bstr in
let max_idx = len - 1 in
let max_idx =
let k = max - 1 in
if k > max_idx then max_idx else k
in
let need_idx = min in
let rec go idx =
if idx <= max_idx && sat bstr.{idx} then go (succ idx)
else if idx < need_idx || idx == 0 then bstr
else if idx == len then bstr
else sub bstr ~off:idx ~len:(len - idx)
in
go 0
let rdrop ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
if min < 0 then invalid_arg "Bstr.rdrop";
if max < 0 then invalid_arg "Bstr.rdrop";
if min > max || max == 0 then bstr
else
let len = length bstr in
let max_idx = len - 1 in
let min_idx =
let k = len - max in
if k < 0 then 0 else k
in
let need_idx = max_idx - min in
let rec go idx =
if idx >= min_idx && sat bstr.{idx} then go (pred idx)
else if idx > need_idx || idx == max_idx then bstr
else if idx == -1 then empty
else
let cut = idx + 1 in
sub bstr ~off:0 ~len:cut
in
go 0
let drop ?(rev = false) ?min ?max ?sat bstr =
match rev with
| true -> rdrop ?min ?max ?sat bstr
| false -> fdrop ?min ?max ?sat bstr
let shift bstr off =
if off > length bstr then invalid_arg "Bstr.shift";
let len = length bstr - off in
Bigarray.Array1.sub bstr off len
external ptr : t -> (nativeint[@unboxed])
= "bstr_bytecode_ptr" "bstr_native_ptr"
[@@noalloc]
let overlap a b =
let src_a = ptr a in
let src_b = ptr b in
let len_a = Nativeint.of_int (length a) in
let len_b = Nativeint.of_int (length b) in
let len =
let ( + ) = Nativeint.add in
let ( - ) = Nativeint.sub in
Nativeint.max 0n (Nativeint.min (src_a + len_a) (src_b + len_b))
- Nativeint.max src_a src_b
in
let len = Nativeint.to_int len in
if src_a >= src_b && src_a < Nativeint.add src_b len_b then
let offset = Nativeint.(to_int (sub src_a src_b)) in
Some (len, 0, offset)
else if src_b >= src_a && src_b < Nativeint.add src_a len_a then
let offset = Nativeint.(to_int (sub src_b src_a)) in
Some (len, offset, 0)
else None
let split_on_char sep bstr =
let lst = ref [] in
let max = ref (length bstr) in
for idx = length bstr - 1 downto 0 do
if unsafe_get bstr idx == sep then begin
lst := sub bstr ~off:(idx + 1) ~len:(!max - idx - 1) :: !lst;
max := idx
end
done;
sub bstr ~off:0 ~len:!max :: !lst
let to_seq bstr =
let rec go idx () =
if idx == length bstr then Seq.Nil
else
let chr = unsafe_get bstr idx in
Seq.Cons (chr, go (idx + 1))
in
go 0
let to_seqi bstr =
let rec go idx () =
if idx == length bstr then Seq.Nil
else
let chr = unsafe_get bstr idx in
Seq.Cons ((idx, chr), go (idx + 1))
in
go 0
let of_seq seq =
let n = ref 0 in
let buf = ref (make 0x7ff '\000') in
let resize () =
let new_len = Int.min (2 * length !buf) Sys.max_string_length in
(* TODO(dinosaure): should we keep this limit? *)
if length !buf == new_len then failwith "Bstr.of_seq: cannot grow bigstring";
let new_buf = make new_len '\000' in
Bigarray.Array1.blit !buf (sub new_buf ~off:0 ~len:(length !buf));
buf := new_buf
in
let fn chr =
if !n == length !buf then resize ();
unsafe_set !buf !n chr;
incr n
in
Seq.iter fn seq; sub !buf ~off:0 ~len:!n
module Witness = struct
type (_, _) eq = Refl : ('a, 'a) eq
type _ equality = ..
module type Inst = sig
type t
type _ equality += Eq : t equality
end
type 'a t = (module Inst with type t = 'a)
let make : type a. unit -> a t =
fun () ->
let module Inst = struct
type t = a
type _ equality += Eq : t equality
end in
(module Inst)
let _eq : type a b. a t -> b t -> (a, b) eq option =
fun (module A) (module B) -> match A.Eq with B.Eq -> Some Refl | _ -> None
end
module Pkt = struct
type bigstring = t
type endianness = Big_endian | Little_endian | Native_endian
type _ t =
| Primary : 'a primary -> 'a t
| Record : 'a record -> 'a t
| Variant : 'a variant -> 'a t
and _ primary =
| Char : char primary
| UInt8 : int primary
| Int8 : int primary
| UInt16 : endianness -> int primary
| Int16 : endianness -> int primary
| Int32 : endianness -> int32 primary
| Int64 : endianness -> int64 primary
| Var_int31 : int primary
| Var_int63 : int primary
| Bytes : int -> string primary
| CString : string primary
| Until : char -> string primary
and _ a_case = C0 : 'a case0 -> 'a a_case | C1 : ('a, 'b) case1 -> 'a a_case
and _ case_v =
| CV0 : 'a case0 -> 'a case_v
| CV1 : ('a, 'b) case1 * 'b -> 'a case_v
and 'a case0 = { ctag0: int; c0: 'a }
and ('a, 'b) case1 = {
ctag1: int
; ctype1: 'b t
; cwitn1: 'b Witness.t
; c1: 'b -> 'a
}
and 'a record = { rwit: 'a Witness.t; rfields: 'a fields_and_constr }
and 'a fields_and_constr =
| Fields : ('a, 'b) fields * 'b -> 'a fields_and_constr
and ('a, 'b) fields =
| F0 : ('a, 'a) fields
| F1 : ('a, 'b) field * ('a, 'c) fields -> ('a, 'b -> 'c) fields
and ('a, 'b) field = { ftype: 'b t; fget: 'a -> 'b }
and 'a variant = {
vwit: 'a Witness.t
; vcases: 'a a_case array
; vget: 'a -> 'a case_v
}
module Fields_folder (Acc : sig
type ('a, 'b) t
end) =
struct
type 'a t = {
nil: ('a, 'a) Acc.t
; cons: 'b 'c. ('a, 'b) field -> ('a, 'c) Acc.t -> ('a, 'b -> 'c) Acc.t
}
let rec fold : type a c. a t -> (a, c) fields -> (a, c) Acc.t =
fun folder -> function
| F0 -> folder.nil
| F1 (f, fs) -> folder.cons f (fold folder fs)
end
(* decoder *)
module Record_decoder = Fields_folder (struct
type ('a, 'b) t = bigstring -> int ref -> 'b -> 'a
end)
type 'a decoder = bigstring -> int ref -> 'a
let decode_char bstr pos =
let idx = !pos in
incr pos; get bstr idx
[@@inline always]
let decode_uint8 bstr pos =
let idx = !pos in
incr pos; get_uint8 bstr idx
[@@inline always]
let decode_int8 bstr pos =
let idx = !pos in
incr pos; get_int8 bstr idx
[@@inline always]
let decode_uint16 e bstr pos =
let idx = !pos in
incr pos;
match e with
| Big_endian -> get_uint16_be bstr idx
| Little_endian -> get_uint16_le bstr idx
| Native_endian -> get_uint16_ne bstr idx
[@@inline always]
let decode_int16 e bstr pos =
let idx = !pos in
incr pos;
match e with
| Big_endian -> get_int16_be bstr idx
| Little_endian -> get_int16_le bstr idx
| Native_endian -> get_int16_ne bstr idx
[@@inline always]
let decode_int32 e bstr pos =
let idx = !pos in
incr pos;
match e with
| Big_endian -> get_int32_be bstr idx
| Little_endian -> get_int32_le bstr idx
| Native_endian -> get_int32_ne bstr idx
[@@inline always]
let decode_int64 e bstr pos =
let idx = !pos in
incr pos;
match e with
| Big_endian -> get_int64_be bstr idx
| Little_endian -> get_int64_le bstr idx
| Native_endian -> get_int64_ne bstr idx
[@@inline always]
let decode_bytes len bstr pos =
let off = !pos in
pos := !pos + len;
sub_string bstr ~off ~len
let rec decode : type a. a t -> a decoder = function
| Primary p -> prim p
| Record r -> record r
| Variant _ -> assert false
and prim : type a. a primary -> a decoder = function
| Char -> decode_char
| UInt8 -> decode_uint8
| Int8 -> decode_int8
| UInt16 e -> decode_uint16 e
| Int16 e -> decode_int16 e
| Int32 e -> decode_int32 e
| Int64 e -> decode_int64 e
| Bytes len -> decode_bytes len
| _ -> assert false
and record : type a. a record -> a decoder =
fun { rfields= Fields (fs, constr); _ } ->
let nil _bstr _pos fn = fn in
let cons { ftype; _ } k =
let decode = decode ftype in
fun bstr pos constr ->
let x = decode bstr pos in
let constr = constr x in
k bstr pos constr
in
let fn = Record_decoder.fold { nil; cons } fs in
fun bstr pos -> fn bstr pos constr
(* combinators *)
let char = Primary Char
let uint8 = Primary UInt8
let int8 = Primary Int8
let beuint16 = Primary (UInt16 Big_endian)
let leuint16 = Primary (UInt16 Little_endian)
let neuint16 = Primary (UInt16 Native_endian)
let beint16 = Primary (Int16 Big_endian)
let leint16 = Primary (Int16 Little_endian)
let neint16 = Primary (Int16 Native_endian)
let beint32 = Primary (Int32 Big_endian)
let leint32 = Primary (Int32 Little_endian)
let neint32 = Primary (Int32 Native_endian)
let beint64 = Primary (Int64 Big_endian)
let leint64 = Primary (Int64 Little_endian)
let neint64 = Primary (Int64 Native_endian)
let varint31 = Primary Var_int31
let varint63 = Primary Var_int63
let bytes len = Primary (Bytes len)
let cstring = Primary CString
let until byte = Primary (Until byte)
(* record *)
type ('a, 'b, 'c) open_record = ('a, 'c) fields -> 'b * ('a, 'b) fields
let field ftype fget = { ftype; fget }
let record : 'b -> ('a, 'b, 'b) open_record = fun c fs -> (c, fs)
let app : type a b c d.
(a, b, c -> d) open_record -> (a, c) field -> (a, b, d) open_record =
fun r f fs -> r (F1 (f, fs))
let sealr : type a b. (a, b, a) open_record -> a t =
fun r ->
let c, fs = r F0 in
let rwit = Witness.make () in
let sealed = { rwit; rfields= Fields (fs, c) } in
Record sealed
let ( |+ ) = app
(* variant *)
type 'a case_p = 'a case_v
type ('a, 'b) case = int -> 'a a_case * 'b
let case0 c0 ctag0 =
let c = { ctag0; c0 } in
(C0 c, CV0 c)
let case1 : type a b. b t -> (b -> a) -> (a, b -> a case_p) case =
fun ctype1 c1 ctag1 ->
let cwitn1 : b Witness.t = Witness.make () in
let c = { ctag1; ctype1; cwitn1; c1 } in
(C1 c, fun v -> CV1 (c, v))
type ('a, 'b, 'c) open_variant = 'a a_case list -> 'c * 'a a_case list
let variant c vs = (c, vs)
let app v c cs =
let fc, cs = v cs in
let c, f = c (List.length cs) in
(fc f, c :: cs)
let sealv v =
let vget, vcases = v [] in
let vwit = Witness.make () in
let vcases = Array.of_list (List.rev vcases) in
Variant { vwit; vcases; vget }
let ( |~ ) = app
end

446
lib/bstr.mli Normal file
View file

@ -0,0 +1,446 @@
(** A small library for manipulating bigstrings.
To clarify the use of bigstrings in OCaml, we advise you to read the
overview of bigstrings and the difference with bytes. After this theoretical
reading, this module offers a whole host of useful functions for
manipulating bigstrings.
{1:overview Overview.}
A bigstring is a special kind of memory area in the OCaml world. Unlike
bytes, bigstrings are allocated via [malloc()] or are available via
[Unix.map_file].
They therefore exist outside the space normally allocated for OCaml with
regard to all its values. So there are some particularities to the use of
bigstrings.
The first thing to understand about bigstrings is that allocating them can
take time. Since a bigstring is obtained either by [malloc()] or by
[Unix.map_file], the former is a performance hit on the [malloc()] used
(which also depends on the fragmentation of the C heap) and the latter is a
system call that can interact with your file system.
By way of comparison, a byte of less than 2048 bytes requires only 3
processor instructions to exist and be available beyond that, the bytes is
allocated in the major heap.
It is therefore advisable to allocate just a few bigstrings and reuse them
throughout your application. It's even advisable to allocate large
bigstrings.
A particularity of bigstrings is that they cannot be moved by the Garbage
Collector. Existing in a space other than that of OCaml (the C heap), they
don't move. With this advantage in mind, we can imagine several situations
where we'd like a memory zone that doesn't move:
- a bigstring can be manipulated by several threads/domains. Of course,
parallel accesses must be protected, but you can be sure that the
bigstring will not move throughout the process. Thus, its location in
memory can be shared by several computing units.
- it may be necessary, in system programming, to write to a particular zone
in order to interact with a device. In this case, the bigstring can be
found as an OCaml value bridging a special memory area (such as the
framebuffer).
A final feature of bigstring is that it can be seen as a slice. You can have
another view of a bigstring that would be equally smaller. For example, the
{!val:sub} operation in particular doesn't copy your bigstring, but offers
you a "proxy" accessing the same memory area as the original bigstring.
{1:pkt Encode & Decode packets.}
In order to encode or decode packets (such as ARP or DNS packets), Bstr
offers a small API for converting a slice of bytes from a {!val:Bstr.t} to a
user-defined variant or record. *)
type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
val memcpy : t -> src_off:int -> t -> dst_off:int -> len:int -> unit
(** [memcpy src ~src_off dst ~dst_off ~len] copies [len] bytes from [src] to
[dst]. [src] must not overlap [dst]. Use {!val:memmove} if [src] & [dst] do
overlap. *)
val memmove : t -> src_off:int -> t -> dst_off:int -> len:int -> unit
(** [memmove src ~src_off dst ~dst_off ~len] copies [len] bytes from [src] to
[dst]. [src] and [dst] may overlap: copying takes place as though the bytes
in [src] are first copied into a temporary array that does not overlap [src]
or [dst], and the bytes are then copied from the temporary array to [dst].
*)
val memcmp : t -> src_off:int -> t -> dst_off:int -> len:int -> int
val memchr : t -> off:int -> len:int -> char -> int
val memset : t -> off:int -> len:int -> char -> unit
val empty : t
(** [empty] is an empty bigstring. *)
val length : t -> int
(** [length bstr] is the number of bytes in [bstr]. *)
val get : t -> int -> char
(** [get bstr i] is the byte of [bstr]' at index [i]. This is equivalent to the
[bstr.{i}] notation.
@raise Invalid_argument if [i] is not an index of [bstr]. *)
val create : int -> t
val make : int -> char -> t
val of_string : string -> t
val fill : t -> off:int -> len:int -> char -> unit
val blit : t -> src_off:int -> t -> dst_off:int -> len:int -> unit
val blit_from_string :
string -> src_off:int -> t -> dst_off:int -> len:int -> unit
val blit_from_bytes :
bytes -> src_off:int -> t -> dst_off:int -> len:int -> unit
(*
val init : int -> (int -> char) -> t
val copy : t -> t
val extend : t -> int -> int -> t
val concat : t -> t list -> t
val cat : t -> t -> t
val iter : (char -> unit) -> t -> unit
val iteri : (int -> char -> unit) -> t -> unit
val map : (char -> char) -> t -> t
val mapi : (int -> char -> char) -> t -> t
val fold_left : ('acc -> char -> 'acc) -> 'acc -> t -> 'acc
val fold_right : (char -> 'acc -> 'acc) -> t -> 'acc -> 'acc
val index : t -> ?rev:bool -> ?from:int -> char -> int
val contains : t -> ?rev:bool -> ?from:int -> char -> bool
val compare : t -> t -> int
val starts_with : prefix:string -> t -> bool
val ends_with : suffix:string -> t -> bool
val overlap : t -> t -> (int * int * int) option
*)
val get_int8 : t -> int -> int
(** [get_int8 bstr i] is [bstr]'s signed 8-bit integer starting at byte index
[i]. *)
val get_uint8 : t -> int -> int
(** [get_uint8 bstr i] is [bstr]'s unsigned 8-bit integer starting at byte index
[i]. *)
val get_uint16_ne : t -> int -> int
(** [get_int16_ne bstr i] is [bstr]'s native-endian unsigned 16-bit integer
starting at byte index [i]. *)
val get_uint16_le : t -> int -> int
(** [get_int16_le bstr i] is [bstr]'s little-endian unsigned 16-bit integer
starting at byte index [i]. *)
val get_uint16_be : t -> int -> int
(** [get_int16_be bstr i] is [bstr]'s big-endian unsigned 16-bit integer
starting at byte index [i]. *)
val get_int16_ne : t -> int -> int
(** [get_int16_ne bstr i] is [bstr]'s native-endian signed 16-bit integer
starting at byte index [i]. *)
val get_int16_le : t -> int -> int
(** [get_int16_le bstr i] is [bstr]'s little-endian signed 16-bit integer
starting at byte index [i]. *)
val get_int16_be : t -> int -> int
(** [get_int16_be bstr i] is [bstr]'s big-endian signed 16-bit integer starting
at byte index [i]. *)
val get_int32_ne : t -> int -> int32
(** [get_int32_ne bstr i] is [bstr]'s native-endian 32-bit integer starting at
byte index [i]. *)
val get_int32_le : t -> int -> int32
(** [get_int32_le bstr i] is [bstr]'s little-endian 32-bit integer starting at
byte index [i]. *)
val get_int32_be : t -> int -> int32
(** [get_int32_be bstr i] is [bstr]'s big-endian 32-bit integer starting at byte
index [i]. *)
val get_int64_ne : t -> int -> int64
(** [get_int64_ne bstr i] is [bstr]'s native-endian 64-bit integer starting at
byte index [i]. *)
val get_int64_le : t -> int -> int64
(** [get_int64_le bstr i] is [bstr]'s little-endian 64-bit integer starting at
byte index [i]. *)
val get_int64_be : t -> int -> int64
(** [get_int64_be bstr i] is [bstr]'s big-endian 64-bit integer starting at byte
index [i]. *)
val set : t -> int -> char -> unit
val set_int8 : t -> int -> int -> unit
val set_uint8 : t -> int -> int -> unit
val set_uint16_ne : t -> int -> int -> unit
val set_uint16_le : t -> int -> int -> unit
val set_uint16_be : t -> int -> int -> unit
val set_int16_ne : t -> int -> int -> unit
val set_int16_le : t -> int -> int -> unit
val set_int16_be : t -> int -> int -> unit
val set_int32_ne : t -> int -> int32 -> unit
val set_int32_le : t -> int -> int32 -> unit
val set_int32_be : t -> int -> int32 -> unit
val set_int64_ne : t -> int -> int64 -> unit
val set_int64_le : t -> int -> int64 -> unit
val set_int64_be : t -> int -> int64 -> unit
val unsafe_set : t -> int -> char -> unit
val sub : t -> off:int -> len:int -> t
(** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a
new view into [bstr] starting at [off], and with length [len].
{b Note} that this does not allocate a new buffer, but instead shares the
buffer of [bstr] with the newly-returned bigstring. *)
val overlap : t -> t -> (int * int * int) option
val sub_string : t -> off:int -> len:int -> string
(** [sub_string bstr ~off ~len] returns a string of length [len] containing the
bytes of [t] starting at [off]. *)
val to_string : t -> string
(** [to_string bstr] is equivalent to
[sub_string bstr ~off:0 ~len:(length bstr)]. *)
val blit_to_bytes : t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit
(** [blit_to_bytes src ~src_off dst ~dst_off ~len] copies [len] bytes from
[src], starting at index [src_off], to byte sequence [dst], starting at
index [dst_off].
@raise Invalid_argument
if [src_off] and [len] do not designate a valid range of [src], or if
[dst_off] and [len] do not designate a valid range of [dst]. *)
val is_empty : t -> bool
(** [is_empty bstr] is [length bstr = 0]. *)
val is_prefix : affix:string -> t -> bool
(** [is_prefix ~affix bstr] is [true] iff [affix.[idx] = bstr.{idx}] for all
indices [idx] of [affix]. *)
val is_infix : affix:string -> t -> bool
(** [is_infix ~affix bstr] is [true] iff there exists an index [j] in [bstr]
such that for all indices [i] of [affix] we have [affix.[i] = bstr.{j + i}].
*)
val is_suffix : affix:string -> t -> bool
(** [is_suffix ~affix bstr] is [true] iff [affix.[n - idx] = bstr.{m - idx}] for
all indices [idx] of [affix] with [n = String.length affix - 1] and
[m = length bstr - 1]. *)
val for_all : (char -> bool) -> t -> bool
(** [for_all p bstr] is [true] iff for all indices [idx] of [bstr],
[p bstr.{idx} = true]. *)
val exists : (char -> bool) -> t -> bool
(** [exists p bstr] is [true] iff there exists an index [idx] of [bstr] with
[p bstr.{idx} = true]. *)
val equal : t -> t -> bool
(** [equal a b] is [a = b]. *)
val with_range : ?first:int -> ?len:int -> t -> t
(** [with_range ~first ~len bstr] are the consecutive bytes of [bstr] whose
indices exist in the range \[[first];[first + len - 1]\].
[first] defaults to [0] and [len] to [max_int]. Note that [first] can be any
integer and [len] any positive integer. *)
val with_index_range : ?first:int -> ?last:int -> t -> t
(** [with_index_range ~first ~last bstr] are the consecutive bytes of [bstr]
whose indices exists in the range \[[first];[last]\].
[first] defaults to [0] and [last] to [length bstr - 1].
Note that both [first] and [last] can be any integer. If [first > last] the
interval is empty and the empty bigstring is returned. *)
val trim : ?drop:(char -> bool) -> t -> t
(** [trim ~drop bstr] is [bstr] with prefix and suffix bytes satisfying [drop]
in [bstr] removed. [drop] defaults to [fun chr -> chr = ' ']. *)
val span :
?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t * t
(** [span ~rev ~min ~max ~sat bstr] is [(l, r)] where:
- if [rev] is [false] (default), [l] is at least [min] and at most [max]
consecutive [sat] satisfying initial bytes of [bstr] or {!empty} if there
are no such bytes. [r] are the remaining bytes of [bstr].
- if [rev] is [true], [r] is at least [min] and at most [max] consecutive
[sat] satisfying final bytes of [bstr] or {!empty} if there are no such
bytes. [l] are the remaining bytes of [bstr].
If [max] is unspecified the span is unlimited. If [min] is unspecified it
defaults to [0]. If [min > max] the condition can't be satisfied and the
left or right span, depending on [rev], is always empty. [sat] defaults to
[Fun.const true].
@raise Invalid_argument if [max] or [min] is negative. *)
val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t
(** [take ~rev ~min ~max ~sat bstr] is the matching span of {!span} without the
remaining one. In other words:
{[
(if rev then snd else fst) (span ~rev ~min ~max ~sat bstr)
]} *)
val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t
(** [drop ~rev ~min ~max ~sat bstr] is the remaining span of {!span} without the
matching span. In other words:
{[
(if rev then fst else snd) (span ~rev ~min ~max ~sat bstr)
]} *)
val shift : t -> int -> t
(** [shift bstr n] is [sub bstr n (length bstr - n)]. *)
val split_on_char : char -> t -> t list
val to_seq : t -> char Seq.t
val to_seqi : t -> (int * char) Seq.t
val of_seq : char Seq.t -> t
module Pkt : sig
type bigstring = t
type 'a t
val char : char t
val uint8 : int t
val int8 : int t
val beuint16 : int t
val leuint16 : int t
val neuint16 : int t
val beint16 : int t
val leint16 : int t
val neint16 : int t
val beint32 : int32 t
val leint32 : int32 t
val neint32 : int32 t
val beint64 : int64 t
val leint64 : int64 t
val neint64 : int64 t
val varint31 : int t
val varint63 : int t
val bytes : int -> string t
val cstring : string t
val until : char -> string t
(* {2:records Records.}
{[
type header =
{ version : int32
; number : int32 }
let _PACK = 0x5041434bl
let header =
record (fun pack version number ->
if pack <> _PACK
then invalid_arg "Invalid PACK file";
{ version; number })
|+ field beint32 (fun _ -> _PACK)
|+ field beint32 (fun t -> t.version)
|+ field beint32 (fun t -> t.number)
|> sealr
]} *)
type ('a, 'b, 'c) open_record
(** The type for representing open records of type ['a] with a constructor of
['b]. ['c] represents the remaining fields to be described using the
{!val:(|+)} operator. An open record initially stisfies ['c = 'b] and can
be {{!val:sealr} sealed} once ['c = 'a]. *)
val record : 'b -> ('a, 'b, 'b) open_record
(** [record f] is an incomplete representation of the record of type ['a] with
constructor [f]. To complete the representation, add fields with
{!val:(|+)} and then seal the record with {!val:sealr}. *)
type ('a, 'b) field
(** The type for fields holding values of type ['b] and belonging to a record
of type ['a]. *)
val field : 'a t -> ('b -> 'a) -> ('b, 'a) field
(** [field n t g] is the representation of the field called [n] of type [t]
with getter [g]. For instance:
{[
type t = { foo: string }
let foo = field cstring (fun t -> t.foo)
]} *)
val ( |+ ) :
('a, 'b, 'c -> 'd) open_record -> ('a, 'c) field -> ('a, 'b, 'd) open_record
(** [r |+ f] is the open record [r] augmented with the field [f]. *)
val sealr : ('a, 'b, 'a) open_record -> 'a t
(** [sealr r] seals the open record [r]. *)
(** {2:variants Variants.}
{[
type t = Foo | Bar of string
let t =
variant (fun foo bar -> function Foo -> foo | Bar s -> bar s)
|~ case0 Foo
|~ case1 cstring (fun x -> Bar x)
|> sealv
]} *)
type ('a, 'b, 'c) open_variant
(** The type for representing open variants of type ['a] with pattern-matching
of type ['b]. ['c] represents the remaining constructors to be described
using the {!val:(|~)} operator. An open variant initially satisfies
['c = 'b] and can be {{!val:sealv} sealed} once ['c = 'a]. *)
val variant : 'b -> ('a, 'b, 'b) open_variant
(** [variant n p] is an incomplete representation of the variant type called
[n] of type ['a] using [p] to deconstruct values. To complete the
representation, add cases with {!val:(|~)} and then seal the variant with
{!val:sealv}. *)
type ('a, 'b) case
(** The type for representing variant cases of type ['a] with patterns of type
['b]. *)
type 'a case_p
(** The type for representing patterns for a variant of type ['a]. *)
val case0 : 'a -> ('a, 'a case_p) case
(** [case0 v] is a representation of a variant constructor [v] with no
arguments. For instance:
{[
type t = Foo
let foo = case0 Foo
]} *)
val case1 : 'b t -> ('b -> 'a) -> ('a, 'b -> 'a case_p) case
(** [case1 n t c] is a representation of a variant constructor [c] with an
argument of type [t]. For instances:
{[
type t = Foo of string
let foo = case1 cstring (fun s -> Foo s)
]} *)
val ( |~ ) :
('a, 'b, 'c -> 'd) open_variant
-> ('a, 'c) case
-> ('a, 'b, 'd) open_variant
(** [v |~ c] is the open variant [v] augmented with the case [c]. *)
val sealv : ('a, 'b, 'a -> 'a case_p) open_variant -> 'a t
(** [sealv v] seals the open variant [v]. *)
(* {2:decoder Decoder.} *)
val decode : 'a t -> bigstring -> int ref -> 'a
end

10
lib/dune Normal file
View file

@ -0,0 +1,10 @@
(library
(name bstr)
(public_name bstr)
(modules bstr)
(foreign_stubs
(language c)
(names bstr)
(flags
(:standard -Wcast-align)))
(wrapped false))

9
test/dune Normal file
View file

@ -0,0 +1,9 @@
(library
(name test)
(modules test)
(libraries unix))
(test
(name t)
(modules t)
(libraries bstr test))

258
test/t.ml Normal file
View file

@ -0,0 +1,258 @@
open Test
let test01 =
let descr = {text|"empty bstr"|text} in
Test.test ~title:"empty bstr" ~descr @@ fun () ->
let x = Bstr.create 0 in
check (0 = Bstr.length x);
let y = Bstr.to_string x in
check ("" = y)
let test02 =
let descr = {text|negative length|text} in
Test.test ~title:"negative bstr" ~descr @@ fun () ->
try
let _ = Bstr.create (-1) in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test03 =
let descr = {text|positive shift|text} in
Test.test ~title:"positive shift" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.shift x 1 in
check (0 = Bstr.length y)
let test04 =
let descr = {text|negative shift|text} in
Test.test ~title:"negative shift" ~descr @@ fun () ->
let x = Bstr.create 2 in
let y = Bstr.sub x ~off:1 ~len:1 in
begin
try
let _ = Bstr.shift x (-1) in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
end;
begin
try
let _ = Bstr.shift y (-1) in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
end
let test05 =
let descr = {text|bad positive shift|text} in
Test.test ~title:"bad positive shift" ~descr @@ fun () ->
let x = Bstr.create 10 in
try
let _ = Bstr.shift x 11 in
check false
with Invalid_argument _ -> check true
let test06 =
let descr = {text|sub|text} in
Test.test ~title:"sub" ~descr @@ fun () ->
let x = Bstr.create 100 in
let y = Bstr.sub x ~off:10 ~len:80 in
begin
match Bstr.overlap x y with
| Some (len, x_off, _) ->
check (len = 80);
check (x_off = 10)
| None -> check false
end;
let z = Bstr.sub y ~off:20 ~len:60 in
begin
match Bstr.overlap x z with
| Some (len, x_off, _) ->
check (len = 60);
check (x_off = 30)
| None -> check false
end
let test07 =
let descr = {text|negative sub|text} in
Test.test ~title:"negative sub" ~descr @@ fun () ->
let x = Bstr.create 2 in
let y = Bstr.sub ~off:1 ~len:1 x in
begin
try
let _ = Bstr.sub x ~off:(-1) ~len:0 in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
end;
begin
try
let _ = Bstr.sub y ~off:(-1) ~len:0 in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
end
let test08 =
let descr = {text|sub len too big|text} in
Test.test ~title:"sub len too big" ~descr @@ fun () ->
let x = Bstr.create 0 in
try
let _ = Bstr.sub x ~off:0 ~len:1 in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test09 =
let descr = {text|sub len too small|text} in
Test.test ~title:"sub len too small" ~descr @@ fun () ->
let x = Bstr.create 0 in
try
let _ = Bstr.sub x ~off:0 ~len:(-1) in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test10 =
let descr = {text|sub offset too big|text} in
Test.test ~title:"sub offset too big" ~descr @@ fun () ->
let x = Bstr.create 10 in
begin
try
let _ = Bstr.sub x ~off:11 ~len:0 in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
end;
let y = Bstr.sub x ~off:1 ~len:9 in
begin
try
let _ = Bstr.sub y ~off:10 ~len:0 in
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
end
let test11 =
let descr = {text|blit offset too big|text} in
Test.test ~title:"blit offset too big" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:2 y ~dst_off:1 ~len:1;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test12 =
let descr = {text|blit offset too small|text} in
Test.test ~title:"blit offset too small" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:(-1) y ~dst_off:1 ~len:1;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test13 =
let descr = {text|blit dst offset too big|text} in
Test.test ~title:"blit dst offset too big" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:1 y ~dst_off:2 ~len:1;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test14 =
let descr = {text|blit dst offset too small|text} in
Test.test ~title:"blit dst offset too small" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:1 y ~dst_off:(-1) ~len:1;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test15 =
let descr = {text|blit dst offset negative|text} in
Test.test ~title:"blit dst offset negative" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:0 y ~dst_off:(-1) ~len:1;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test16 =
let descr = {text|blit len too big|text} in
Test.test ~title:"blit len too big" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 2 in
try
Bstr.blit x ~src_off:0 y ~dst_off:0 ~len:2;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test17 =
let descr = {text|blit len too big (2)|text} in
Test.test ~title:"blit len too big (2)" ~descr @@ fun () ->
let x = Bstr.create 2 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:0 y ~dst_off:0 ~len:2;
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let test18 =
let descr = {text|blit len too small|text} in
Test.test ~title:"blit len too small" ~descr @@ fun () ->
let x = Bstr.create 1 in
let y = Bstr.create 1 in
try
Bstr.blit x ~src_off:0 y ~dst_off:0 ~len:(-1);
check false
with
| Invalid_argument _ -> check true
| _exn -> check false
let ( / ) = Filename.concat
let () =
let tests =
[
test01; test02; test03; test04; test05; test06; test07; test08; test09
; test10; test11; test12; test13; test14; test15; test16; test17; test18
]
in
let ({ Test.directory } as runner) = Test.runner (Sys.getcwd () / "_tests") in
let run idx test =
Format.printf "test%03d: %!" (succ idx);
Test.run runner test;
Format.printf "ok\n%!"
in
Format.printf "Run tests into %s\n%!" directory;
List.iteri run tests

75
test/test.ml Normal file
View file

@ -0,0 +1,75 @@
let strf fmt = Format.asprintf fmt
let ( / ) = Filename.concat
let check test =
let bt = Printexc.get_callstack max_int in
try
assert test;
print_string ".";
flush stdout
with exn ->
print_string "x";
flush stdout;
Printexc.raise_with_backtrace exn bt
type t = { title: string; descr: string; fn: unit -> unit }
let test ~title ~descr fn = { title; descr; fn }
type runner = { directory: string }
let rec mkdir_p path perm =
if path <> "" then begin
try Unix.mkdir path perm with
| Unix.Unix_error (EEXIST, _, _) when Sys.is_directory path -> ()
| Unix.Unix_error (ENOENT, _, _) ->
mkdir_p (Filename.dirname path) perm;
Unix.mkdir path perm
end
let mkdir ({ directory } as runner) = mkdir_p directory 0o755; runner
type ('a, 'b) str = ('a -> 'b, Format.formatter, unit, string) format4
let runner ?(g = Random.State.make_self_init ())
?(fmt : ('a, 'b) str = "run-%s") root =
let random_string len =
let res = Bytes.create len in
for i = 0 to len - 1 do
let chr =
match Random.State.int g (26 + 26 + 10) with
| n when n < 26 -> Char.chr (Char.code 'a' + n)
| n when n < 26 + 26 -> Char.chr (Char.code 'A' + n - 26)
| n -> Char.chr (Char.code '0' + n - 26 - 26)
in
Bytes.set res i chr
done;
Bytes.unsafe_to_string res
in
let rec go retry =
if retry >= 10 then failwith "Impossible to create a test directory";
let directory = root / strf fmt (random_string 4) in
if Sys.file_exists directory then go (succ retry) else mkdir { directory }
in
go 0
let run { directory= dir } { title; fn; _ } =
let old_stderr = Unix.dup Unix.stderr in
let new_stderr = open_out (dir / strf "%s.stderr" title) in
Unix.dup2 (Unix.descr_of_out_channel new_stderr) Unix.stderr;
let finally () =
flush stderr;
Unix.dup2 old_stderr Unix.stderr;
Unix.close old_stderr;
close_out new_stderr
in
Format.eprintf "*** %s ***\n%!" title;
try Fun.protect ~finally fn
with exn ->
let ic = open_in (dir / strf "%s.stderr" title) in
let ln = in_channel_length ic in
let rs = Bytes.create ln in
really_input ic rs 0 ln;
Format.printf "Terminated with: %S\n%!" (Printexc.to_string exn);
Format.printf "%s\n%!" (Bytes.unsafe_to_string rs);
exit 1

8
test/test.mli Normal file
View file

@ -0,0 +1,8 @@
type t
type runner = private { directory: string }
type ('a, 'b) str = ('a -> 'b, Format.formatter, unit, string) format4
val check : bool -> unit
val test : title:string -> descr:string -> (unit -> unit) -> t
val runner : ?g:Random.State.t -> ?fmt:(string, string) str -> string -> runner
val run : runner -> t -> unit