commit 158be8641a1b935b3a10dc65a9cd74b3ca071b14 Author: Calascibetta Romain Date: Wed Jan 15 16:43:37 2025 +0100 First commit diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..7064c1b --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2024 Romain Calascibetta + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..710b916 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Bstr, the new `Cstruct` diff --git a/bstr.opam b/bstr.opam new file mode 100644 index 0000000..5d07923 --- /dev/null +++ b/bstr.opam @@ -0,0 +1,18 @@ +opam-version: "2.0" +name: "bstr" +maintainer: [ "Romain Calascibetta " ] +authors: [ "Romain Calascibetta " ] +homepage: "https://git.robur.coop/robur/bstr" +bug-reports: "https://git.robur.coop/robur/bstr" +dev-repo: "git+https://github.com/robur-coop/bstr" +doc: "https://robur-coop.github.io/bstr/" +license: "MIT" + +build: [ "dune" "build" "-p" name "-j" jobs ] +run-test: [ "dune" "runtest" "-p" name "-j" jobs ] + +depends: [ + "ocaml" {>= "4.14.0"} + "dune" {>= "3.5.0"} +] +x-maintenance-intent: [ "(latest)" ] diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..242b64a --- /dev/null +++ b/dune-project @@ -0,0 +1,2 @@ +(lang dune 2.0) +(name bstr) diff --git a/lib/bstr.c b/lib/bstr.c new file mode 100644 index 0000000..e3df28e --- /dev/null +++ b/lib/bstr.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include + +CAMLprim value bstr_bytecode_ptr(value va) { + CAMLparam1(va); + CAMLlocal1(res); + + struct caml_ba_array *a = Caml_ba_array_val(va); + void *src_a = a->data; + res = caml_copy_nativeint((intnat)src_a); + + CAMLreturn(res); +} + +intnat bstr_native_ptr(value va) { + struct caml_ba_array *a = Caml_ba_array_val(va); + return ((intnat)a->data); +} + +#define bstr_uint8_off(ba, off) ((uint8_t *)Caml_ba_data_val(ba) + off) + +void bstr_native_memcpy(value src, intnat src_off, value dst, intnat dst_off, + intnat len) { + memcpy(bstr_uint8_off(src, src_off), bstr_uint8_off(dst, dst_off), len); +} + +CAMLprim value bstr_bytecode_memcpy(value src, value src_off, value dst, + value dst_off, value len) { + CAMLparam5(src, src_off, dst, dst_off, len); + bstr_native_memcpy(src, Unsigned_long_val(src_off), dst, + Unsigned_long_val(dst_off), Unsigned_long_val(len)); + CAMLreturn(Val_unit); +} + +void bstr_native_memmove(value src, intnat src_off, value dst, intnat dst_off, + intnat len) { + memmove(bstr_uint8_off(src, src_off), bstr_uint8_off(dst, dst_off), len); +} + +CAMLprim value bstr_bytecode_memmove(value src, value src_off, value dst, + value dst_off, value len) { + CAMLparam5(src, src_off, dst, dst_off, len); + bstr_native_memmove(src, Unsigned_long_val(src_off), dst, + Unsigned_long_val(dst_off), Unsigned_long_val(len)); + CAMLreturn(Val_unit); +} + +intnat bstr_native_memcmp(value src, intnat src_off, value dst, intnat dst_off, + intnat len) { + intnat res; + res = memcmp(bstr_uint8_off(src, src_off), bstr_uint8_off(dst, dst_off), len); + return (res); +} + +CAMLprim value bstr_bytecode_memcmp(value src, value src_off, value dst, + value dst_off, value len) { + CAMLparam5(src, src_off, dst, dst_off, len); + intnat res; + res = bstr_native_memcmp(src, Unsigned_long_val(src_off), dst, + Unsigned_long_val(dst_off), Unsigned_long_val(len)); + CAMLreturn(Val_long(res)); +} + +#define __MEM1(name) \ + intnat bstr_native_##name(value src, intnat src_off, intnat src_len, \ + intnat va) { \ + void *res = name(bstr_uint8_off(src, src_off), va, src_len); \ + if (res == NULL) \ + return (-1); \ + \ + return ((intnat)(res - src)); \ + } \ + \ + CAMLprim value bstr_bytecode_##name(value src, value src_off, value src_len, \ + value va) { \ + CAMLparam4(src, src_off, src_len, va); \ + intnat res; \ + res = \ + bstr_native_##name(src, Unsigned_long_val(src_off), \ + Unsigned_long_val(src_len), Unsigned_long_val(va)); \ + CAMLreturn(Val_long(res)); \ + } + +__MEM1(memset) +__MEM1(memchr) + +/* This function is **only** useful when accessing to a bigstring for an + * architecture requiring alignment **and** for an OCaml executable in + * bytecode. It concerns only 32-bits architectures. + */ + +uint64_t bstr_native_get64u(value va, intnat off) { +#ifdef ARCH_ALIGN_INT64 + char b0, b1, b2, b3, b4, b5, b6, b7; +#endif + struct caml_ba_array *a = Caml_ba_array_val(va); + void *addr = &((unsigned char *)a->data)[off]; + uint64_t res; + +#ifdef ARCH_ALIGN_INT64 + if (!((size_t)addr) & 0x7) + res = *((uint64_t *)addr); + else { + b0 = ((unsigned char *)a->data)[off]; + b1 = ((unsigned char *)a->data)[off + 1]; + b2 = ((unsigned char *)a->data)[off + 2]; + b3 = ((unsigned char *)a->data)[off + 3]; + b4 = ((unsigned char *)a->data)[off + 4]; + b5 = ((unsigned char *)a->data)[off + 5]; + b6 = ((unsigned char *)a->data)[off + 6]; + b7 = ((unsigned char *)a->data)[off + 7]; +#ifdef ARCH_BIG_ENDIAN + res = (uint64_t)b0 << 56 | (uint64_t)b1 << 48 | (uint64_t)b2 << 40 | + (uint64_t)b3 << 32 | (uint64_t)b4 << 24 | (uint64_t)b5 << 16 | + (uint64_t)b6 << 8 | (uint64_t)b7; +#else + res = (uint64_t)b7 << 56 | (uint64_t)b6 << 48 | (uint64_t)b5 << 40 | + (uint64_t)b4 << 32 | (uint64_t)b3 << 24 | (uint64_t)b2 << 16 | + (uint64_t)b1 << 8 | (uint64_t)b0; +#endif + } +#else + res = *((uint64_t *)addr); +#endif + + return (res); +} + +CAMLprim value bstr_bytecode_get64u(value va, value off) { + CAMLparam2(va, off); + CAMLlocal1(res); + + uint64_t val = bstr_native_get64u(va, Unsigned_long_val(off)); + res = caml_copy_int64(val); + + CAMLreturn(res); +} diff --git a/lib/bstr.ml b/lib/bstr.ml new file mode 100644 index 0000000..94a9a2a --- /dev/null +++ b/lib/bstr.ml @@ -0,0 +1,948 @@ +type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t + +module Bytes = struct + include Bytes + + external unsafe_get_uint8 : bytes -> int -> int = "%bytes_unsafe_get" + external unsafe_set_uint8 : bytes -> int -> int -> unit = "%bytes_unsafe_set" + external unsafe_get_int32_ne : bytes -> int -> int32 = "%caml_bytes_get32u" + + external unsafe_set_int32_ne : bytes -> int -> int32 -> unit + = "%caml_bytes_set32u" +end + +external swap16 : int -> int = "%bswap16" +external swap32 : int32 -> int32 = "%bswap_int32" +external swap64 : int64 -> int64 = "%bswap_int64" +external get_uint8 : t -> int -> int = "%caml_ba_ref_1" +external set_uint8 : t -> int -> int -> unit = "%caml_ba_set_1" +external get_uint16_ne : t -> int -> int = "%caml_bigstring_get16" +external set_int16_ne : t -> int -> int -> unit = "%caml_bigstring_set16" +external get_int32_ne : t -> int -> int32 = "%caml_bigstring_get32" +external set_int32_ne : t -> int -> int32 -> unit = "%caml_bigstring_set32" +external set_int64_ne : t -> int -> int64 -> unit = "%caml_bigstring_set64" +external unsafe_get_uint8 : t -> int -> int = "%caml_ba_unsafe_ref_1" +external unsafe_set_uint8 : t -> int -> int -> unit = "%caml_ba_unsafe_set_1" +external unsafe_get_uint16_ne : t -> int -> int = "%caml_bigstring_get16u" + +external unsafe_set_uint16_ne : t -> int -> int -> unit + = "%caml_bigstring_set16u" + +external unsafe_get_int32_ne : t -> int -> int32 = "%caml_bigstring_get32u" + +external unsafe_set_int32_ne : t -> int -> int32 -> unit + = "%caml_bigstring_set32u" + +external unsafe_get_int64_ne : t -> (int[@untagged]) -> (int64[@unboxed]) + = "bstr_bytecode_get64u" "bstr_native_get64u" +[@@noalloc] + +external _unsafe_set_int64_ne : t -> int -> int64 -> unit + = "%caml_bigstring_set64u" + +external unsafe_memcmp : + t + -> (int[@untagged]) + -> t + -> (int[@untagged]) + -> (int[@untagged]) + -> (int[@untagged]) = "bstr_bytecode_memcmp" "bstr_native_memcmp" +[@@noalloc] + +external unsafe_memcpy : + t -> (int[@untagged]) -> t -> (int[@untagged]) -> (int[@untagged]) -> unit + = "bstr_bytecode_memcpy" "bstr_native_memcpy" +[@@noalloc] + +external unsafe_memmove : + t -> (int[@untagged]) -> t -> (int[@untagged]) -> (int[@untagged]) -> unit + = "bstr_bytecode_memmove" "bstr_native_memmove" +[@@noalloc] + +external unsafe_memchr : + t + -> (int[@untagged]) + -> (int[@untagged]) + -> (int[@untagged]) + -> (int[@untagged]) = "bstr_bytecode_memchr" "bstr_native_memchr" +[@@noalloc] + +external unsafe_memset : + t + -> (int[@untagged]) + -> (int[@untagged]) + -> (int[@untagged]) + -> (int[@untagged]) = "bstr_bytecode_memchr" "bstr_native_memchr" +[@@noalloc] + +let memcmp src ~src_off dst ~dst_off ~len = + if + len < 0 + || src_off < 0 + || src_off > Bigarray.Array1.dim src - len + || dst_off < 0 + || dst_off > Bigarray.Array1.dim dst - len + then invalid_arg "Bstr.memcmp"; + unsafe_memcmp src src_off dst dst_off len + +let memcpy src ~src_off dst ~dst_off ~len = + if + len < 0 + || src_off < 0 + || src_off > Bigarray.Array1.dim src - len + || dst_off < 0 + || dst_off > Bigarray.Array1.dim dst - len + then invalid_arg "Bstr.memcpy"; + unsafe_memcpy src src_off dst dst_off len + +let memmove src ~src_off dst ~dst_off ~len = + if + len < 0 + || src_off < 0 + || src_off > Bigarray.Array1.dim src - len + || dst_off < 0 + || dst_off > Bigarray.Array1.dim dst - len + then invalid_arg "Bstr.memmove"; + unsafe_memmove src src_off dst dst_off len + +let memchr src ~off ~len value = + if len < 0 || off < 0 || off > Bigarray.Array1.dim src - len then + invalid_arg "Bstr.memchr"; + unsafe_memchr src off len (Char.code value) + +let memset src ~off ~len value = + if len < 0 || off < 0 || off > Bigarray.Array1.dim src - len then + invalid_arg "Bstr.memset"; + ignore (unsafe_memset src off len (Char.code value)) + +let empty = Bigarray.Array1.create Bigarray.char Bigarray.c_layout 0 +let create len = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len + +external length : t -> int = "%caml_ba_dim_1" +external get : t -> int -> char = "%caml_ba_ref_1" +external unsafe_get : t -> int -> char = "%caml_ba_unsafe_ref_1" +external set : t -> int -> char -> unit = "%caml_ba_set_1" +external unsafe_set : t -> int -> char -> unit = "%caml_ba_unsafe_set_1" + +let unsafe_fill bstr ~off ~len v = + let nv = Nativeint.of_int v in + let vv = Nativeint.(logor (shift_left nv 8) nv) in + let vvvv = Nativeint.(logor (shift_left vv 16) vv) in + let vvvv = Nativeint.to_int32 vvvv in + let len0 = len land 3 in + let len1 = len lsr 2 in + for i = 0 to len1 - 1 do + let i = i * 4 in + unsafe_set_int32_ne bstr (off + i) vvvv + done; + for i = 0 to len0 - 1 do + let i = (len1 * 4) + i in + unsafe_set_uint8 bstr (off + i) v + done + +let fill bstr ~off ~len chr = + if len < 0 || off < 0 || off > length bstr - len then invalid_arg "Bstr.fill"; + unsafe_fill bstr ~off ~len (Char.code chr) + +let make len chr = + let bstr = create len in + unsafe_fill bstr ~off:0 ~len (Char.code chr); + (* [Obj.magic] instead of [Char.code]? *) + bstr + +let get_int64_ne bstr idx = + if idx < 0 || idx > length bstr - 8 then invalid_arg "Bstr.get_int64_ne"; + unsafe_get_int64_ne bstr idx + +let get_int8 bstr i = + (get_uint8 bstr i lsl (Sys.int_size - 8)) asr (Sys.int_size - 8) + +let get_uint16_le bstr i = + if Sys.big_endian then swap16 (get_uint16_ne bstr i) else get_uint16_ne bstr i + +let get_uint16_be bstr i = + if not Sys.big_endian then swap16 (get_uint16_ne bstr i) + else get_uint16_ne bstr i + +let _unsafe_get_uint16_le bstr i = + (* TODO(dinosaure): for unicode. *) + if Sys.big_endian then swap16 (unsafe_get_uint16_ne bstr i) + else unsafe_get_uint16_ne bstr i + +let _unsafe_get_uint16_be bstr i = + (* TODO(dinosaure): for unicode. *) + if not Sys.big_endian then swap16 (unsafe_get_uint16_ne bstr i) + else unsafe_get_uint16_ne bstr i + +let get_int16_ne bstr i = + (get_uint16_ne bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16) + +let get_int16_le bstr i = + (get_uint16_le bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16) + +let get_int16_be bstr i = + (get_uint16_be bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16) + +let get_int32_le bstr i = + if Sys.big_endian then swap32 (get_int32_ne bstr i) else get_int32_ne bstr i + +let get_int32_be bstr i = + if not Sys.big_endian then swap32 (get_int32_ne bstr i) + else get_int32_ne bstr i + +let get_int64_le bstr i = + if Sys.big_endian then swap64 (get_int64_ne bstr i) else get_int64_ne bstr i + +let get_int64_be bstr i = + if not Sys.big_endian then swap64 (get_int64_ne bstr i) + else get_int64_ne bstr i + +let _unsafe_set_uint16_le bstr i x = + (* TODO(dinosaure): for unicode. *) + if Sys.big_endian then unsafe_set_uint16_ne bstr i (swap16 x) + else unsafe_set_uint16_ne bstr i x + +let _unsafe_set_uint16_be bstr i x = + (* TODO(dinosaure): for unicode. *) + if Sys.big_endian then unsafe_set_uint16_ne bstr i x + else unsafe_set_uint16_ne bstr i (swap16 x) + +let set_int16_le bstr i x = + if Sys.big_endian then set_int16_ne bstr i (swap16 x) + else set_int16_ne bstr i x + +let set_int16_be bstr i x = + if not Sys.big_endian then set_int16_ne bstr i (swap16 x) + else set_int16_ne bstr i x + +let set_int32_le bstr i x = + if Sys.big_endian then set_int32_ne bstr i (swap32 x) + else set_int32_ne bstr i x + +let set_int32_be bstr i x = + if not Sys.big_endian then set_int32_ne bstr i (swap32 x) + else set_int32_ne bstr i x + +let set_int64_le bstr i x = + if Sys.big_endian then set_int64_ne bstr i (swap64 x) + else set_int64_ne bstr i x + +let set_int64_be bstr i x = + if not Sys.big_endian then set_int64_ne bstr i (swap64 x) + else set_int64_ne bstr i x + +let set_int8 = set_uint8 +let set_uint16_ne = set_int16_ne +let set_uint16_be = set_int16_be +let set_uint16_le = set_int16_le + +external sub : t -> off:int -> len:int -> t = "caml_ba_sub" + +let unsafe_blit src ~src_off dst ~dst_off ~len = + let len0 = len land 3 in + let len1 = len lsr 2 in + for i = 0 to len1 - 1 do + let i = i * 4 in + let v = unsafe_get_int32_ne src (src_off + i) in + unsafe_set_int32_ne dst (dst_off + i) v + done; + for i = 0 to len0 - 1 do + let i = (len1 * 4) + i in + let v = unsafe_get_uint8 src (src_off + i) in + unsafe_set_uint8 dst (dst_off + i) v + done + +let blit src ~src_off dst ~dst_off ~len = + if + len < 0 + || src_off < 0 + || src_off > length src - len + || dst_off < 0 + || dst_off > length dst - len + then invalid_arg "Bstr.blit"; + unsafe_blit src ~src_off dst ~dst_off ~len + +let unsafe_blit_to_bytes bstr ~src_off dst ~dst_off ~len = + let len0 = len land 3 in + let len1 = len lsr 2 in + for i = 0 to len1 - 1 do + let i = i * 4 in + let v = unsafe_get_int32_ne bstr (src_off + i) in + Bytes.unsafe_set_int32_ne dst (dst_off + i) v + done; + for i = 0 to len0 - 1 do + let i = (len1 * 4) + i in + let v = unsafe_get_uint8 bstr (src_off + i) in + Bytes.unsafe_set_uint8 dst (dst_off + i) v + done + +let unsafe_blit_from_bytes src ~src_off bstr ~dst_off ~len = + let len0 = len land 3 in + let len1 = len lsr 2 in + for i = 0 to len1 - 1 do + let i = i * 4 in + let v = Bytes.unsafe_get_int32_ne src (src_off + i) in + unsafe_set_int32_ne bstr (dst_off + i) v + done; + for i = 0 to len0 - 1 do + let i = (len1 * 4) + i in + let v = Bytes.unsafe_get_uint8 src (src_off + i) in + unsafe_set_uint8 bstr (dst_off + i) v + done + +let blit_from_bytes src ~src_off bstr ~dst_off ~len = + if + len < 0 + || src_off < 0 + || src_off > Bytes.length src - len + || dst_off < 0 + || dst_off > length bstr - len + then invalid_arg "Bstr.blit_from_bytes"; + unsafe_blit_from_bytes src ~src_off bstr ~dst_off ~len + +let blit_from_string src ~src_off bstr ~dst_off ~len = + blit_from_bytes (Bytes.unsafe_of_string src) ~src_off bstr ~dst_off ~len + +let blit_to_bytes bstr ~src_off dst ~dst_off ~len = + if + len < 0 + || src_off < 0 + || src_off > length bstr - len + || dst_off < 0 + || dst_off > Bytes.length dst - len + then invalid_arg "Bstr.blit_to_bytes"; + unsafe_blit_to_bytes bstr ~src_off dst ~dst_off ~len + +let of_string str = + let len = String.length str in + let bstr = create len in + unsafe_blit_from_bytes + (Bytes.unsafe_of_string str) + ~src_off:0 bstr ~dst_off:0 ~len; + bstr + +let unsafe_sub_string bstr src_off len = + let buf = Bytes.create len in + unsafe_blit_to_bytes bstr ~src_off buf ~dst_off:0 ~len; + Bytes.unsafe_to_string buf + +let sub_string bstr ~off ~len = + if len < 0 || off < 0 || off > length bstr - len then + invalid_arg "Bstr.sub_string"; + unsafe_sub_string bstr off len + +let to_string bstr = unsafe_sub_string bstr 0 (length bstr) +let is_empty bstr = length bstr == 0 + +let is_prefix ~affix bstr = + let len_affix = String.length affix in + let len_bstr = length bstr in + if len_affix > len_bstr then false + else + let max_idx_affix = len_affix - 1 in + let rec go idx = + if idx > max_idx_affix then true + else if affix.[idx] != bstr.{idx} then false + else go (succ idx) + in + go 0 + +let is_infix ~affix bstr = + let len_affix = String.length affix in + let len_bstr = length bstr in + if len_affix > len_bstr then false + else + let max_idx_affix = len_affix - 1 in + let max_idx_bstr = len_bstr - len_affix in + let rec go idx k = + if idx > max_idx_bstr then false + else if k > max_idx_affix then true + else if k > 0 then + if affix.[k] == bstr.{idx + k} then go idx (succ k) else go (succ idx) 0 + else if affix.[0] = bstr.{idx} then go idx 1 + else go (succ idx) 0 + in + go 0 0 + +let is_suffix ~affix bstr = + let max_idx_affix = String.length affix - 1 in + let max_idx_bstr = length bstr - 1 in + if max_idx_affix > max_idx_bstr then false + else + let rec go idx = + if idx > max_idx_affix then true + else if affix.[max_idx_affix - idx] != bstr.{max_idx_bstr - idx} then + false + else go (succ idx) + in + go 0 + +exception Break + +let for_all sat bstr = + try + for idx = 0 to length bstr - 1 do + if sat (unsafe_get bstr idx) == false then raise_notrace Break + done; + true + with Break -> false + +let exists sat bstr = + try + for idx = 0 to length bstr - 1 do + if sat (unsafe_get bstr idx) then raise_notrace Break + done; + false + with Break -> true + +let equal a b = + if length a == length b then + try + let len = length a in + let len0 = len land 3 in + let len1 = len lsr 2 in + for i = 0 to len1 - 1 do + let i = i * 4 in + if unsafe_get_int32_ne a i <> unsafe_get_int32_ne b i then + raise_notrace Break + done; + for i = 0 to len0 - 1 do + let i = (len1 * 4) + i in + if unsafe_get_uint8 a i != unsafe_get_uint8 b i then raise_notrace Break + done; + true + with Break -> false + else false + +let with_range ?(first = 0) ?(len = max_int) bstr = + if len < 0 then invalid_arg "Bstr.with_range"; + if len == 0 then empty + else + let bstr_len = length bstr in + let max_idx = bstr_len - 1 in + let last = + match len with + | len when len = max_int -> max_idx + | len -> + let last = first + len - 1 in + if last > max_idx then max_idx else last + in + let first = if first < 0 then 0 else first in + if first = 0 && last = max_idx then bstr + else sub bstr ~off:first ~len:(last + 1 - first) + +let with_index_range ?(first = 0) ?last bstr = + let bstr_len = length bstr in + let max_idx = bstr_len - 1 in + let last = + match last with + | None -> max_idx + | Some last -> if last > max_idx then max_idx else last + in + let first = if first < 0 then 0 else first in + if first > max_idx || last < 0 || first > last then empty + else if first == 0 && last = max_idx then bstr + else sub bstr ~off:first ~len:(last + 1 - first) + +let is_white chr = chr == ' ' + +let trim ?(drop = is_white) bstr = + let len = length bstr in + if len == 0 then bstr + else + let max_idx = len - 1 in + let rec left_pos idx = + if idx > max_idx then len + else if drop bstr.{idx} then left_pos (succ idx) + else idx + in + let rec right_pos idx = + if idx < 0 then 0 + else if drop bstr.{idx} then right_pos (pred idx) + else succ idx + in + let left = left_pos 0 in + if left = len then empty + else + let right = right_pos max_idx in + if left == 0 && right == len then bstr + else sub bstr ~off:left ~len:(right - left) + +let fspan ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr = + if min < 0 then invalid_arg "Bstr.fspan"; + if max < 0 then invalid_arg "Bstr.fspan"; + if min > max || max == 0 then (empty, bstr) + else + let len = length bstr in + let max_idx = len - 1 in + let max_idx = + let k = max - 1 in + if k > max_idx then max_idx else k + in + let need_idx = min in + let rec go idx = + if idx <= max_idx && sat bstr.{idx} then go (succ idx) + else if idx < need_idx || idx == 0 then (empty, bstr) + else if idx == len then (bstr, empty) + else (sub bstr ~off:0 ~len:idx, sub bstr ~off:idx ~len:(len - idx)) + in + go 0 + +let rspan ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr = + if min < 0 then invalid_arg "Bstr.rspan"; + if max < 0 then invalid_arg "Bstr.rspan"; + if min > max || max == 0 then (bstr, empty) + else + let len = length bstr in + let max_idx = len - 1 in + let min_idx = + let k = len - max in + if k < 0 then 0 else k + in + let need_idx = max_idx - min in + let rec go idx = + if idx >= min_idx && sat bstr.{idx} then go (pred idx) + else if idx > need_idx || idx == max_idx then (bstr, empty) + else if idx == -1 then (empty, bstr) + else + let cut = idx + 1 in + (sub bstr ~off:0 ~len:cut, sub bstr ~off:cut ~len:(len - cut)) + in + go 0 + +let span ?(rev = false) ?min ?max ?sat bstr = + match rev with + | true -> rspan ?min ?max ?sat bstr + | false -> fspan ?min ?max ?sat bstr + +let ftake ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr = + if min < 0 then invalid_arg "Bstr.ftake"; + if max < 0 then invalid_arg "Bstr.ftake"; + if min > max || max == 0 then empty + else + let len = length bstr in + let max_idx = len - 1 in + let max_idx = + let k = max - 1 in + if k > max_idx then max_idx else k + in + let need_idx = min in + let rec go idx = + if idx <= max_idx && sat bstr.{idx} then go (succ idx) + else if idx < need_idx || idx == 0 then empty + else if idx == len then bstr + else sub bstr ~off:0 ~len:idx + in + go 0 + +let rtake ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr = + if min < 0 then invalid_arg "Bstr.rtake"; + if max < 0 then invalid_arg "Bstr.rtake"; + if min > max || max == 0 then empty + else + let len = length bstr in + let max_idx = len - 1 in + let min_idx = + let k = len - max in + if k < 0 then 0 else k + in + let need_idx = max_idx - min in + let rec go idx = + if idx >= min_idx && sat bstr.{idx} then go (pred idx) + else if idx > need_idx || idx == max_idx then empty + else if idx == -1 then bstr + else + let cut = idx + 1 in + sub bstr ~off:cut ~len:(len - cut) + in + go 0 + +let take ?(rev = false) ?min ?max ?sat bstr = + match rev with + | true -> rtake ?min ?max ?sat bstr + | false -> ftake ?min ?max ?sat bstr + +let fdrop ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr = + if min < 0 then invalid_arg "Bstr.fdrop"; + if max < 0 then invalid_arg "Bstr.fdrop"; + if min > max || max == 0 then bstr + else + let len = length bstr in + let max_idx = len - 1 in + let max_idx = + let k = max - 1 in + if k > max_idx then max_idx else k + in + let need_idx = min in + let rec go idx = + if idx <= max_idx && sat bstr.{idx} then go (succ idx) + else if idx < need_idx || idx == 0 then bstr + else if idx == len then bstr + else sub bstr ~off:idx ~len:(len - idx) + in + go 0 + +let rdrop ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr = + if min < 0 then invalid_arg "Bstr.rdrop"; + if max < 0 then invalid_arg "Bstr.rdrop"; + if min > max || max == 0 then bstr + else + let len = length bstr in + let max_idx = len - 1 in + let min_idx = + let k = len - max in + if k < 0 then 0 else k + in + let need_idx = max_idx - min in + let rec go idx = + if idx >= min_idx && sat bstr.{idx} then go (pred idx) + else if idx > need_idx || idx == max_idx then bstr + else if idx == -1 then empty + else + let cut = idx + 1 in + sub bstr ~off:0 ~len:cut + in + go 0 + +let drop ?(rev = false) ?min ?max ?sat bstr = + match rev with + | true -> rdrop ?min ?max ?sat bstr + | false -> fdrop ?min ?max ?sat bstr + +let shift bstr off = + if off > length bstr then invalid_arg "Bstr.shift"; + let len = length bstr - off in + Bigarray.Array1.sub bstr off len + +external ptr : t -> (nativeint[@unboxed]) + = "bstr_bytecode_ptr" "bstr_native_ptr" +[@@noalloc] + +let overlap a b = + let src_a = ptr a in + let src_b = ptr b in + let len_a = Nativeint.of_int (length a) in + let len_b = Nativeint.of_int (length b) in + let len = + let ( + ) = Nativeint.add in + let ( - ) = Nativeint.sub in + Nativeint.max 0n (Nativeint.min (src_a + len_a) (src_b + len_b)) + - Nativeint.max src_a src_b + in + let len = Nativeint.to_int len in + if src_a >= src_b && src_a < Nativeint.add src_b len_b then + let offset = Nativeint.(to_int (sub src_a src_b)) in + Some (len, 0, offset) + else if src_b >= src_a && src_b < Nativeint.add src_a len_a then + let offset = Nativeint.(to_int (sub src_b src_a)) in + Some (len, offset, 0) + else None + +let split_on_char sep bstr = + let lst = ref [] in + let max = ref (length bstr) in + for idx = length bstr - 1 downto 0 do + if unsafe_get bstr idx == sep then begin + lst := sub bstr ~off:(idx + 1) ~len:(!max - idx - 1) :: !lst; + max := idx + end + done; + sub bstr ~off:0 ~len:!max :: !lst + +let to_seq bstr = + let rec go idx () = + if idx == length bstr then Seq.Nil + else + let chr = unsafe_get bstr idx in + Seq.Cons (chr, go (idx + 1)) + in + go 0 + +let to_seqi bstr = + let rec go idx () = + if idx == length bstr then Seq.Nil + else + let chr = unsafe_get bstr idx in + Seq.Cons ((idx, chr), go (idx + 1)) + in + go 0 + +let of_seq seq = + let n = ref 0 in + let buf = ref (make 0x7ff '\000') in + let resize () = + let new_len = Int.min (2 * length !buf) Sys.max_string_length in + (* TODO(dinosaure): should we keep this limit? *) + if length !buf == new_len then failwith "Bstr.of_seq: cannot grow bigstring"; + let new_buf = make new_len '\000' in + Bigarray.Array1.blit !buf (sub new_buf ~off:0 ~len:(length !buf)); + buf := new_buf + in + let fn chr = + if !n == length !buf then resize (); + unsafe_set !buf !n chr; + incr n + in + Seq.iter fn seq; sub !buf ~off:0 ~len:!n + +module Witness = struct + type (_, _) eq = Refl : ('a, 'a) eq + type _ equality = .. + + module type Inst = sig + type t + type _ equality += Eq : t equality + end + + type 'a t = (module Inst with type t = 'a) + + let make : type a. unit -> a t = + fun () -> + let module Inst = struct + type t = a + type _ equality += Eq : t equality + end in + (module Inst) + + let _eq : type a b. a t -> b t -> (a, b) eq option = + fun (module A) (module B) -> match A.Eq with B.Eq -> Some Refl | _ -> None +end + +module Pkt = struct + type bigstring = t + type endianness = Big_endian | Little_endian | Native_endian + + type _ t = + | Primary : 'a primary -> 'a t + | Record : 'a record -> 'a t + | Variant : 'a variant -> 'a t + + and _ primary = + | Char : char primary + | UInt8 : int primary + | Int8 : int primary + | UInt16 : endianness -> int primary + | Int16 : endianness -> int primary + | Int32 : endianness -> int32 primary + | Int64 : endianness -> int64 primary + | Var_int31 : int primary + | Var_int63 : int primary + | Bytes : int -> string primary + | CString : string primary + | Until : char -> string primary + + and _ a_case = C0 : 'a case0 -> 'a a_case | C1 : ('a, 'b) case1 -> 'a a_case + + and _ case_v = + | CV0 : 'a case0 -> 'a case_v + | CV1 : ('a, 'b) case1 * 'b -> 'a case_v + + and 'a case0 = { ctag0: int; c0: 'a } + + and ('a, 'b) case1 = { + ctag1: int + ; ctype1: 'b t + ; cwitn1: 'b Witness.t + ; c1: 'b -> 'a + } + + and 'a record = { rwit: 'a Witness.t; rfields: 'a fields_and_constr } + + and 'a fields_and_constr = + | Fields : ('a, 'b) fields * 'b -> 'a fields_and_constr + + and ('a, 'b) fields = + | F0 : ('a, 'a) fields + | F1 : ('a, 'b) field * ('a, 'c) fields -> ('a, 'b -> 'c) fields + + and ('a, 'b) field = { ftype: 'b t; fget: 'a -> 'b } + + and 'a variant = { + vwit: 'a Witness.t + ; vcases: 'a a_case array + ; vget: 'a -> 'a case_v + } + + module Fields_folder (Acc : sig + type ('a, 'b) t + end) = + struct + type 'a t = { + nil: ('a, 'a) Acc.t + ; cons: 'b 'c. ('a, 'b) field -> ('a, 'c) Acc.t -> ('a, 'b -> 'c) Acc.t + } + + let rec fold : type a c. a t -> (a, c) fields -> (a, c) Acc.t = + fun folder -> function + | F0 -> folder.nil + | F1 (f, fs) -> folder.cons f (fold folder fs) + end + + (* decoder *) + + module Record_decoder = Fields_folder (struct + type ('a, 'b) t = bigstring -> int ref -> 'b -> 'a + end) + + type 'a decoder = bigstring -> int ref -> 'a + + let decode_char bstr pos = + let idx = !pos in + incr pos; get bstr idx + [@@inline always] + + let decode_uint8 bstr pos = + let idx = !pos in + incr pos; get_uint8 bstr idx + [@@inline always] + + let decode_int8 bstr pos = + let idx = !pos in + incr pos; get_int8 bstr idx + [@@inline always] + + let decode_uint16 e bstr pos = + let idx = !pos in + incr pos; + match e with + | Big_endian -> get_uint16_be bstr idx + | Little_endian -> get_uint16_le bstr idx + | Native_endian -> get_uint16_ne bstr idx + [@@inline always] + + let decode_int16 e bstr pos = + let idx = !pos in + incr pos; + match e with + | Big_endian -> get_int16_be bstr idx + | Little_endian -> get_int16_le bstr idx + | Native_endian -> get_int16_ne bstr idx + [@@inline always] + + let decode_int32 e bstr pos = + let idx = !pos in + incr pos; + match e with + | Big_endian -> get_int32_be bstr idx + | Little_endian -> get_int32_le bstr idx + | Native_endian -> get_int32_ne bstr idx + [@@inline always] + + let decode_int64 e bstr pos = + let idx = !pos in + incr pos; + match e with + | Big_endian -> get_int64_be bstr idx + | Little_endian -> get_int64_le bstr idx + | Native_endian -> get_int64_ne bstr idx + [@@inline always] + + let decode_bytes len bstr pos = + let off = !pos in + pos := !pos + len; + sub_string bstr ~off ~len + + let rec decode : type a. a t -> a decoder = function + | Primary p -> prim p + | Record r -> record r + | Variant _ -> assert false + + and prim : type a. a primary -> a decoder = function + | Char -> decode_char + | UInt8 -> decode_uint8 + | Int8 -> decode_int8 + | UInt16 e -> decode_uint16 e + | Int16 e -> decode_int16 e + | Int32 e -> decode_int32 e + | Int64 e -> decode_int64 e + | Bytes len -> decode_bytes len + | _ -> assert false + + and record : type a. a record -> a decoder = + fun { rfields= Fields (fs, constr); _ } -> + let nil _bstr _pos fn = fn in + let cons { ftype; _ } k = + let decode = decode ftype in + fun bstr pos constr -> + let x = decode bstr pos in + let constr = constr x in + k bstr pos constr + in + let fn = Record_decoder.fold { nil; cons } fs in + fun bstr pos -> fn bstr pos constr + + (* combinators *) + + let char = Primary Char + let uint8 = Primary UInt8 + let int8 = Primary Int8 + let beuint16 = Primary (UInt16 Big_endian) + let leuint16 = Primary (UInt16 Little_endian) + let neuint16 = Primary (UInt16 Native_endian) + let beint16 = Primary (Int16 Big_endian) + let leint16 = Primary (Int16 Little_endian) + let neint16 = Primary (Int16 Native_endian) + let beint32 = Primary (Int32 Big_endian) + let leint32 = Primary (Int32 Little_endian) + let neint32 = Primary (Int32 Native_endian) + let beint64 = Primary (Int64 Big_endian) + let leint64 = Primary (Int64 Little_endian) + let neint64 = Primary (Int64 Native_endian) + let varint31 = Primary Var_int31 + let varint63 = Primary Var_int63 + let bytes len = Primary (Bytes len) + let cstring = Primary CString + let until byte = Primary (Until byte) + + (* record *) + + type ('a, 'b, 'c) open_record = ('a, 'c) fields -> 'b * ('a, 'b) fields + + let field ftype fget = { ftype; fget } + let record : 'b -> ('a, 'b, 'b) open_record = fun c fs -> (c, fs) + + let app : type a b c d. + (a, b, c -> d) open_record -> (a, c) field -> (a, b, d) open_record = + fun r f fs -> r (F1 (f, fs)) + + let sealr : type a b. (a, b, a) open_record -> a t = + fun r -> + let c, fs = r F0 in + let rwit = Witness.make () in + let sealed = { rwit; rfields= Fields (fs, c) } in + Record sealed + + let ( |+ ) = app + + (* variant *) + + type 'a case_p = 'a case_v + type ('a, 'b) case = int -> 'a a_case * 'b + + let case0 c0 ctag0 = + let c = { ctag0; c0 } in + (C0 c, CV0 c) + + let case1 : type a b. b t -> (b -> a) -> (a, b -> a case_p) case = + fun ctype1 c1 ctag1 -> + let cwitn1 : b Witness.t = Witness.make () in + let c = { ctag1; ctype1; cwitn1; c1 } in + (C1 c, fun v -> CV1 (c, v)) + + type ('a, 'b, 'c) open_variant = 'a a_case list -> 'c * 'a a_case list + + let variant c vs = (c, vs) + + let app v c cs = + let fc, cs = v cs in + let c, f = c (List.length cs) in + (fc f, c :: cs) + + let sealv v = + let vget, vcases = v [] in + let vwit = Witness.make () in + let vcases = Array.of_list (List.rev vcases) in + Variant { vwit; vcases; vget } + + let ( |~ ) = app +end diff --git a/lib/bstr.mli b/lib/bstr.mli new file mode 100644 index 0000000..e5b87af --- /dev/null +++ b/lib/bstr.mli @@ -0,0 +1,446 @@ +(** A small library for manipulating bigstrings. + + To clarify the use of bigstrings in OCaml, we advise you to read the + overview of bigstrings and the difference with bytes. After this theoretical + reading, this module offers a whole host of useful functions for + manipulating bigstrings. + + {1:overview Overview.} + + A bigstring is a special kind of memory area in the OCaml world. Unlike + bytes, bigstrings are allocated via [malloc()] or are available via + [Unix.map_file]. + + They therefore exist outside the space normally allocated for OCaml with + regard to all its values. So there are some particularities to the use of + bigstrings. + + The first thing to understand about bigstrings is that allocating them can + take time. Since a bigstring is obtained either by [malloc()] or by + [Unix.map_file], the former is a performance hit on the [malloc()] used + (which also depends on the fragmentation of the C heap) and the latter is a + system call that can interact with your file system. + + By way of comparison, a byte of less than 2048 bytes requires only 3 + processor instructions to exist and be available — beyond that, the bytes is + allocated in the major heap. + + It is therefore advisable to allocate just a few bigstrings and reuse them + throughout your application. It's even advisable to allocate large + bigstrings. + + A particularity of bigstrings is that they cannot be moved by the Garbage + Collector. Existing in a space other than that of OCaml (the C heap), they + don't move. With this advantage in mind, we can imagine several situations + where we'd like a memory zone that doesn't move: + - a bigstring can be manipulated by several threads/domains. Of course, + parallel accesses must be protected, but you can be sure that the + bigstring will not move throughout the process. Thus, its location in + memory can be shared by several computing units. + - it may be necessary, in system programming, to write to a particular zone + in order to interact with a device. In this case, the bigstring can be + found as an OCaml value bridging a special memory area (such as the + framebuffer). + + A final feature of bigstring is that it can be seen as a slice. You can have + another view of a bigstring that would be equally smaller. For example, the + {!val:sub} operation in particular doesn't copy your bigstring, but offers + you a "proxy" accessing the same memory area as the original bigstring. + + {1:pkt Encode & Decode packets.} + + In order to encode or decode packets (such as ARP or DNS packets), Bstr + offers a small API for converting a slice of bytes from a {!val:Bstr.t} to a + user-defined variant or record. *) + +type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t + +val memcpy : t -> src_off:int -> t -> dst_off:int -> len:int -> unit +(** [memcpy src ~src_off dst ~dst_off ~len] copies [len] bytes from [src] to + [dst]. [src] must not overlap [dst]. Use {!val:memmove} if [src] & [dst] do + overlap. *) + +val memmove : t -> src_off:int -> t -> dst_off:int -> len:int -> unit +(** [memmove src ~src_off dst ~dst_off ~len] copies [len] bytes from [src] to + [dst]. [src] and [dst] may overlap: copying takes place as though the bytes + in [src] are first copied into a temporary array that does not overlap [src] + or [dst], and the bytes are then copied from the temporary array to [dst]. +*) + +val memcmp : t -> src_off:int -> t -> dst_off:int -> len:int -> int +val memchr : t -> off:int -> len:int -> char -> int +val memset : t -> off:int -> len:int -> char -> unit + +val empty : t +(** [empty] is an empty bigstring. *) + +val length : t -> int +(** [length bstr] is the number of bytes in [bstr]. *) + +val get : t -> int -> char +(** [get bstr i] is the byte of [bstr]' at index [i]. This is equivalent to the + [bstr.{i}] notation. + + @raise Invalid_argument if [i] is not an index of [bstr]. *) + +val create : int -> t +val make : int -> char -> t +val of_string : string -> t +val fill : t -> off:int -> len:int -> char -> unit +val blit : t -> src_off:int -> t -> dst_off:int -> len:int -> unit + +val blit_from_string : + string -> src_off:int -> t -> dst_off:int -> len:int -> unit + +val blit_from_bytes : + bytes -> src_off:int -> t -> dst_off:int -> len:int -> unit + +(* +val init : int -> (int -> char) -> t +val copy : t -> t +val extend : t -> int -> int -> t +val concat : t -> t list -> t +val cat : t -> t -> t +val iter : (char -> unit) -> t -> unit +val iteri : (int -> char -> unit) -> t -> unit +val map : (char -> char) -> t -> t +val mapi : (int -> char -> char) -> t -> t +val fold_left : ('acc -> char -> 'acc) -> 'acc -> t -> 'acc +val fold_right : (char -> 'acc -> 'acc) -> t -> 'acc -> 'acc +val index : t -> ?rev:bool -> ?from:int -> char -> int +val contains : t -> ?rev:bool -> ?from:int -> char -> bool +val compare : t -> t -> int +val starts_with : prefix:string -> t -> bool +val ends_with : suffix:string -> t -> bool +val overlap : t -> t -> (int * int * int) option +*) + +val get_int8 : t -> int -> int +(** [get_int8 bstr i] is [bstr]'s signed 8-bit integer starting at byte index + [i]. *) + +val get_uint8 : t -> int -> int +(** [get_uint8 bstr i] is [bstr]'s unsigned 8-bit integer starting at byte index + [i]. *) + +val get_uint16_ne : t -> int -> int +(** [get_int16_ne bstr i] is [bstr]'s native-endian unsigned 16-bit integer + starting at byte index [i]. *) + +val get_uint16_le : t -> int -> int +(** [get_int16_le bstr i] is [bstr]'s little-endian unsigned 16-bit integer + starting at byte index [i]. *) + +val get_uint16_be : t -> int -> int +(** [get_int16_be bstr i] is [bstr]'s big-endian unsigned 16-bit integer + starting at byte index [i]. *) + +val get_int16_ne : t -> int -> int +(** [get_int16_ne bstr i] is [bstr]'s native-endian signed 16-bit integer + starting at byte index [i]. *) + +val get_int16_le : t -> int -> int +(** [get_int16_le bstr i] is [bstr]'s little-endian signed 16-bit integer + starting at byte index [i]. *) + +val get_int16_be : t -> int -> int +(** [get_int16_be bstr i] is [bstr]'s big-endian signed 16-bit integer starting + at byte index [i]. *) + +val get_int32_ne : t -> int -> int32 +(** [get_int32_ne bstr i] is [bstr]'s native-endian 32-bit integer starting at + byte index [i]. *) + +val get_int32_le : t -> int -> int32 +(** [get_int32_le bstr i] is [bstr]'s little-endian 32-bit integer starting at + byte index [i]. *) + +val get_int32_be : t -> int -> int32 +(** [get_int32_be bstr i] is [bstr]'s big-endian 32-bit integer starting at byte + index [i]. *) + +val get_int64_ne : t -> int -> int64 +(** [get_int64_ne bstr i] is [bstr]'s native-endian 64-bit integer starting at + byte index [i]. *) + +val get_int64_le : t -> int -> int64 +(** [get_int64_le bstr i] is [bstr]'s little-endian 64-bit integer starting at + byte index [i]. *) + +val get_int64_be : t -> int -> int64 +(** [get_int64_be bstr i] is [bstr]'s big-endian 64-bit integer starting at byte + index [i]. *) + +val set : t -> int -> char -> unit +val set_int8 : t -> int -> int -> unit +val set_uint8 : t -> int -> int -> unit +val set_uint16_ne : t -> int -> int -> unit +val set_uint16_le : t -> int -> int -> unit +val set_uint16_be : t -> int -> int -> unit +val set_int16_ne : t -> int -> int -> unit +val set_int16_le : t -> int -> int -> unit +val set_int16_be : t -> int -> int -> unit +val set_int32_ne : t -> int -> int32 -> unit +val set_int32_le : t -> int -> int32 -> unit +val set_int32_be : t -> int -> int32 -> unit +val set_int64_ne : t -> int -> int64 -> unit +val set_int64_le : t -> int -> int64 -> unit +val set_int64_be : t -> int -> int64 -> unit +val unsafe_set : t -> int -> char -> unit + +val sub : t -> off:int -> len:int -> t +(** [sub bstr ~off ~len] does not allocate a bigstring, but instead returns a + new view into [bstr] starting at [off], and with length [len]. + + {b Note} that this does not allocate a new buffer, but instead shares the + buffer of [bstr] with the newly-returned bigstring. *) + +val overlap : t -> t -> (int * int * int) option + +val sub_string : t -> off:int -> len:int -> string +(** [sub_string bstr ~off ~len] returns a string of length [len] containing the + bytes of [t] starting at [off]. *) + +val to_string : t -> string +(** [to_string bstr] is equivalent to + [sub_string bstr ~off:0 ~len:(length bstr)]. *) + +val blit_to_bytes : t -> src_off:int -> bytes -> dst_off:int -> len:int -> unit +(** [blit_to_bytes src ~src_off dst ~dst_off ~len] copies [len] bytes from + [src], starting at index [src_off], to byte sequence [dst], starting at + index [dst_off]. + + @raise Invalid_argument + if [src_off] and [len] do not designate a valid range of [src], or if + [dst_off] and [len] do not designate a valid range of [dst]. *) + +val is_empty : t -> bool +(** [is_empty bstr] is [length bstr = 0]. *) + +val is_prefix : affix:string -> t -> bool +(** [is_prefix ~affix bstr] is [true] iff [affix.[idx] = bstr.{idx}] for all + indices [idx] of [affix]. *) + +val is_infix : affix:string -> t -> bool +(** [is_infix ~affix bstr] is [true] iff there exists an index [j] in [bstr] + such that for all indices [i] of [affix] we have [affix.[i] = bstr.{j + i}]. +*) + +val is_suffix : affix:string -> t -> bool +(** [is_suffix ~affix bstr] is [true] iff [affix.[n - idx] = bstr.{m - idx}] for + all indices [idx] of [affix] with [n = String.length affix - 1] and + [m = length bstr - 1]. *) + +val for_all : (char -> bool) -> t -> bool +(** [for_all p bstr] is [true] iff for all indices [idx] of [bstr], + [p bstr.{idx} = true]. *) + +val exists : (char -> bool) -> t -> bool +(** [exists p bstr] is [true] iff there exists an index [idx] of [bstr] with + [p bstr.{idx} = true]. *) + +val equal : t -> t -> bool +(** [equal a b] is [a = b]. *) + +val with_range : ?first:int -> ?len:int -> t -> t +(** [with_range ~first ~len bstr] are the consecutive bytes of [bstr] whose + indices exist in the range \[[first];[first + len - 1]\]. + + [first] defaults to [0] and [len] to [max_int]. Note that [first] can be any + integer and [len] any positive integer. *) + +val with_index_range : ?first:int -> ?last:int -> t -> t +(** [with_index_range ~first ~last bstr] are the consecutive bytes of [bstr] + whose indices exists in the range \[[first];[last]\]. + + [first] defaults to [0] and [last] to [length bstr - 1]. + + Note that both [first] and [last] can be any integer. If [first > last] the + interval is empty and the empty bigstring is returned. *) + +val trim : ?drop:(char -> bool) -> t -> t +(** [trim ~drop bstr] is [bstr] with prefix and suffix bytes satisfying [drop] + in [bstr] removed. [drop] defaults to [fun chr -> chr = ' ']. *) + +val span : + ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t * t +(** [span ~rev ~min ~max ~sat bstr] is [(l, r)] where: + - if [rev] is [false] (default), [l] is at least [min] and at most [max] + consecutive [sat] satisfying initial bytes of [bstr] or {!empty} if there + are no such bytes. [r] are the remaining bytes of [bstr]. + - if [rev] is [true], [r] is at least [min] and at most [max] consecutive + [sat] satisfying final bytes of [bstr] or {!empty} if there are no such + bytes. [l] are the remaining bytes of [bstr]. + + If [max] is unspecified the span is unlimited. If [min] is unspecified it + defaults to [0]. If [min > max] the condition can't be satisfied and the + left or right span, depending on [rev], is always empty. [sat] defaults to + [Fun.const true]. + + @raise Invalid_argument if [max] or [min] is negative. *) + +val take : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t +(** [take ~rev ~min ~max ~sat bstr] is the matching span of {!span} without the + remaining one. In other words: + + {[ + (if rev then snd else fst) (span ~rev ~min ~max ~sat bstr) + ]} *) + +val drop : ?rev:bool -> ?min:int -> ?max:int -> ?sat:(char -> bool) -> t -> t +(** [drop ~rev ~min ~max ~sat bstr] is the remaining span of {!span} without the + matching span. In other words: + + {[ + (if rev then fst else snd) (span ~rev ~min ~max ~sat bstr) + ]} *) + +val shift : t -> int -> t +(** [shift bstr n] is [sub bstr n (length bstr - n)]. *) + +val split_on_char : char -> t -> t list +val to_seq : t -> char Seq.t +val to_seqi : t -> (int * char) Seq.t +val of_seq : char Seq.t -> t + +module Pkt : sig + type bigstring = t + type 'a t + + val char : char t + val uint8 : int t + val int8 : int t + val beuint16 : int t + val leuint16 : int t + val neuint16 : int t + val beint16 : int t + val leint16 : int t + val neint16 : int t + val beint32 : int32 t + val leint32 : int32 t + val neint32 : int32 t + val beint64 : int64 t + val leint64 : int64 t + val neint64 : int64 t + val varint31 : int t + val varint63 : int t + val bytes : int -> string t + val cstring : string t + val until : char -> string t + + (* {2:records Records.} + + {[ + type header = + { version : int32 + ; number : int32 } + + let _PACK = 0x5041434bl + + let header = + record (fun pack version number -> + if pack <> _PACK + then invalid_arg "Invalid PACK file"; + { version; number }) + |+ field beint32 (fun _ -> _PACK) + |+ field beint32 (fun t -> t.version) + |+ field beint32 (fun t -> t.number) + |> sealr + ]} *) + + type ('a, 'b, 'c) open_record + (** The type for representing open records of type ['a] with a constructor of + ['b]. ['c] represents the remaining fields to be described using the + {!val:(|+)} operator. An open record initially stisfies ['c = 'b] and can + be {{!val:sealr} sealed} once ['c = 'a]. *) + + val record : 'b -> ('a, 'b, 'b) open_record + (** [record f] is an incomplete representation of the record of type ['a] with + constructor [f]. To complete the representation, add fields with + {!val:(|+)} and then seal the record with {!val:sealr}. *) + + type ('a, 'b) field + (** The type for fields holding values of type ['b] and belonging to a record + of type ['a]. *) + + val field : 'a t -> ('b -> 'a) -> ('b, 'a) field + (** [field n t g] is the representation of the field called [n] of type [t] + with getter [g]. For instance: + + {[ + type t = { foo: string } + + let foo = field cstring (fun t -> t.foo) + ]} *) + + val ( |+ ) : + ('a, 'b, 'c -> 'd) open_record -> ('a, 'c) field -> ('a, 'b, 'd) open_record + (** [r |+ f] is the open record [r] augmented with the field [f]. *) + + val sealr : ('a, 'b, 'a) open_record -> 'a t + (** [sealr r] seals the open record [r]. *) + + (** {2:variants Variants.} + + {[ + type t = Foo | Bar of string + + let t = + variant (fun foo bar -> function Foo -> foo | Bar s -> bar s) + |~ case0 Foo + |~ case1 cstring (fun x -> Bar x) + |> sealv + ]} *) + + type ('a, 'b, 'c) open_variant + (** The type for representing open variants of type ['a] with pattern-matching + of type ['b]. ['c] represents the remaining constructors to be described + using the {!val:(|~)} operator. An open variant initially satisfies + ['c = 'b] and can be {{!val:sealv} sealed} once ['c = 'a]. *) + + val variant : 'b -> ('a, 'b, 'b) open_variant + (** [variant n p] is an incomplete representation of the variant type called + [n] of type ['a] using [p] to deconstruct values. To complete the + representation, add cases with {!val:(|~)} and then seal the variant with + {!val:sealv}. *) + + type ('a, 'b) case + (** The type for representing variant cases of type ['a] with patterns of type + ['b]. *) + + type 'a case_p + (** The type for representing patterns for a variant of type ['a]. *) + + val case0 : 'a -> ('a, 'a case_p) case + (** [case0 v] is a representation of a variant constructor [v] with no + arguments. For instance: + + {[ + type t = Foo + + let foo = case0 Foo + ]} *) + + val case1 : 'b t -> ('b -> 'a) -> ('a, 'b -> 'a case_p) case + (** [case1 n t c] is a representation of a variant constructor [c] with an + argument of type [t]. For instances: + + {[ + type t = Foo of string + + let foo = case1 cstring (fun s -> Foo s) + ]} *) + + val ( |~ ) : + ('a, 'b, 'c -> 'd) open_variant + -> ('a, 'c) case + -> ('a, 'b, 'd) open_variant + (** [v |~ c] is the open variant [v] augmented with the case [c]. *) + + val sealv : ('a, 'b, 'a -> 'a case_p) open_variant -> 'a t + (** [sealv v] seals the open variant [v]. *) + + (* {2:decoder Decoder.} *) + + val decode : 'a t -> bigstring -> int ref -> 'a +end diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..beaa82c --- /dev/null +++ b/lib/dune @@ -0,0 +1,10 @@ +(library + (name bstr) + (public_name bstr) + (modules bstr) + (foreign_stubs + (language c) + (names bstr) + (flags + (:standard -Wcast-align))) + (wrapped false)) diff --git a/test/dune b/test/dune new file mode 100644 index 0000000..fcbfaae --- /dev/null +++ b/test/dune @@ -0,0 +1,9 @@ +(library + (name test) + (modules test) + (libraries unix)) + +(test + (name t) + (modules t) + (libraries bstr test)) diff --git a/test/t.ml b/test/t.ml new file mode 100644 index 0000000..d79f832 --- /dev/null +++ b/test/t.ml @@ -0,0 +1,258 @@ +open Test + +let test01 = + let descr = {text|"empty bstr"|text} in + Test.test ~title:"empty bstr" ~descr @@ fun () -> + let x = Bstr.create 0 in + check (0 = Bstr.length x); + let y = Bstr.to_string x in + check ("" = y) + +let test02 = + let descr = {text|negative length|text} in + Test.test ~title:"negative bstr" ~descr @@ fun () -> + try + let _ = Bstr.create (-1) in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test03 = + let descr = {text|positive shift|text} in + Test.test ~title:"positive shift" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.shift x 1 in + check (0 = Bstr.length y) + +let test04 = + let descr = {text|negative shift|text} in + Test.test ~title:"negative shift" ~descr @@ fun () -> + let x = Bstr.create 2 in + let y = Bstr.sub x ~off:1 ~len:1 in + begin + try + let _ = Bstr.shift x (-1) in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + end; + begin + try + let _ = Bstr.shift y (-1) in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + end + +let test05 = + let descr = {text|bad positive shift|text} in + Test.test ~title:"bad positive shift" ~descr @@ fun () -> + let x = Bstr.create 10 in + try + let _ = Bstr.shift x 11 in + check false + with Invalid_argument _ -> check true + +let test06 = + let descr = {text|sub|text} in + Test.test ~title:"sub" ~descr @@ fun () -> + let x = Bstr.create 100 in + let y = Bstr.sub x ~off:10 ~len:80 in + begin + match Bstr.overlap x y with + | Some (len, x_off, _) -> + check (len = 80); + check (x_off = 10) + | None -> check false + end; + let z = Bstr.sub y ~off:20 ~len:60 in + begin + match Bstr.overlap x z with + | Some (len, x_off, _) -> + check (len = 60); + check (x_off = 30) + | None -> check false + end + +let test07 = + let descr = {text|negative sub|text} in + Test.test ~title:"negative sub" ~descr @@ fun () -> + let x = Bstr.create 2 in + let y = Bstr.sub ~off:1 ~len:1 x in + begin + try + let _ = Bstr.sub x ~off:(-1) ~len:0 in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + end; + begin + try + let _ = Bstr.sub y ~off:(-1) ~len:0 in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + end + +let test08 = + let descr = {text|sub len too big|text} in + Test.test ~title:"sub len too big" ~descr @@ fun () -> + let x = Bstr.create 0 in + try + let _ = Bstr.sub x ~off:0 ~len:1 in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test09 = + let descr = {text|sub len too small|text} in + Test.test ~title:"sub len too small" ~descr @@ fun () -> + let x = Bstr.create 0 in + try + let _ = Bstr.sub x ~off:0 ~len:(-1) in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test10 = + let descr = {text|sub offset too big|text} in + Test.test ~title:"sub offset too big" ~descr @@ fun () -> + let x = Bstr.create 10 in + begin + try + let _ = Bstr.sub x ~off:11 ~len:0 in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + end; + let y = Bstr.sub x ~off:1 ~len:9 in + begin + try + let _ = Bstr.sub y ~off:10 ~len:0 in + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + end + +let test11 = + let descr = {text|blit offset too big|text} in + Test.test ~title:"blit offset too big" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:2 y ~dst_off:1 ~len:1; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test12 = + let descr = {text|blit offset too small|text} in + Test.test ~title:"blit offset too small" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:(-1) y ~dst_off:1 ~len:1; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test13 = + let descr = {text|blit dst offset too big|text} in + Test.test ~title:"blit dst offset too big" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:1 y ~dst_off:2 ~len:1; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test14 = + let descr = {text|blit dst offset too small|text} in + Test.test ~title:"blit dst offset too small" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:1 y ~dst_off:(-1) ~len:1; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test15 = + let descr = {text|blit dst offset negative|text} in + Test.test ~title:"blit dst offset negative" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:0 y ~dst_off:(-1) ~len:1; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test16 = + let descr = {text|blit len too big|text} in + Test.test ~title:"blit len too big" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 2 in + try + Bstr.blit x ~src_off:0 y ~dst_off:0 ~len:2; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test17 = + let descr = {text|blit len too big (2)|text} in + Test.test ~title:"blit len too big (2)" ~descr @@ fun () -> + let x = Bstr.create 2 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:0 y ~dst_off:0 ~len:2; + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let test18 = + let descr = {text|blit len too small|text} in + Test.test ~title:"blit len too small" ~descr @@ fun () -> + let x = Bstr.create 1 in + let y = Bstr.create 1 in + try + Bstr.blit x ~src_off:0 y ~dst_off:0 ~len:(-1); + check false + with + | Invalid_argument _ -> check true + | _exn -> check false + +let ( / ) = Filename.concat + +let () = + let tests = + [ + test01; test02; test03; test04; test05; test06; test07; test08; test09 + ; test10; test11; test12; test13; test14; test15; test16; test17; test18 + ] + in + let ({ Test.directory } as runner) = Test.runner (Sys.getcwd () / "_tests") in + let run idx test = + Format.printf "test%03d: %!" (succ idx); + Test.run runner test; + Format.printf "ok\n%!" + in + Format.printf "Run tests into %s\n%!" directory; + List.iteri run tests diff --git a/test/test.ml b/test/test.ml new file mode 100644 index 0000000..0c42a13 --- /dev/null +++ b/test/test.ml @@ -0,0 +1,75 @@ +let strf fmt = Format.asprintf fmt +let ( / ) = Filename.concat + +let check test = + let bt = Printexc.get_callstack max_int in + try + assert test; + print_string "."; + flush stdout + with exn -> + print_string "x"; + flush stdout; + Printexc.raise_with_backtrace exn bt + +type t = { title: string; descr: string; fn: unit -> unit } + +let test ~title ~descr fn = { title; descr; fn } + +type runner = { directory: string } + +let rec mkdir_p path perm = + if path <> "" then begin + try Unix.mkdir path perm with + | Unix.Unix_error (EEXIST, _, _) when Sys.is_directory path -> () + | Unix.Unix_error (ENOENT, _, _) -> + mkdir_p (Filename.dirname path) perm; + Unix.mkdir path perm + end + +let mkdir ({ directory } as runner) = mkdir_p directory 0o755; runner + +type ('a, 'b) str = ('a -> 'b, Format.formatter, unit, string) format4 + +let runner ?(g = Random.State.make_self_init ()) + ?(fmt : ('a, 'b) str = "run-%s") root = + let random_string len = + let res = Bytes.create len in + for i = 0 to len - 1 do + let chr = + match Random.State.int g (26 + 26 + 10) with + | n when n < 26 -> Char.chr (Char.code 'a' + n) + | n when n < 26 + 26 -> Char.chr (Char.code 'A' + n - 26) + | n -> Char.chr (Char.code '0' + n - 26 - 26) + in + Bytes.set res i chr + done; + Bytes.unsafe_to_string res + in + let rec go retry = + if retry >= 10 then failwith "Impossible to create a test directory"; + let directory = root / strf fmt (random_string 4) in + if Sys.file_exists directory then go (succ retry) else mkdir { directory } + in + go 0 + +let run { directory= dir } { title; fn; _ } = + let old_stderr = Unix.dup Unix.stderr in + let new_stderr = open_out (dir / strf "%s.stderr" title) in + Unix.dup2 (Unix.descr_of_out_channel new_stderr) Unix.stderr; + let finally () = + flush stderr; + Unix.dup2 old_stderr Unix.stderr; + Unix.close old_stderr; + close_out new_stderr + in + Format.eprintf "*** %s ***\n%!" title; + try Fun.protect ~finally fn + with exn -> + let ic = open_in (dir / strf "%s.stderr" title) in + let ln = in_channel_length ic in + let rs = Bytes.create ln in + really_input ic rs 0 ln; + Format.printf "Terminated with: %S\n%!" (Printexc.to_string exn); + Format.printf "%s\n%!" (Bytes.unsafe_to_string rs); + exit 1 diff --git a/test/test.mli b/test/test.mli new file mode 100644 index 0000000..b4033a8 --- /dev/null +++ b/test/test.mli @@ -0,0 +1,8 @@ +type t +type runner = private { directory: string } +type ('a, 'b) str = ('a -> 'b, Format.formatter, unit, string) format4 + +val check : bool -> unit +val test : title:string -> descr:string -> (unit -> unit) -> t +val runner : ?g:Random.State.t -> ?fmt:(string, string) str -> string -> runner +val run : runner -> t -> unit