2025-01-15 15:43:37 +00:00
|
|
|
type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
|
|
|
|
|
|
|
|
module Bytes = struct
|
|
|
|
include Bytes
|
|
|
|
|
|
|
|
external unsafe_get_uint8 : bytes -> int -> int = "%bytes_unsafe_get"
|
|
|
|
external unsafe_set_uint8 : bytes -> int -> int -> unit = "%bytes_unsafe_set"
|
|
|
|
external unsafe_get_int32_ne : bytes -> int -> int32 = "%caml_bytes_get32u"
|
|
|
|
|
|
|
|
external unsafe_set_int32_ne : bytes -> int -> int32 -> unit
|
|
|
|
= "%caml_bytes_set32u"
|
|
|
|
end
|
|
|
|
|
|
|
|
external swap16 : int -> int = "%bswap16"
|
|
|
|
external swap32 : int32 -> int32 = "%bswap_int32"
|
|
|
|
external swap64 : int64 -> int64 = "%bswap_int64"
|
|
|
|
external get_uint8 : t -> int -> int = "%caml_ba_ref_1"
|
|
|
|
external set_uint8 : t -> int -> int -> unit = "%caml_ba_set_1"
|
|
|
|
external get_uint16_ne : t -> int -> int = "%caml_bigstring_get16"
|
|
|
|
external set_int16_ne : t -> int -> int -> unit = "%caml_bigstring_set16"
|
|
|
|
external get_int32_ne : t -> int -> int32 = "%caml_bigstring_get32"
|
|
|
|
external set_int32_ne : t -> int -> int32 -> unit = "%caml_bigstring_set32"
|
|
|
|
external set_int64_ne : t -> int -> int64 -> unit = "%caml_bigstring_set64"
|
|
|
|
external unsafe_get_uint8 : t -> int -> int = "%caml_ba_unsafe_ref_1"
|
|
|
|
external unsafe_set_uint8 : t -> int -> int -> unit = "%caml_ba_unsafe_set_1"
|
|
|
|
external unsafe_get_uint16_ne : t -> int -> int = "%caml_bigstring_get16u"
|
|
|
|
|
|
|
|
external unsafe_set_uint16_ne : t -> int -> int -> unit
|
|
|
|
= "%caml_bigstring_set16u"
|
|
|
|
|
|
|
|
external unsafe_get_int32_ne : t -> int -> int32 = "%caml_bigstring_get32u"
|
|
|
|
|
|
|
|
external unsafe_set_int32_ne : t -> int -> int32 -> unit
|
|
|
|
= "%caml_bigstring_set32u"
|
|
|
|
|
|
|
|
external unsafe_get_int64_ne : t -> (int[@untagged]) -> (int64[@unboxed])
|
|
|
|
= "bstr_bytecode_get64u" "bstr_native_get64u"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
external _unsafe_set_int64_ne : t -> int -> int64 -> unit
|
|
|
|
= "%caml_bigstring_set64u"
|
|
|
|
|
|
|
|
external unsafe_memcmp :
|
|
|
|
t
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> t
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged]) = "bstr_bytecode_memcmp" "bstr_native_memcmp"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
external unsafe_memcpy :
|
|
|
|
t -> (int[@untagged]) -> t -> (int[@untagged]) -> (int[@untagged]) -> unit
|
|
|
|
= "bstr_bytecode_memcpy" "bstr_native_memcpy"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
external unsafe_memmove :
|
|
|
|
t -> (int[@untagged]) -> t -> (int[@untagged]) -> (int[@untagged]) -> unit
|
|
|
|
= "bstr_bytecode_memmove" "bstr_native_memmove"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
external unsafe_memchr :
|
|
|
|
t
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged]) = "bstr_bytecode_memchr" "bstr_native_memchr"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
external unsafe_memset :
|
|
|
|
t
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged])
|
|
|
|
-> (int[@untagged]) = "bstr_bytecode_memchr" "bstr_native_memchr"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
let memcmp src ~src_off dst ~dst_off ~len =
|
|
|
|
if
|
|
|
|
len < 0
|
|
|
|
|| src_off < 0
|
|
|
|
|| src_off > Bigarray.Array1.dim src - len
|
|
|
|
|| dst_off < 0
|
|
|
|
|| dst_off > Bigarray.Array1.dim dst - len
|
|
|
|
then invalid_arg "Bstr.memcmp";
|
|
|
|
unsafe_memcmp src src_off dst dst_off len
|
|
|
|
|
|
|
|
let memcpy src ~src_off dst ~dst_off ~len =
|
|
|
|
if
|
|
|
|
len < 0
|
|
|
|
|| src_off < 0
|
|
|
|
|| src_off > Bigarray.Array1.dim src - len
|
|
|
|
|| dst_off < 0
|
|
|
|
|| dst_off > Bigarray.Array1.dim dst - len
|
|
|
|
then invalid_arg "Bstr.memcpy";
|
|
|
|
unsafe_memcpy src src_off dst dst_off len
|
|
|
|
|
|
|
|
let memmove src ~src_off dst ~dst_off ~len =
|
|
|
|
if
|
|
|
|
len < 0
|
|
|
|
|| src_off < 0
|
|
|
|
|| src_off > Bigarray.Array1.dim src - len
|
|
|
|
|| dst_off < 0
|
|
|
|
|| dst_off > Bigarray.Array1.dim dst - len
|
|
|
|
then invalid_arg "Bstr.memmove";
|
|
|
|
unsafe_memmove src src_off dst dst_off len
|
|
|
|
|
|
|
|
let memchr src ~off ~len value =
|
|
|
|
if len < 0 || off < 0 || off > Bigarray.Array1.dim src - len then
|
|
|
|
invalid_arg "Bstr.memchr";
|
|
|
|
unsafe_memchr src off len (Char.code value)
|
|
|
|
|
|
|
|
let memset src ~off ~len value =
|
|
|
|
if len < 0 || off < 0 || off > Bigarray.Array1.dim src - len then
|
|
|
|
invalid_arg "Bstr.memset";
|
|
|
|
ignore (unsafe_memset src off len (Char.code value))
|
|
|
|
|
|
|
|
let empty = Bigarray.Array1.create Bigarray.char Bigarray.c_layout 0
|
|
|
|
let create len = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len
|
|
|
|
|
|
|
|
external length : t -> int = "%caml_ba_dim_1"
|
|
|
|
external get : t -> int -> char = "%caml_ba_ref_1"
|
|
|
|
external unsafe_get : t -> int -> char = "%caml_ba_unsafe_ref_1"
|
|
|
|
external set : t -> int -> char -> unit = "%caml_ba_set_1"
|
|
|
|
external unsafe_set : t -> int -> char -> unit = "%caml_ba_unsafe_set_1"
|
|
|
|
|
|
|
|
let unsafe_fill bstr ~off ~len v =
|
|
|
|
let nv = Nativeint.of_int v in
|
|
|
|
let vv = Nativeint.(logor (shift_left nv 8) nv) in
|
|
|
|
let vvvv = Nativeint.(logor (shift_left vv 16) vv) in
|
|
|
|
let vvvv = Nativeint.to_int32 vvvv in
|
|
|
|
let len0 = len land 3 in
|
|
|
|
let len1 = len lsr 2 in
|
|
|
|
for i = 0 to len1 - 1 do
|
|
|
|
let i = i * 4 in
|
|
|
|
unsafe_set_int32_ne bstr (off + i) vvvv
|
|
|
|
done;
|
|
|
|
for i = 0 to len0 - 1 do
|
|
|
|
let i = (len1 * 4) + i in
|
|
|
|
unsafe_set_uint8 bstr (off + i) v
|
|
|
|
done
|
|
|
|
|
|
|
|
let fill bstr ~off ~len chr =
|
|
|
|
if len < 0 || off < 0 || off > length bstr - len then invalid_arg "Bstr.fill";
|
|
|
|
unsafe_fill bstr ~off ~len (Char.code chr)
|
|
|
|
|
|
|
|
let make len chr =
|
|
|
|
let bstr = create len in
|
|
|
|
unsafe_fill bstr ~off:0 ~len (Char.code chr);
|
|
|
|
(* [Obj.magic] instead of [Char.code]? *)
|
|
|
|
bstr
|
|
|
|
|
2025-02-11 18:29:15 +00:00
|
|
|
let init len fn =
|
|
|
|
let bstr = create len in
|
|
|
|
for i = 0 to len - 1 do
|
|
|
|
unsafe_set bstr i (fn i)
|
|
|
|
done;
|
|
|
|
bstr
|
|
|
|
|
|
|
|
let copy src =
|
|
|
|
let len = length src in
|
|
|
|
let bstr = create len in
|
|
|
|
unsafe_memcpy src 0 bstr 0 len;
|
|
|
|
bstr
|
|
|
|
|
|
|
|
let chop ?(rev = false) bstr =
|
|
|
|
if length bstr == 0 then None
|
|
|
|
else if not rev then Some (unsafe_get bstr 0)
|
|
|
|
else Some (unsafe_get bstr (length bstr - 1))
|
|
|
|
|
2025-01-15 15:43:37 +00:00
|
|
|
let get_int64_ne bstr idx =
|
|
|
|
if idx < 0 || idx > length bstr - 8 then invalid_arg "Bstr.get_int64_ne";
|
|
|
|
unsafe_get_int64_ne bstr idx
|
|
|
|
|
|
|
|
let get_int8 bstr i =
|
|
|
|
(get_uint8 bstr i lsl (Sys.int_size - 8)) asr (Sys.int_size - 8)
|
|
|
|
|
|
|
|
let get_uint16_le bstr i =
|
|
|
|
if Sys.big_endian then swap16 (get_uint16_ne bstr i) else get_uint16_ne bstr i
|
|
|
|
|
|
|
|
let get_uint16_be bstr i =
|
|
|
|
if not Sys.big_endian then swap16 (get_uint16_ne bstr i)
|
|
|
|
else get_uint16_ne bstr i
|
|
|
|
|
|
|
|
let _unsafe_get_uint16_le bstr i =
|
|
|
|
(* TODO(dinosaure): for unicode. *)
|
|
|
|
if Sys.big_endian then swap16 (unsafe_get_uint16_ne bstr i)
|
|
|
|
else unsafe_get_uint16_ne bstr i
|
|
|
|
|
|
|
|
let _unsafe_get_uint16_be bstr i =
|
|
|
|
(* TODO(dinosaure): for unicode. *)
|
|
|
|
if not Sys.big_endian then swap16 (unsafe_get_uint16_ne bstr i)
|
|
|
|
else unsafe_get_uint16_ne bstr i
|
|
|
|
|
|
|
|
let get_int16_ne bstr i =
|
|
|
|
(get_uint16_ne bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
|
|
|
|
|
|
|
|
let get_int16_le bstr i =
|
|
|
|
(get_uint16_le bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
|
|
|
|
|
|
|
|
let get_int16_be bstr i =
|
|
|
|
(get_uint16_be bstr i lsl (Sys.int_size - 16)) asr (Sys.int_size - 16)
|
|
|
|
|
|
|
|
let get_int32_le bstr i =
|
|
|
|
if Sys.big_endian then swap32 (get_int32_ne bstr i) else get_int32_ne bstr i
|
|
|
|
|
|
|
|
let get_int32_be bstr i =
|
|
|
|
if not Sys.big_endian then swap32 (get_int32_ne bstr i)
|
|
|
|
else get_int32_ne bstr i
|
|
|
|
|
|
|
|
let get_int64_le bstr i =
|
|
|
|
if Sys.big_endian then swap64 (get_int64_ne bstr i) else get_int64_ne bstr i
|
|
|
|
|
|
|
|
let get_int64_be bstr i =
|
|
|
|
if not Sys.big_endian then swap64 (get_int64_ne bstr i)
|
|
|
|
else get_int64_ne bstr i
|
|
|
|
|
|
|
|
let _unsafe_set_uint16_le bstr i x =
|
|
|
|
(* TODO(dinosaure): for unicode. *)
|
|
|
|
if Sys.big_endian then unsafe_set_uint16_ne bstr i (swap16 x)
|
|
|
|
else unsafe_set_uint16_ne bstr i x
|
|
|
|
|
|
|
|
let _unsafe_set_uint16_be bstr i x =
|
|
|
|
(* TODO(dinosaure): for unicode. *)
|
|
|
|
if Sys.big_endian then unsafe_set_uint16_ne bstr i x
|
|
|
|
else unsafe_set_uint16_ne bstr i (swap16 x)
|
|
|
|
|
|
|
|
let set_int16_le bstr i x =
|
|
|
|
if Sys.big_endian then set_int16_ne bstr i (swap16 x)
|
|
|
|
else set_int16_ne bstr i x
|
|
|
|
|
|
|
|
let set_int16_be bstr i x =
|
|
|
|
if not Sys.big_endian then set_int16_ne bstr i (swap16 x)
|
|
|
|
else set_int16_ne bstr i x
|
|
|
|
|
|
|
|
let set_int32_le bstr i x =
|
|
|
|
if Sys.big_endian then set_int32_ne bstr i (swap32 x)
|
|
|
|
else set_int32_ne bstr i x
|
|
|
|
|
|
|
|
let set_int32_be bstr i x =
|
|
|
|
if not Sys.big_endian then set_int32_ne bstr i (swap32 x)
|
|
|
|
else set_int32_ne bstr i x
|
|
|
|
|
|
|
|
let set_int64_le bstr i x =
|
|
|
|
if Sys.big_endian then set_int64_ne bstr i (swap64 x)
|
|
|
|
else set_int64_ne bstr i x
|
|
|
|
|
|
|
|
let set_int64_be bstr i x =
|
|
|
|
if not Sys.big_endian then set_int64_ne bstr i (swap64 x)
|
|
|
|
else set_int64_ne bstr i x
|
|
|
|
|
|
|
|
let set_int8 = set_uint8
|
|
|
|
let set_uint16_ne = set_int16_ne
|
|
|
|
let set_uint16_be = set_int16_be
|
|
|
|
let set_uint16_le = set_int16_le
|
|
|
|
|
2025-02-11 18:29:15 +00:00
|
|
|
external unsafe_sub : t -> (int[@untagged]) -> (int[@untagged]) -> t
|
|
|
|
= "bstr_bytecode_unsafe_sub" "bstr_native_unsafe_sub"
|
|
|
|
|
|
|
|
let sub bstr ~off ~len =
|
|
|
|
if off < 0 || len < 0 || off > length bstr - len then invalid_arg "Bstr.sub";
|
|
|
|
unsafe_sub bstr off len
|
2025-01-15 15:43:37 +00:00
|
|
|
|
|
|
|
let unsafe_blit src ~src_off dst ~dst_off ~len =
|
2025-02-11 18:29:15 +00:00
|
|
|
unsafe_memmove src src_off dst dst_off len
|
2025-01-15 15:43:37 +00:00
|
|
|
|
|
|
|
let blit src ~src_off dst ~dst_off ~len =
|
|
|
|
if
|
|
|
|
len < 0
|
|
|
|
|| src_off < 0
|
|
|
|
|| src_off > length src - len
|
|
|
|
|| dst_off < 0
|
|
|
|
|| dst_off > length dst - len
|
|
|
|
then invalid_arg "Bstr.blit";
|
|
|
|
unsafe_blit src ~src_off dst ~dst_off ~len
|
|
|
|
|
|
|
|
let unsafe_blit_to_bytes bstr ~src_off dst ~dst_off ~len =
|
|
|
|
let len0 = len land 3 in
|
|
|
|
let len1 = len lsr 2 in
|
|
|
|
for i = 0 to len1 - 1 do
|
|
|
|
let i = i * 4 in
|
|
|
|
let v = unsafe_get_int32_ne bstr (src_off + i) in
|
|
|
|
Bytes.unsafe_set_int32_ne dst (dst_off + i) v
|
|
|
|
done;
|
|
|
|
for i = 0 to len0 - 1 do
|
|
|
|
let i = (len1 * 4) + i in
|
|
|
|
let v = unsafe_get_uint8 bstr (src_off + i) in
|
|
|
|
Bytes.unsafe_set_uint8 dst (dst_off + i) v
|
|
|
|
done
|
|
|
|
|
|
|
|
let unsafe_blit_from_bytes src ~src_off bstr ~dst_off ~len =
|
|
|
|
let len0 = len land 3 in
|
|
|
|
let len1 = len lsr 2 in
|
|
|
|
for i = 0 to len1 - 1 do
|
|
|
|
let i = i * 4 in
|
|
|
|
let v = Bytes.unsafe_get_int32_ne src (src_off + i) in
|
|
|
|
unsafe_set_int32_ne bstr (dst_off + i) v
|
|
|
|
done;
|
|
|
|
for i = 0 to len0 - 1 do
|
|
|
|
let i = (len1 * 4) + i in
|
|
|
|
let v = Bytes.unsafe_get_uint8 src (src_off + i) in
|
|
|
|
unsafe_set_uint8 bstr (dst_off + i) v
|
|
|
|
done
|
|
|
|
|
|
|
|
let blit_from_bytes src ~src_off bstr ~dst_off ~len =
|
|
|
|
if
|
|
|
|
len < 0
|
|
|
|
|| src_off < 0
|
|
|
|
|| src_off > Bytes.length src - len
|
|
|
|
|| dst_off < 0
|
|
|
|
|| dst_off > length bstr - len
|
|
|
|
then invalid_arg "Bstr.blit_from_bytes";
|
|
|
|
unsafe_blit_from_bytes src ~src_off bstr ~dst_off ~len
|
|
|
|
|
|
|
|
let blit_from_string src ~src_off bstr ~dst_off ~len =
|
|
|
|
blit_from_bytes (Bytes.unsafe_of_string src) ~src_off bstr ~dst_off ~len
|
|
|
|
|
|
|
|
let blit_to_bytes bstr ~src_off dst ~dst_off ~len =
|
|
|
|
if
|
|
|
|
len < 0
|
|
|
|
|| src_off < 0
|
|
|
|
|| src_off > length bstr - len
|
|
|
|
|| dst_off < 0
|
|
|
|
|| dst_off > Bytes.length dst - len
|
|
|
|
then invalid_arg "Bstr.blit_to_bytes";
|
|
|
|
unsafe_blit_to_bytes bstr ~src_off dst ~dst_off ~len
|
|
|
|
|
|
|
|
let of_string str =
|
|
|
|
let len = String.length str in
|
|
|
|
let bstr = create len in
|
|
|
|
unsafe_blit_from_bytes
|
|
|
|
(Bytes.unsafe_of_string str)
|
|
|
|
~src_off:0 bstr ~dst_off:0 ~len;
|
|
|
|
bstr
|
|
|
|
|
2025-02-11 18:29:15 +00:00
|
|
|
let string ?(off = 0) ?len str =
|
|
|
|
let len =
|
|
|
|
match len with Some len -> len | None -> String.length str - off
|
|
|
|
in
|
|
|
|
if off < 0 || len < 0 || off > String.length str - len then
|
|
|
|
invalid_arg "Bstr.string";
|
|
|
|
let bstr = create len in
|
|
|
|
unsafe_blit_from_bytes
|
|
|
|
(Bytes.unsafe_of_string str)
|
|
|
|
~src_off:off bstr ~dst_off:0 ~len;
|
|
|
|
bstr
|
|
|
|
|
2025-01-15 15:43:37 +00:00
|
|
|
let unsafe_sub_string bstr src_off len =
|
|
|
|
let buf = Bytes.create len in
|
|
|
|
unsafe_blit_to_bytes bstr ~src_off buf ~dst_off:0 ~len;
|
|
|
|
Bytes.unsafe_to_string buf
|
|
|
|
|
|
|
|
let sub_string bstr ~off ~len =
|
|
|
|
if len < 0 || off < 0 || off > length bstr - len then
|
|
|
|
invalid_arg "Bstr.sub_string";
|
|
|
|
unsafe_sub_string bstr off len
|
|
|
|
|
|
|
|
let to_string bstr = unsafe_sub_string bstr 0 (length bstr)
|
|
|
|
let is_empty bstr = length bstr == 0
|
|
|
|
|
|
|
|
let is_prefix ~affix bstr =
|
|
|
|
let len_affix = String.length affix in
|
|
|
|
let len_bstr = length bstr in
|
|
|
|
if len_affix > len_bstr then false
|
|
|
|
else
|
|
|
|
let max_idx_affix = len_affix - 1 in
|
|
|
|
let rec go idx =
|
|
|
|
if idx > max_idx_affix then true
|
2025-02-11 18:29:15 +00:00
|
|
|
else if String.unsafe_get affix idx != unsafe_get bstr idx then false
|
|
|
|
else go (idx + 1)
|
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
let starts_with ~prefix bstr =
|
|
|
|
let len_prefix = length prefix in
|
|
|
|
let len_bstr = length bstr in
|
|
|
|
if len_prefix > len_bstr then false
|
|
|
|
else
|
|
|
|
let max_idx_prefix = len_prefix - 1 in
|
|
|
|
let rec go idx =
|
|
|
|
if idx > max_idx_prefix then true
|
|
|
|
else if unsafe_get prefix idx != unsafe_get bstr idx then false
|
|
|
|
else go (idx + 1)
|
2025-01-15 15:43:37 +00:00
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
let is_infix ~affix bstr =
|
|
|
|
let len_affix = String.length affix in
|
|
|
|
let len_bstr = length bstr in
|
|
|
|
if len_affix > len_bstr then false
|
|
|
|
else
|
|
|
|
let max_idx_affix = len_affix - 1 in
|
|
|
|
let max_idx_bstr = len_bstr - len_affix in
|
|
|
|
let rec go idx k =
|
|
|
|
if idx > max_idx_bstr then false
|
|
|
|
else if k > max_idx_affix then true
|
|
|
|
else if k > 0 then
|
|
|
|
if affix.[k] == bstr.{idx + k} then go idx (succ k) else go (succ idx) 0
|
|
|
|
else if affix.[0] = bstr.{idx} then go idx 1
|
2025-02-11 18:29:15 +00:00
|
|
|
else go (idx + 1) 0
|
2025-01-15 15:43:37 +00:00
|
|
|
in
|
|
|
|
go 0 0
|
|
|
|
|
|
|
|
let is_suffix ~affix bstr =
|
|
|
|
let max_idx_affix = String.length affix - 1 in
|
|
|
|
let max_idx_bstr = length bstr - 1 in
|
|
|
|
if max_idx_affix > max_idx_bstr then false
|
|
|
|
else
|
|
|
|
let rec go idx =
|
|
|
|
if idx > max_idx_affix then true
|
|
|
|
else if affix.[max_idx_affix - idx] != bstr.{max_idx_bstr - idx} then
|
|
|
|
false
|
2025-02-11 18:29:15 +00:00
|
|
|
else go (idx + 1)
|
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
let ends_with ~suffix bstr =
|
|
|
|
let max_idx_suffix = length suffix - 1 in
|
|
|
|
let max_idx_bstr = length bstr - 1 in
|
|
|
|
if max_idx_suffix > max_idx_bstr then false
|
|
|
|
else
|
|
|
|
let rec go idx =
|
|
|
|
if idx > max_idx_suffix then true
|
|
|
|
else if
|
|
|
|
unsafe_get suffix (max_idx_suffix - idx)
|
|
|
|
!= unsafe_get bstr (max_idx_bstr - idx)
|
|
|
|
then false
|
|
|
|
else go (idx + 1)
|
2025-01-15 15:43:37 +00:00
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
exception Break
|
|
|
|
|
|
|
|
let for_all sat bstr =
|
|
|
|
try
|
|
|
|
for idx = 0 to length bstr - 1 do
|
|
|
|
if sat (unsafe_get bstr idx) == false then raise_notrace Break
|
|
|
|
done;
|
|
|
|
true
|
|
|
|
with Break -> false
|
|
|
|
|
|
|
|
let exists sat bstr =
|
|
|
|
try
|
|
|
|
for idx = 0 to length bstr - 1 do
|
|
|
|
if sat (unsafe_get bstr idx) then raise_notrace Break
|
|
|
|
done;
|
|
|
|
false
|
|
|
|
with Break -> true
|
|
|
|
|
|
|
|
let equal a b =
|
|
|
|
if length a == length b then
|
|
|
|
try
|
|
|
|
let len = length a in
|
|
|
|
let len0 = len land 3 in
|
|
|
|
let len1 = len lsr 2 in
|
|
|
|
for i = 0 to len1 - 1 do
|
|
|
|
let i = i * 4 in
|
|
|
|
if unsafe_get_int32_ne a i <> unsafe_get_int32_ne b i then
|
|
|
|
raise_notrace Break
|
|
|
|
done;
|
|
|
|
for i = 0 to len0 - 1 do
|
|
|
|
let i = (len1 * 4) + i in
|
|
|
|
if unsafe_get_uint8 a i != unsafe_get_uint8 b i then raise_notrace Break
|
|
|
|
done;
|
|
|
|
true
|
|
|
|
with Break -> false
|
|
|
|
else false
|
|
|
|
|
|
|
|
let with_range ?(first = 0) ?(len = max_int) bstr =
|
|
|
|
if len < 0 then invalid_arg "Bstr.with_range";
|
|
|
|
if len == 0 then empty
|
|
|
|
else
|
|
|
|
let bstr_len = length bstr in
|
|
|
|
let max_idx = bstr_len - 1 in
|
|
|
|
let last =
|
|
|
|
match len with
|
|
|
|
| len when len = max_int -> max_idx
|
|
|
|
| len ->
|
|
|
|
let last = first + len - 1 in
|
|
|
|
if last > max_idx then max_idx else last
|
|
|
|
in
|
|
|
|
let first = if first < 0 then 0 else first in
|
|
|
|
if first = 0 && last = max_idx then bstr
|
|
|
|
else sub bstr ~off:first ~len:(last + 1 - first)
|
|
|
|
|
|
|
|
let with_index_range ?(first = 0) ?last bstr =
|
|
|
|
let bstr_len = length bstr in
|
|
|
|
let max_idx = bstr_len - 1 in
|
|
|
|
let last =
|
|
|
|
match last with
|
|
|
|
| None -> max_idx
|
|
|
|
| Some last -> if last > max_idx then max_idx else last
|
|
|
|
in
|
|
|
|
let first = if first < 0 then 0 else first in
|
|
|
|
if first > max_idx || last < 0 || first > last then empty
|
|
|
|
else if first == 0 && last = max_idx then bstr
|
|
|
|
else sub bstr ~off:first ~len:(last + 1 - first)
|
|
|
|
|
2025-02-11 18:29:15 +00:00
|
|
|
let is_white chr = chr == ' ' || chr == '\t'
|
2025-01-15 15:43:37 +00:00
|
|
|
|
|
|
|
let trim ?(drop = is_white) bstr =
|
|
|
|
let len = length bstr in
|
|
|
|
if len == 0 then bstr
|
|
|
|
else
|
|
|
|
let max_idx = len - 1 in
|
|
|
|
let rec left_pos idx =
|
|
|
|
if idx > max_idx then len
|
|
|
|
else if drop bstr.{idx} then left_pos (succ idx)
|
|
|
|
else idx
|
|
|
|
in
|
|
|
|
let rec right_pos idx =
|
|
|
|
if idx < 0 then 0
|
|
|
|
else if drop bstr.{idx} then right_pos (pred idx)
|
|
|
|
else succ idx
|
|
|
|
in
|
|
|
|
let left = left_pos 0 in
|
|
|
|
if left = len then empty
|
|
|
|
else
|
|
|
|
let right = right_pos max_idx in
|
|
|
|
if left == 0 && right == len then bstr
|
|
|
|
else sub bstr ~off:left ~len:(right - left)
|
|
|
|
|
|
|
|
let fspan ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
|
|
|
|
if min < 0 then invalid_arg "Bstr.fspan";
|
|
|
|
if max < 0 then invalid_arg "Bstr.fspan";
|
|
|
|
if min > max || max == 0 then (empty, bstr)
|
|
|
|
else
|
|
|
|
let len = length bstr in
|
|
|
|
let max_idx = len - 1 in
|
|
|
|
let max_idx =
|
|
|
|
let k = max - 1 in
|
|
|
|
if k > max_idx then max_idx else k
|
|
|
|
in
|
|
|
|
let need_idx = min in
|
|
|
|
let rec go idx =
|
|
|
|
if idx <= max_idx && sat bstr.{idx} then go (succ idx)
|
|
|
|
else if idx < need_idx || idx == 0 then (empty, bstr)
|
|
|
|
else if idx == len then (bstr, empty)
|
|
|
|
else (sub bstr ~off:0 ~len:idx, sub bstr ~off:idx ~len:(len - idx))
|
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
let rspan ?(min = 0) ?(max = max_int) ?(sat = Fun.const true) bstr =
|
|
|
|
if min < 0 then invalid_arg "Bstr.rspan";
|
|
|
|
if max < 0 then invalid_arg "Bstr.rspan";
|
|
|
|
if min > max || max == 0 then (bstr, empty)
|
|
|
|
else
|
|
|
|
let len = length bstr in
|
|
|
|
let max_idx = len - 1 in
|
|
|
|
let min_idx =
|
|
|
|
let k = len - max in
|
|
|
|
if k < 0 then 0 else k
|
|
|
|
in
|
2025-02-11 18:29:15 +00:00
|
|
|
let need_idx = len - min - 1 in
|
2025-01-15 15:43:37 +00:00
|
|
|
let rec go idx =
|
|
|
|
if idx >= min_idx && sat bstr.{idx} then go (pred idx)
|
|
|
|
else if idx > need_idx || idx == max_idx then (bstr, empty)
|
2025-02-11 18:29:15 +00:00
|
|
|
else if idx < 0 then (empty, bstr)
|
2025-01-15 15:43:37 +00:00
|
|
|
else
|
|
|
|
let cut = idx + 1 in
|
|
|
|
(sub bstr ~off:0 ~len:cut, sub bstr ~off:cut ~len:(len - cut))
|
|
|
|
in
|
2025-02-11 18:29:15 +00:00
|
|
|
go max_idx
|
2025-01-15 15:43:37 +00:00
|
|
|
|
|
|
|
let span ?(rev = false) ?min ?max ?sat bstr =
|
|
|
|
match rev with
|
|
|
|
| true -> rspan ?min ?max ?sat bstr
|
|
|
|
| false -> fspan ?min ?max ?sat bstr
|
|
|
|
|
|
|
|
let take ?(rev = false) ?min ?max ?sat bstr =
|
2025-02-11 18:29:15 +00:00
|
|
|
let a, b = span ~rev ?min ?max ?sat bstr in
|
|
|
|
if rev then b else a
|
2025-01-15 15:43:37 +00:00
|
|
|
|
|
|
|
let drop ?(rev = false) ?min ?max ?sat bstr =
|
2025-02-11 18:29:15 +00:00
|
|
|
let a, b = span ~rev ?min ?max ?sat bstr in
|
|
|
|
if rev then a else b
|
2025-01-15 15:43:37 +00:00
|
|
|
|
|
|
|
let shift bstr off =
|
|
|
|
if off > length bstr then invalid_arg "Bstr.shift";
|
|
|
|
let len = length bstr - off in
|
|
|
|
Bigarray.Array1.sub bstr off len
|
|
|
|
|
|
|
|
external ptr : t -> (nativeint[@unboxed])
|
|
|
|
= "bstr_bytecode_ptr" "bstr_native_ptr"
|
|
|
|
[@@noalloc]
|
|
|
|
|
|
|
|
let overlap a b =
|
|
|
|
let src_a = ptr a in
|
|
|
|
let src_b = ptr b in
|
|
|
|
let len_a = Nativeint.of_int (length a) in
|
|
|
|
let len_b = Nativeint.of_int (length b) in
|
|
|
|
let len =
|
|
|
|
let ( + ) = Nativeint.add in
|
|
|
|
let ( - ) = Nativeint.sub in
|
|
|
|
Nativeint.max 0n (Nativeint.min (src_a + len_a) (src_b + len_b))
|
|
|
|
- Nativeint.max src_a src_b
|
|
|
|
in
|
|
|
|
let len = Nativeint.to_int len in
|
|
|
|
if src_a >= src_b && src_a < Nativeint.add src_b len_b then
|
|
|
|
let offset = Nativeint.(to_int (sub src_a src_b)) in
|
|
|
|
Some (len, 0, offset)
|
|
|
|
else if src_b >= src_a && src_b < Nativeint.add src_a len_a then
|
|
|
|
let offset = Nativeint.(to_int (sub src_b src_a)) in
|
|
|
|
Some (len, offset, 0)
|
|
|
|
else None
|
|
|
|
|
|
|
|
let split_on_char sep bstr =
|
|
|
|
let lst = ref [] in
|
|
|
|
let max = ref (length bstr) in
|
|
|
|
for idx = length bstr - 1 downto 0 do
|
|
|
|
if unsafe_get bstr idx == sep then begin
|
|
|
|
lst := sub bstr ~off:(idx + 1) ~len:(!max - idx - 1) :: !lst;
|
|
|
|
max := idx
|
|
|
|
end
|
|
|
|
done;
|
|
|
|
sub bstr ~off:0 ~len:!max :: !lst
|
|
|
|
|
|
|
|
let to_seq bstr =
|
|
|
|
let rec go idx () =
|
|
|
|
if idx == length bstr then Seq.Nil
|
|
|
|
else
|
|
|
|
let chr = unsafe_get bstr idx in
|
|
|
|
Seq.Cons (chr, go (idx + 1))
|
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
let to_seqi bstr =
|
|
|
|
let rec go idx () =
|
|
|
|
if idx == length bstr then Seq.Nil
|
|
|
|
else
|
|
|
|
let chr = unsafe_get bstr idx in
|
|
|
|
Seq.Cons ((idx, chr), go (idx + 1))
|
|
|
|
in
|
|
|
|
go 0
|
|
|
|
|
|
|
|
let of_seq seq =
|
|
|
|
let n = ref 0 in
|
|
|
|
let buf = ref (make 0x7ff '\000') in
|
|
|
|
let resize () =
|
|
|
|
let new_len = Int.min (2 * length !buf) Sys.max_string_length in
|
|
|
|
(* TODO(dinosaure): should we keep this limit? *)
|
|
|
|
if length !buf == new_len then failwith "Bstr.of_seq: cannot grow bigstring";
|
|
|
|
let new_buf = make new_len '\000' in
|
|
|
|
Bigarray.Array1.blit !buf (sub new_buf ~off:0 ~len:(length !buf));
|
|
|
|
buf := new_buf
|
|
|
|
in
|
|
|
|
let fn chr =
|
|
|
|
if !n == length !buf then resize ();
|
|
|
|
unsafe_set !buf !n chr;
|
|
|
|
incr n
|
|
|
|
in
|
|
|
|
Seq.iter fn seq; sub !buf ~off:0 ~len:!n
|