(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Basic String Utils} *) (*$inject open CCShims_.Stdlib *) open CCShims_ type 'a iter = ('a -> unit) -> unit type 'a gen = unit -> 'a option (* standard implementations *) include String let compare_int (a : int) b = Stdlib.compare a b let compare = String.compare let hash s = Hashtbl.hash s let length = String.length let is_empty s = equal s "" let rev s = let n = length s in init n (fun i -> s.[n-i-1]) (*$Q Q.printable_string (fun s -> s = rev (rev s)) Q.printable_string (fun s -> length s = length (rev s)) *) (*$Q Q.printable_string (fun s -> \ rev s = (to_list s |> List.rev |> of_list)) *) (*$= "abc" (rev "cba") "" (rev "") " " (rev " ") *) let rec _to_list s acc i len = if len=0 then List.rev acc else _to_list s (s.[i]::acc) (i+1) (len-1) let _is_sub ~sub i s j ~len = let rec check k = if k = len then true else CCChar.equal sub.[i+k] s.[j+k] && check (k+1) in j+len <= String.length s && check 0 let is_sub ~sub i s j ~len = if i+len > String.length sub then invalid_arg "CCString.is_sub"; _is_sub ~sub i s j ~len type _ direction = | Direct : [`Direct] direction | Reverse : [`Reverse] direction (* we follow https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm *) module Find = struct type 'a kmp_pattern = { failure : int array; str : string; } (* invariant: [length failure = length str]. We use a phantom type to avoid mixing the directions. *) let kmp_pattern_length p = String.length p.str (* access the [i]-th element of [s] according to direction [dir] *) let get_ : type a. dir:a direction -> string -> int -> char = fun ~dir -> match dir with | Direct -> String.get | Reverse -> (fun s i -> s.[String.length s - i - 1]) let kmp_compile_ : type a. dir:a direction -> string -> a kmp_pattern = fun ~dir str -> let len = length str in let get = get_ ~dir in (* how to read elements of the string *) match len with | 0 -> {failure=[| |]; str;} | 1 -> {failure=[| -1 |]; str;} | _ -> (* at least 2 elements, the algorithm can work *) let failure = Array.make len 0 in failure.(0) <- -1; (* i: current index in str *) let i = ref 2 in (* j: index of candidate substring *) let j = ref 0 in while !i < len do match !j with | _ when CCChar.equal (get str (!i-1)) (get str !j) -> (* substring starting at !j continues matching current char *) incr j; failure.(!i) <- !j; incr i; | 0 -> (* back to the beginning *) failure.(!i) <- 0; incr i; | _ -> (* fallback for the prefix string *) assert (!j > 0); j := failure.(!j) done; (* Format.printf "{@[failure:%a, str:%s@]}@." CCFormat.(array int) failure str; *) { failure; str; } let kmp_compile s = kmp_compile_ ~dir:Direct s let kmp_rcompile s = kmp_compile_ ~dir:Reverse s (* proper search function. [i] index in [s] [j] index in [pattern] [len] length of [s] *) let kmp_find ~pattern s idx = let len = length s in let i = ref idx in let j = ref 0 in let pat_len = kmp_pattern_length pattern in while !j < pat_len && !i + !j < len do let c = String.get s (!i + !j) in let expected = String.get pattern.str !j in if CCChar.equal c expected then ( (* char matches *) incr j; ) else ( let fail_offset = pattern.failure.(!j) in if fail_offset >= 0 then ( assert (fail_offset < !j); (* follow the failure link *) i := !i + !j - fail_offset; j := fail_offset ) else ( (* beginning of pattern *) j := 0; incr i ) ) done; if !j = pat_len then !i else -1 (* proper search function, from the right. [i] index in [s] [j] index in [pattern] [len] length of [s] *) let kmp_rfind ~pattern s idx = let len = length s in let i = ref (len - idx - 1) in let j = ref 0 in let pat_len = kmp_pattern_length pattern in while !j < pat_len && !i + !j < len do let c = String.get s (len - !i - !j - 1) in let expected = String.get pattern.str (String.length pattern.str - !j - 1) in if CCChar.equal c expected then ( (* char matches *) incr j; ) else ( let fail_offset = pattern.failure.(!j) in if fail_offset >= 0 then ( assert (fail_offset < !j); (* follow the failure link *) i := !i + !j - fail_offset; j := fail_offset ) else ( (* beginning of pattern *) j := 0; incr i ) ) done; (* adjust result: first, [res = string.length s - res -1] to convert back to real indices; then, what we got is actually the position of the end of the pattern, so we subtract the [length of the pattern -1] to obtain the real result. *) if !j = pat_len then len - !i - kmp_pattern_length pattern else -1 type 'a pattern = | P_char of char | P_KMP of 'a kmp_pattern let pattern_length = function | P_char _ -> 1 | P_KMP p -> kmp_pattern_length p let compile sub : [`Direct] pattern = if length sub=1 then P_char sub.[0] else P_KMP (kmp_compile sub) let rcompile sub : [`Reverse] pattern = if length sub=1 then P_char sub.[0] else P_KMP (kmp_rcompile sub) let find ?(start=0) ~(pattern:[`Direct] pattern) s = match pattern with | P_char c -> (try String.index_from s start c with Not_found -> -1) | P_KMP pattern -> kmp_find ~pattern s start let rfind ?start ~(pattern:[`Reverse] pattern) s = let start = match start with | Some n -> n | None -> String.length s - 1 in match pattern with | P_char c -> (try String.rindex_from s start c with Not_found -> -1) | P_KMP pattern -> kmp_rfind ~pattern s start end let find ?(start=0) ~sub = let pattern = Find.compile sub in fun s -> Find.find ~start ~pattern s (*$= & ~printer:string_of_int 1 (find ~sub:"bc" "abcd") ~-1 (find ~sub:"bc" "abd") 1 (find ~sub:"a" "_a_a_a_") 6 (find ~start:5 ~sub:"a" "a1a234a") *) (*$Q & ~count:10_000 Q.(pair printable_string printable_string) (fun (s1,s2) -> \ let i = find ~sub:s2 s1 in \ i < 0 || String.sub s1 i (length s2) = s2) *) let find_all ?(start=0) ~sub = let pattern = Find.compile sub in fun s -> let i = ref start in fun () -> let res = Find.find ~start:!i ~pattern s in if res = ~-1 then None else ( i := res + 1; (* possible overlap *) Some res ) let find_all_l ?start ~sub s = let rec aux acc g = match g () with | None -> List.rev acc | Some i -> aux (i::acc) g in aux [] (find_all ?start ~sub s) (*$= & ~printer:Q.Print.(list int) [1; 6] (find_all_l ~sub:"bc" "abc aabc aab") [] (find_all_l ~sub:"bc" "abd") [76] (find_all_l ~sub:"aaaaaa" \ "aabbaabbaaaaabbbbabababababbbbabbbabbaaababbbaaabaabbaabbaaaabbababaaaabbaabaaaaaabbbaaaabababaabaaabbaabaaaabbababbaabbaaabaabbabababbbaabababaaabaaababbbaaaabbbaabaaababbabaababbaabbaaaaabababbabaababbbaaabbabbabababaaaabaaababaaaaabbabbaabbabbbbbbbbbbbbbbaabbabbbbbabbaaabbabbbbabaaaaabbababbbaaaa") *) let mem ?start ~sub s = find ?start ~sub s >= 0 (*$T mem ~sub:"bc" "abcd" not (mem ~sub:"a b" "abcd") *) let rfind ~sub = let pattern = Find.rcompile sub in fun s -> Find.rfind ~start:(String.length s-1) ~pattern s (*$= & ~printer:string_of_int 1 (rfind ~sub:"bc" "abcd") ~-1 (rfind ~sub:"bc" "abd") 5 (rfind ~sub:"a" "_a_a_a_") 4 (rfind ~sub:"bc" "abcdbcd") 6 (rfind ~sub:"a" "a1a234a") *) (*$Q & ~count:10_000 Q.(pair printable_string printable_string) (fun (s1,s2) -> \ let i = rfind ~sub:s2 s1 in \ i < 0 || String.sub s1 i (length s2) = s2) *) (* Replace substring [s.[pos] … s.[pos+len-1]] by [by] in [s] *) let replace_at_ ~pos ~len ~by s = let b = Buffer.create (length s + length by - len) in Buffer.add_substring b s 0 pos; Buffer.add_string b by; Buffer.add_substring b s (pos+len) (String.length s - pos - len); Buffer.contents b let replace ?(which=`All) ~sub ~by s = if is_empty sub then invalid_arg "CCString.replace"; match which with | `Left -> let i = find ~start:0 ~sub s in if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s | `Right -> let i = rfind ~sub s in if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s | `All -> (* compile search pattern only once *) let pattern = Find.compile sub in let b = Buffer.create (String.length s) in let start = ref 0 in while !start < String.length s do let i = Find.find ~start:!start ~pattern s in if i>=0 then ( (* between last and cur occurrences *) Buffer.add_substring b s !start (i- !start); Buffer.add_string b by; start := i + String.length sub ) else ( (* add remainder *) Buffer.add_substring b s !start (String.length s - !start); start := String.length s (* stop *) ) done; Buffer.contents b (*$= & ~printer:CCFun.id (replace ~which:`All ~sub:"a" ~by:"b" "abcdabcd") "bbcdbbcd" (replace ~which:`Left ~sub:"a" ~by:"b" "abcdabcd") "bbcdabcd" (replace ~which:`Right ~sub:"a" ~by:"b" "abcdabcd") "abcdbbcd" (replace ~which:`All ~sub:"ab" ~by:"hello" " abab cdabb a") \ " hellohello cdhellob a" (replace ~which:`Left ~sub:"ab" ~by:"nope" " a b c d ") " a b c d " (replace ~sub:"a" ~by:"b" "1aa234a") "1bb234b" *) module Split = struct type drop_if_empty = { first: bool; last: bool; } let no_drop = {first=false; last=false} let default_drop = no_drop type split_state = | SplitStop | SplitAt of int (* previous *) let rec _split ~by s state = match state with | SplitStop -> None | SplitAt prev -> _split_search ~by s prev and _split_search ~by s prev = let j = Find.find ~start:prev ~pattern:by s in if j < 0 then Some (SplitStop, prev, String.length s - prev) else Some (SplitAt (j+Find.pattern_length by), prev, j-prev) let _tuple3 x y z = x,y,z let _mkgen ~drop ~by s k = let state = ref (SplitAt 0) in let by = Find.compile by in let rec next() = match _split ~by s !state with | None -> None | Some (state', 0, 0) when drop.first -> state := state'; next() | Some (_, i, 0) when drop.last && i = length s -> None | Some (state', i, len) -> state := state'; Some (k s i len) in next let gen ?(drop=default_drop) ~by s = _mkgen ~drop ~by s _tuple3 let gen_cpy ?(drop=default_drop) ~by s = _mkgen ~drop ~by s String.sub let _mklist ~drop ~by s k = let by = Find.compile by in let rec build acc state = match _split ~by s state with | None -> List.rev acc | Some (state',0,0) when drop.first -> build acc state' | Some (_, i, 0) when drop.last && i=length s -> List.rev acc | Some (state', i, len) -> build (k s i len ::acc) state' in build [] (SplitAt 0) let list_ ?(drop=default_drop) ~by s = _mklist ~drop ~by s _tuple3 let list_cpy ?(drop=default_drop) ~by s = _mklist ~drop ~by s String.sub (*$T Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"] Split.list_cpy ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""] Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"] *) let _mkseq ~drop ~by s k = let by = Find.compile by in let rec make state () = match _split ~by s state with | None -> Seq.Nil | Some (state', 0, 0) when drop.first -> make state' () | Some (_, i, 0) when drop.last && i=length s -> Seq.Nil | Some (state', i, len) -> Seq.Cons (k s i len , make state') in make (SplitAt 0) let seq ?(drop=default_drop) ~by s = _mkseq ~drop ~by s _tuple3 let seq_cpy ?(drop=default_drop) ~by s = _mkseq ~drop ~by s String.sub let _mk_iter ~drop ~by s f k = let by = Find.compile by in let rec aux state = match _split ~by s state with | None -> () | Some (state', 0, 0) when drop.first -> aux state' | Some (_, i, 0) when drop.last && i=length s -> () | Some (state', i, len) -> k (f s i len); aux state' in aux (SplitAt 0) let iter ?(drop=default_drop) ~by s = _mk_iter ~drop ~by s _tuple3 let iter_cpy ?(drop=default_drop) ~by s = _mk_iter ~drop ~by s String.sub let left_exn ~by s = let i = find ~sub:by s in if i = ~-1 then raise Not_found else let right = i + String.length by in String.sub s 0 i, String.sub s right (String.length s - right) let left ~by s = try Some (left_exn ~by s) with Not_found -> None (*$T Split.left ~by:" " "ab cde f g " = Some ("ab", "cde f g ") Split.left ~by:"__" "a__c__e_f" = Some ("a", "c__e_f") Split.left ~by:"_" "abcde" = None Split.left ~by:"bb" "abbc" = Some ("a", "c") Split.left ~by:"a_" "abcde" = None *) let right_exn ~by s = let i = rfind ~sub:by s in if i = ~-1 then raise Not_found else let right = i + String.length by in String.sub s 0 i, String.sub s right (String.length s - right) let right ~by s = try Some (right_exn ~by s) with Not_found -> None (*$T Split.right ~by:" " "ab cde f g" = Some ("ab cde f", "g") Split.right ~by:"__" "a__c__e_f" = Some ("a__c", "e_f") Split.right ~by:"_" "abcde" = None Split.right ~by:"a_" "abcde" = None *) end [@@@ifge 4.04] [@@@else_] let split_on_char c s: _ list = Split.list_cpy ~drop:Split.no_drop ~by:(String.make 1 c) s [@@@endif] (*$= & ~printer:Q.Print.(list string) ["a"; "few"; "words"; "from"; "our"; "sponsors"] \ (split_on_char ' ' "a few words from our sponsors") *) (*$Q Q.(printable_string) (fun s -> \ let s = split_on_char ' ' s |> String.concat " " in \ s = (split_on_char ' ' s |> String.concat " ")) *) let split ~by s = Split.list_cpy ~by s let compare_versions a b = let of_int s = try Some (int_of_string s) with Failure _ -> None in let rec cmp_rec a b = match a(), b() with | None, None -> 0 | Some _, None -> 1 | None, Some _ -> -1 | Some x, Some y -> match of_int x, of_int y with | None, None -> let c = String.compare x y in if c<>0 then c else cmp_rec a b | Some _, None -> 1 | None, Some _ -> -1 | Some x, Some y -> let c = compare_int x y in if c<>0 then c else cmp_rec a b in cmp_rec (Split.gen_cpy ~by:"." a) (Split.gen_cpy ~by:"." b) (*$T compare_versions "0.1.3" "0.1" > 0 compare_versions "10.1" "2.0" > 0 compare_versions "0.1.alpha" "0.1" > 0 compare_versions "0.3.dev" "0.4" < 0 compare_versions "0.foo" "0.0" < 0 compare_versions "1.2.3.4" "01.2.4.3" < 0 *) (*$Q Q.(pair printable_string printable_string) (fun (a,b) -> \ CCOrd.equiv (compare_versions a b) (CCOrd.opp compare_versions b a)) *) type nat_chunk = | NC_char of char | NC_int of int let compare_natural a b = (* stream of chunks *) let chunks s : unit -> nat_chunk option = let i = ref 0 in let rec next () = if !i = length s then None else match String.get s !i with | '0'..'9' as c -> incr i; read_int (Char.code c - Char.code '0') | c -> incr i; Some (NC_char c) and read_int n = if !i = length s then Some (NC_int n) else match String.get s !i with | '0'..'9' as c -> incr i; read_int (10 * n + Char.code c - Char.code '0') | _ -> Some (NC_int n) in next in let rec cmp_rec a b = match a(), b() with | None, None -> 0 | Some _, None -> 1 | None, Some _ -> -1 | Some x, Some y -> match x, y with | NC_char x, NC_char y -> let c = Char.compare x y in if c<>0 then c else cmp_rec a b | NC_int _, NC_char _ -> 1 | NC_char _, NC_int _ -> -1 | NC_int x, NC_int y -> let c = compare_int x y in if c<>0 then c else cmp_rec a b in cmp_rec (chunks a) (chunks b) (*$T compare_natural "foo1" "foo2" < 0 compare_natural "foo11" "foo2" > 0 compare_natural "foo11" "foo11" = 0 compare_natural "foo011" "foo11" = 0 compare_natural "foo1a" "foo1b" < 0 compare_natural "foo1a1" "foo1a2" < 0 compare_natural "foo1a17" "foo1a2" > 0 *) (*Q (Q.pair printable_string printable_string) (fun (a,b) -> \ CCOrd.opp (compare_natural a b) = compare_natural b a) (Q.printable_string) (fun a -> compare_natural a a = 0) (Q.triple printable_string printable_string printable_string) (fun (a,b,c) -> \ if compare_natural a b < 0 && compare_natural b c < 0 \ then compare_natural a c < 0 else Q.assume_fail()) *) let edit_distance ?(cutoff=max_int) s1 s2 = let n1 = length s1 in let n2 = length s2 in if n1 = 0 then min cutoff n2 else if n2 = 0 then min cutoff n1 else if equal s1 s2 then 0 else if n1-n2 >= cutoff || n2-n1 >= cutoff then cutoff (* at least cutoff inserts *) else try (* distance vectors (v0=previous, v1=current) *) let v0 = Array.make (length s2 + 1) 0 in let v1 = Array.make (length s2 + 1) 0 in (* initialize v0: v0(i) = A(0)(i) = delete i chars from t *) let lower_bound = ref max_int in for i = 0 to length s2 do v0.(i) <- i done; (* main loop for the bottom up dynamic algorithm *) for i = 0 to length s1 - 1 do (* first edit distance is the deletion of i+1 elements from s *) v1.(0) <- i+1; (* try add/delete/replace operations *) for j = 0 to length s2 - 1 do let cost = if Char.equal (String.get s1 i) (String.get s2 j) then 0 else 1 in v1.(j+1) <- min (v1.(j) + 1) (min (v0.(j+1) + 1) (v0.(j) + cost)); done; if cutoff < Array.length v1 && i <= 2 * cutoff && 2 * cutoff - i < String.length s2 then ( lower_bound := min !lower_bound v1.(2 * cutoff - i); ); (* did we compute up to the diagonal 2*cutoff+1? *) if cutoff < Array.length v1 && i = cutoff * 2 && !lower_bound >= cutoff then ( raise_notrace Exit; ); (* copy v1 into v0 for next iteration *) Array.blit v1 0 v0 0 (length s2 + 1); done; v1.(length s2) with Exit -> cutoff (*$Q Q.(string_of_size Gen.(0 -- 30)) (fun s -> \ edit_distance s s = 0) Q.(let p = string_of_size Gen.(0 -- 20) in pair p p) (fun (s1,s2) -> \ edit_distance s1 s2 = edit_distance s2 s1) Q.(let p = string_of_size Gen.(0 -- 20) in pair p p) (fun (s1,s2) -> \ let e = edit_distance s1 s2 in \ let e' = edit_distance ~cutoff:3 s1 s2 in \ (if e' < 3 then e=e' else e >= 3) && \ (if e <= 3 then e=e' else true)) *) (*$= & ~printer:string_of_int 2 (edit_distance "hello" "helo!") 5 (edit_distance "abcde" "tuvwx") 2 (edit_distance ~cutoff:2 "abcde" "tuvwx") 1 (edit_distance ("a" ^ String.make 100 '_') ("b"^String.make 100 '_')) 1 (edit_distance ~cutoff:4 ("a" ^ String.make 1000 '_') ("b"^String.make 1000 '_')) 2 (edit_distance ~cutoff:3 ("a" ^ String.make 1000 '_' ^ "c")\ ("b" ^ String.make 1000 '_' ^ "d")) *) (* test that building a from s, and mutating one char of s, yields a string s' that is accepted by a. --> generate triples (s, i, c) where c is a char, s a non empty string and i a valid index in s. *) (*$QR ( let gen = Q.Gen.( 3 -- 10 >>= fun len -> 0 -- (len-1) >>= fun i -> string_size (return len) >>= fun s -> char >|= fun c -> (s,i,c) ) in let small (s,_,_) = String.length s in Q.make ~small gen ) (fun (s,i,c) -> let s' = Bytes.of_string s in Bytes.set s' i c; edit_distance s (Bytes.to_string s') <= 1) *) let repeat s n = assert (n>=0); let len = String.length s in assert(len > 0); init (len * n) (fun i -> s.[i mod len]) let prefix ~pre s = let len = String.length pre in if len > String.length s then false else ( let rec check i = if i=len then true else if Stdlib.(<>) (String.unsafe_get s i) (String.unsafe_get pre i) then false else check (i+1) in check 0 ) (*$T prefix ~pre:"aab" "aabcd" not (prefix ~pre:"ab" "aabcd") not (prefix ~pre:"abcd" "abc") prefix ~pre:"abc" "abcde" prefix ~pre:"" "" prefix ~pre:"" "abc" prefix ~pre:"abc" "abc" *) let suffix ~suf s = let len = String.length suf in if len > String.length s then false else ( let off = String.length s - len in let rec check i = if i=len then true else if Stdlib.(<>) (String.unsafe_get s (off+i)) (String.unsafe_get suf i) then false else check (i+1) in check 0 ) (*$T suffix ~suf:"cd" "abcd" suffix ~suf:"" "" suffix ~suf:"" "abc" not (suffix ~suf:"cd" "abcde") not (suffix ~suf:"abcd" "cd") *) let take n s = if n < String.length s then String.sub s 0 n else s let drop n s = if n < String.length s then String.sub s n (String.length s - n) else "" let take_drop n s = take n s, drop n s (*$= ("ab", "cd") (take_drop 2 "abcd") ("abc", "") (take_drop 3 "abc") ("abc", "") (take_drop 5 "abc") *) let chop_suffix ~suf s = if suffix ~suf s then Some (String.sub s 0 (String.length s-String.length suf)) else None (*$= & ~printer:Q.Print.(option string) (Some "ab") (chop_suffix ~suf:"cd" "abcd") None (chop_suffix ~suf:"cd" "abcde") None (chop_suffix ~suf:"abcd" "cd") *) let chop_prefix ~pre s = if prefix ~pre s then Some (String.sub s (String.length pre) (String.length s-String.length pre)) else None (*$= & ~printer:Q.Print.(option string) (Some "cd") (chop_prefix ~pre:"aab" "aabcd") None (chop_prefix ~pre:"ab" "aabcd") None (chop_prefix ~pre:"abcd" "abc") *) let blit = String.blit let fold f acc s = let rec fold_rec f acc s i = if i = String.length s then acc else fold_rec f (f acc s.[i]) s (i+1) in fold_rec f acc s 0 let foldi f acc s = let rec fold_rec f acc s i = if i = String.length s then acc else fold_rec f (f acc i s.[i]) s (i+1) in fold_rec f acc s 0 let pad ?(side=`Left) ?(c=' ') n s = let len_s = String.length s in if len_s >= n then s else let pad_len = n - len_s in match side with | `Left -> init n (fun i -> if i < pad_len then c else s.[i-pad_len]) | `Right -> init n (fun i -> if i < len_s then s.[i] else c) (*$= & ~printer:Q.Print.string " 42" (pad 4 "42") "0042" (pad ~c:'0' 4 "42") "4200" (pad ~side:`Right ~c:'0' 4 "42") "hello" (pad 4 "hello") "aaa" (pad ~c:'a' 3 "") "aaa" (pad ~side:`Right ~c:'a' 3 "") *) let _to_gen s i0 len = let i = ref i0 in fun () -> if !i = i0+len then None else ( let c = String.unsafe_get s !i in incr i; Some c ) let to_gen s = _to_gen s 0 (String.length s) let of_char c = String.make 1 c let of_gen g = let b = Buffer.create 32 in let rec aux () = match g () with | None -> Buffer.contents b | Some c -> Buffer.add_char b c; aux () in aux () let to_iter s k = String.iter k s let rec _to_seq s i len () = if len=0 then Seq.Nil else Seq.Cons (s.[i], _to_seq s (i+1)(len-1)) let to_seq s = _to_seq s 0 (String.length s) let of_iter i = let b = Buffer.create 32 in i (Buffer.add_char b); Buffer.contents b let of_seq seq = let b = Buffer.create 32 in Seq.iter (Buffer.add_char b) seq; Buffer.contents b let to_list s = _to_list s [] 0 (String.length s) let of_list l = let buf = Buffer.create (List.length l) in List.iter (Buffer.add_char buf) l; Buffer.contents buf (*$T of_list ['a'; 'b'; 'c'] = "abc" of_list [] = "" *) let of_array a = init (Array.length a) (fun i -> a.(i)) let to_array s = Array.init (String.length s) (fun i -> s.[i]) let lines_gen s = Split.gen_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s let lines_iter s = Split.iter_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s let lines_seq s = Split.seq_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s let lines s = Split.list_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s (*$= & ~printer:Q.Print.(list @@ Printf.sprintf "%S") ["ab"; "c"] (lines "ab\nc") ["ab"; "c"] (lines "ab\nc\n") [] (lines "") [""] (lines "\n") [""; "a"] (lines "\na") *) (*$Q Q.(printable_string) (fun s -> \ lines s = (lines_gen s |> Gen.to_list)) Q.(printable_string) (fun s -> \ lines s = (lines_iter s |> Iter.to_list)) *) let concat_gen_buf ~sep g : Buffer.t = let b = Buffer.create 256 in let rec aux ~first () = match g () with | None -> b | Some s -> if not first then Buffer.add_string b sep; Buffer.add_string b s; aux ~first:false () in aux ~first:true () let concat_gen ~sep g = let buf = concat_gen_buf ~sep g in Buffer.contents buf let concat_iter_buf ~sep i : Buffer.t = let buf = Buffer.create 256 in let first = ref true in i (fun s -> if !first then first := false else Buffer.add_string buf sep; Buffer.add_string buf s); buf let concat_iter ~sep i = let buf = concat_iter_buf ~sep i in Buffer.contents buf let concat_seq_buf ~sep seq : Buffer.t = let buf = Buffer.create 256 in let first = ref true in Seq.iter (fun s -> if !first then first := false else Buffer.add_string buf sep; Buffer.add_string buf s) seq; buf let concat_seq ~sep seq = let buf = concat_seq_buf ~sep seq in Buffer.contents buf (*$Q Q.(small_list printable_string) (fun l -> \ concat_iter ~sep:"\n" (Iter.of_list l) = concat "\n" l) Q.(small_list printable_string) (fun l -> \ concat_gen ~sep:"\n" (Gen.of_list l) = concat "\n" l) Q.(small_list printable_string) (fun l -> \ concat_seq ~sep:"\n" (CCSeq.of_list l) = concat "\n" l) *) let unlines l = let len = List.fold_left (fun n s -> n + 1 + String.length s) 0 l in let buf = Bytes.create len in let rec aux_blit i l = match l with | [] -> assert (i=len); Bytes.to_string buf | s :: tail -> let len_s = String.length s in Bytes.blit_string s 0 buf i len_s; Bytes.set buf (i+len_s) '\n'; aux_blit (i+len_s+1) tail in aux_blit 0 l let unlines_gen g = let buf = concat_gen_buf ~sep:"\n" g in Buffer.add_char buf '\n'; Buffer.contents buf let unlines_iter i = let buf = concat_iter_buf ~sep:"\n" i in Buffer.add_char buf '\n'; Buffer.contents buf let unlines_seq seq = let buf = concat_seq_buf ~sep:"\n" seq in Buffer.add_char buf '\n'; Buffer.contents buf (*$= & ~printer:CCFun.id "" (unlines []) "ab\nc\n" (unlines ["ab"; "c"]) *) (*$Q Q.printable_string (fun s -> trim (unlines (lines s)) = trim s) Q.printable_string (fun s -> trim (unlines_gen (lines_gen s)) = trim s) *) (*$Q Q.(small_list small_string) (fun l -> \ let l = unlines l |> lines in \ l = (unlines l |> lines)) *) let set s i c = if i<0 || i>= String.length s then invalid_arg "CCString.set"; init (String.length s) (fun j -> if i=j then c else s.[j]) (*$T set "abcd" 1 '_' = "a_cd" set "abcd" 0 '-' = "-bcd" (try ignore (set "abc" 5 '_'); false with Invalid_argument _ -> true) *) let iter = String.iter let filter_map f s = let buf = Buffer.create (String.length s) in iter (fun c -> match f c with | None -> () | Some c' -> Buffer.add_char buf c') s; Buffer.contents buf (*$= & ~printer:Q.Print.string "bcef" (filter_map \ (function 'c' -> None | c -> Some (Char.chr (Char.code c + 1))) "abcde") *) let filter f s = let buf = Buffer.create (String.length s) in iter (fun c -> if f c then Buffer.add_char buf c) s; Buffer.contents buf (*$= & ~printer:Q.Print.string "abde" (filter (function 'c' -> false | _ -> true) "abcdec") *) (*$Q Q.printable_string (fun s -> filter (fun _ -> true) s = s) *) let uniq eq s = if String.length s = 0 then s else begin let buf = Buffer.create (String.length s) in Buffer.add_char buf s.[0]; let _ = fold (fun previous_c c -> if not (eq previous_c c) then Buffer.add_char buf c; c ) s.[0] s in Buffer.contents buf end (*$= & ~printer:Q.Print.string "abcde" (uniq CCShims_.Stdlib.(=) "abbccdeeeee") *) let flat_map ?sep f s = let buf = Buffer.create (String.length s) in iteri (fun i c -> begin match sep with | Some _ when i=0 -> () | None -> () | Some sep -> Buffer.add_string buf sep end; Buffer.add_string buf (f c) ) s; Buffer.contents buf exception MyExit let for_all p s = try iter (fun c -> if not (p c) then raise MyExit) s; true with MyExit -> false let exists p s = try iter (fun c -> if p c then raise MyExit) s; false with MyExit -> true let drop_while f s = let i = ref 0 in while !i < length s && f (unsafe_get s !i) do incr i done; if !i > 0 then sub s !i (length s - !i) else s let rdrop_while f s = let i = ref (length s-1) in while !i >= 0 && f (unsafe_get s !i) do decr i done; if !i < length s-1 then sub s 0 (!i+1) else s (* notion of whitespace for trim *) let is_space_ = function | ' ' | '\012' | '\n' | '\r' | '\t' -> true | _ -> false let ltrim s = drop_while is_space_ s let rtrim s = rdrop_while is_space_ s (*$= & ~printer:id "abc " (ltrim " abc ") " abc" (rtrim " abc ") *) (*$Q Q.(printable_string) (fun s -> \ String.trim s = (s |> ltrim |> rtrim)) Q.(printable_string) (fun s -> ltrim s = ltrim (ltrim s)) Q.(printable_string) (fun s -> rtrim s = rtrim (rtrim s)) Q.(printable_string) (fun s -> \ let s' = ltrim s in \ if s'="" then Q.assume_fail() else s'.[0] <> ' ') Q.(printable_string) (fun s -> \ let s' = rtrim s in \ if s'="" then Q.assume_fail() else s'.[String.length s'-1] <> ' ') *) let map2 f s1 s2 = if length s1 <> length s2 then invalid_arg "CCString.map2"; init (String.length s1) (fun i -> f s1.[i] s2.[i]) let iter2 f s1 s2 = if length s1 <> length s2 then invalid_arg "CCString.iter2"; for i = 0 to String.length s1 - 1 do f s1.[i] s2.[i] done let iteri2 f s1 s2 = if length s1 <> length s2 then invalid_arg "CCString.iteri2"; for i = 0 to String.length s1 - 1 do f i s1.[i] s2.[i] done let fold2 f acc s1 s2 = if length s1 <> length s2 then invalid_arg "CCString.fold2"; let rec fold' acc s1 s2 i = if i = String.length s1 then acc else fold' (f acc s1.[i] s2.[i]) s1 s2 (i+1) in fold' acc s1 s2 0 let for_all2 p s1 s2 = try iter2 (fun c1 c2 -> if not (p c1 c2) then raise MyExit) s1 s2; true with MyExit -> false let exists2 p s1 s2 = try iter2 (fun c1 c2 -> if p c1 c2 then raise MyExit) s1 s2; false with MyExit -> true (** {2 Ascii functions} *) let equal_caseless s1 s2: bool = String.length s1 = String.length s2 && for_all2 (fun c1 c2 -> CCChar.equal (CCChar.lowercase_ascii c1) (CCChar.lowercase_ascii c2)) s1 s2 (*$T equal_caseless "foo" "FoO" equal_caseless "helLo" "HEllO" *) (*$Q Q.(pair printable_string printable_string) (fun (s1,s2) -> \ equal_caseless s1 s2 = (lowercase_ascii s1=lowercase_ascii s2)) Q.(printable_string) (fun s -> equal_caseless s s) Q.(printable_string) (fun s -> equal_caseless (uppercase_ascii s) s) *) let pp_buf buf s = Buffer.add_char buf '"'; Buffer.add_string buf s; Buffer.add_char buf '"' let pp fmt s = Format.fprintf fmt "\"%s\"" s module Infix = struct let (=) = equal let (<>) a b = not (equal a b) let (>) : t -> t -> bool = CCShims_.Stdlib.(>) let (>=) : t -> t -> bool = CCShims_.Stdlib.(>=) let (<) : t -> t -> bool = CCShims_.Stdlib.(<) let (<=) : t -> t -> bool = CCShims_.Stdlib.(<=) end include Infix (*$T "ab" < "abc" "123" < "14" *)