rx.ml (23635B)
1 (* Unison file synchronizer: src/ubase/rx.ml *) 2 (* Copyright 1999-2020, Benjamin C. Pierce 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. 16 *) 17 18 (* 19 Inspired by some code and algorithms from Mark William Hopkins 20 (regexp.tar.gz, available in the comp.compilers file archive) 21 *) 22 23 (* 24 Missing POSIX features 25 ---------------------- 26 - Collating sequences 27 *) 28 29 type v = 30 Cst of int list 31 | Alt of u list 32 | Seq of u list 33 | Rep of u * int * int option 34 | Bol | Eol 35 | Int of u list 36 | Dif of u * u 37 38 and u = { desc : v; hash : int } 39 40 (****) 41 42 let hash x = 43 match x with 44 Cst l -> List.fold_left (fun h i -> h + 757 * i) 0 l 45 | Alt l -> 199 * List.fold_left (fun h y -> h + 883 * y.hash) 0 l 46 | Seq l -> 821 * List.fold_left (fun h y -> h + 883 * y.hash) 0 l 47 | Rep (y, i, Some j) -> 197 * y.hash + 137 * i + j 48 | Rep (y, i, None) -> 197 * y.hash + 137 * i + 552556457 49 | Bol -> 165160782 50 | Eol -> 152410806 51 | Int l -> 71 * List.fold_left (fun h y -> h + 883 * y.hash) 0 l 52 | Dif (y, z) -> 379 * y.hash + 563 * z.hash 53 54 let make x = {desc = x; hash = hash x} 55 56 let epsilon = make (Seq []) 57 let empty = make (Alt []) 58 59 (**** Printing ****) 60 61 open Format 62 63 let print_list sep print l = 64 match l with 65 [] -> () 66 | v::r -> print v; List.iter (fun v -> sep (); print v) r 67 68 let rec print n t = 69 match t.desc with 70 Cst l -> 71 open_box 1; print_string "["; 72 print_list print_space print_int l; 73 print_string "]"; close_box () 74 | Alt tl -> 75 if n > 0 then begin open_box 1; print_string "(" end; 76 print_list (fun () -> print_string "|"; print_cut ()) (print 1) tl; 77 if n > 0 then begin print_string ")"; close_box () end 78 | Seq tl -> 79 if n > 1 then begin open_box 1; print_string "(" end; 80 print_list (fun () -> print_cut ()) (print 2) tl; 81 if n > 1 then begin print_string ")"; close_box () end 82 | Rep (t, 0, None) -> 83 print 2 t; print_string "*" 84 | Rep (t, i, None) -> 85 print 2 t; print_string "{"; print_int i; print_string ",}" 86 | Rep (t, i, Some j) -> 87 print 2 t; 88 print_string "{"; print_int i; print_string ","; 89 print_int j; print_string "}" 90 | _ -> assert false 91 92 (**** Constructors for regular expressions *) 93 94 let seq2 x y = 95 match x.desc, y.desc with 96 Alt [], _ | _, Alt [] -> empty 97 | Seq [], s -> y 98 | r, Seq [] -> x 99 | Seq r, Seq s -> make (Seq (r @ s)) 100 | Seq r, _ -> make (Seq (r @ [y])) 101 | _, Seq s -> make (Seq (x :: s)) 102 | r, s -> make (Seq [x; y]) 103 104 let seq l = List.fold_right seq2 l epsilon 105 106 let seq' l = match l with [] -> epsilon | [x] -> x | _ -> make (Seq l) 107 108 let rec alt_merge r s = 109 match r, s with 110 [], _ -> s 111 | _, [] -> r 112 | {desc = Seq (x::m)} :: s, {desc = Seq (y::n)} :: r when x = y -> 113 alt_merge (seq2 x (alt2 (seq' m) (seq' n))::s) r 114 | x :: r', y :: s' -> 115 let c = compare x y in 116 if c = 0 then x :: alt_merge r' s' 117 else if c < 0 then x :: alt_merge r' s 118 else (* if c > 0 then *) y :: alt_merge r s' 119 120 and alt2 x y = 121 let c = compare x y in 122 if c = 0 then x else 123 match x.desc, y.desc with 124 Alt [], _ -> y 125 | _, Alt [] -> x 126 | Alt r, Alt s -> make (Alt (alt_merge r s)) 127 | Alt [r], _ when r = y -> y 128 | _, Alt [s] when x = s -> x 129 | Alt r, _ -> make (Alt (alt_merge r [y])) 130 | _, Alt s -> make (Alt (alt_merge [x] s)) 131 | Seq (r::m), Seq (s::n) when r = s -> seq2 r (alt2 (seq' m) (seq' n)) 132 | _, _ -> make (if c < 0 then Alt [x; y] else Alt [y; x]) 133 134 let alt l = List.fold_right alt2 l empty 135 136 let rep x i j = 137 match x.desc with 138 Alt [] when i > 0 -> empty 139 | Alt [] | Seq [] -> epsilon 140 | _ -> 141 match i, j with 142 _, Some 0 -> epsilon 143 | 0, Some 1 -> alt2 epsilon x 144 | 1, Some 1 -> x 145 | _ -> make (Rep (x, i, j)) 146 147 let rec int2 x y = 148 let c = compare x y in 149 if c = 0 then x else 150 match x.desc, y.desc with 151 Int [], _ -> y 152 | _, Int [] -> x 153 | Int r, Int s -> make (Int (alt_merge r s)) 154 | Int [r], _ when r = y -> y 155 | _, Int [s] when s = x -> x 156 | Int r, _ -> make (Int (alt_merge r [y])) 157 | _, Int s -> make (Int (alt_merge [x] s)) 158 | _, _ -> make (if c < 0 then Int [x; y] else Int [y; x]) 159 160 let int l = List.fold_right int2 l empty 161 162 let cst c = Cst [Char.code c] 163 164 let rec dif x y = 165 if x = y then empty else 166 match x.desc, y.desc with 167 Dif (x1, y1), _ -> dif x1 (alt2 y1 y) 168 | Alt [], _ -> empty 169 | _, Alt [] -> x 170 | _ -> make (Dif (x, y)) 171 172 (**** Computation of the next states of an automata ****) 173 174 type pos = Pos_bol | Pos_other 175 let never = 0 176 let always = (-1) 177 let when_eol = 2 178 179 let combine top bot op f l = 180 let rec combine v l = 181 match l with 182 [] -> v 183 | a::r -> 184 let c = f a in 185 if c = bot then c else combine (op v c) r 186 in 187 combine top l 188 189 module ReTbl = 190 Hashtbl.Make 191 (struct 192 type t = u 193 let equal x y = x.hash = y.hash && x = y 194 let hash x = x.hash 195 end) 196 197 let h = ReTbl.create 101 198 let rec contains_epsilon pos x = 199 try ReTbl.find h x with Not_found -> 200 let res = 201 match x.desc with 202 Cst _ -> never 203 | Alt l -> combine never always (lor) (contains_epsilon pos) l 204 | Seq l -> combine always never (land) (contains_epsilon pos) l 205 | Rep (_, 0, _) -> always 206 | Rep (y, _, _) -> contains_epsilon pos y 207 | Bol -> if pos = Pos_bol then always else never 208 | Eol -> when_eol 209 | Int l -> combine always never (land) (contains_epsilon pos) l 210 | Dif (y, z) -> contains_epsilon pos y land 211 (lnot (contains_epsilon pos z)) 212 in 213 ReTbl.add h x res; res 214 215 module DiffTbl = 216 Hashtbl.Make 217 (struct 218 type t = int * u 219 let equal ((c : int), x) (d, y) = c = d && x.hash = y.hash && x = y 220 let hash (c, x) = x.hash + 11 * c 221 end) 222 223 let diff_cache = DiffTbl.create 101 224 225 let rec delta_seq nl pos c l = 226 match l with 227 [] -> 228 empty 229 | x::r -> 230 let rdx = seq2 (delta nl pos c x) (seq' r) in 231 let eps = contains_epsilon pos x in 232 if eps land always = always then 233 alt2 rdx (delta_seq nl pos c r) 234 else if eps land when_eol = when_eol && c = nl then 235 alt2 rdx (delta_seq nl pos c r) 236 else 237 rdx 238 239 and delta nl pos c x = 240 let p = (c, x) in 241 try DiffTbl.find diff_cache p with Not_found -> 242 let res = 243 match x.desc with 244 Cst l -> if List.mem c l then epsilon else empty 245 | Alt l -> alt (List.map (delta nl pos c) l) 246 | Seq l -> delta_seq nl pos c l 247 | Rep (y, 0, None) -> seq2 (delta nl pos c y) x 248 | Rep (y, i, None) -> seq2 (delta nl pos c y) (rep y (i - 1) None) 249 | Rep (y, 0, Some j) -> seq2 (delta nl pos c y) (rep y 0 (Some (j - 1))) 250 | Rep (y, i, Some j) -> seq2 (delta nl pos c y) (rep y (i - 1) (Some (j-1))) 251 | Eol | Bol -> empty 252 | Int l -> int (List.map (delta nl pos c) l) 253 | Dif (y, z) -> dif (delta nl pos c y) (delta nl pos c z) 254 in 255 DiffTbl.add diff_cache p res; 256 res 257 258 (**** String matching ****) 259 260 type state = 261 { mutable valid : bool; 262 mutable next : state array; 263 pos : pos; 264 final : bool; 265 desc : u } 266 267 type rx = 268 { initial : state; 269 categ : int array; 270 ncat : int; 271 states : state ReTbl.t } 272 273 let unknown = 274 { valid = false; next = [||]; desc = empty ; pos = Pos_bol; final = false } 275 276 let mk_state ncat pos desc = 277 { valid = desc <> empty; 278 next = Array.make ncat unknown; 279 pos = pos; 280 desc = desc; 281 final = contains_epsilon pos desc <> 0 } 282 283 let find_state states ncat pos desc = 284 try 285 ReTbl.find states desc 286 with Not_found -> 287 let st = mk_state ncat pos desc in 288 ReTbl.add states desc st; 289 st 290 291 let rec validate s i l rx cat st c = 292 let nl = cat.(Char.code '\n') in 293 let desc = delta nl st.pos c st.desc in 294 st.next.(c) <- 295 find_state rx.states rx.ncat (if c = nl then Pos_bol else Pos_other) desc; 296 loop s i l rx cat st 297 298 and loop s i l rx cat st = 299 let rec loop i st = 300 let c = Array.unsafe_get cat (Char.code (String.unsafe_get s i)) in 301 let st' = Array.unsafe_get st.next c in 302 if st'.valid then begin 303 let i = i + 1 in 304 if i < l then 305 loop i st' 306 else 307 st'.final 308 end else if st' != unknown then 309 false 310 else 311 validate s i l rx cat st c 312 in 313 loop i st 314 315 let match_str rx s = 316 let l = String.length s in 317 if l = 0 then rx.initial.final else 318 loop s 0 l rx rx.categ rx.initial 319 320 (* Combining the final and valid fields may make things slightly faster 321 (one less memory access) *) 322 let rec validate_pref s i l l0 rx cat st c = 323 let nl = cat.(Char.code '\n') in 324 let desc = delta nl st.pos c st.desc in 325 st.next.(c) <- 326 find_state rx.states rx.ncat (if c = nl then Pos_bol else Pos_other) desc; 327 loop_pref s i l l0 rx cat st 328 329 and loop_pref s i l l0 rx cat st = 330 let rec loop i l0 st = 331 let c = Array.unsafe_get cat (Char.code (String.unsafe_get s i)) in 332 let st' = Array.unsafe_get st.next c in 333 if st'.valid then begin 334 let i = i + 1 in 335 let l0 = if st'.final then i else l0 in 336 if i < l then 337 loop i l0 st' 338 else 339 l0 340 end else if st' != unknown then 341 l0 342 else 343 validate_pref s i l l0 rx cat st c 344 in 345 loop i l0 st 346 347 let match_pref rx s p = 348 let l = String.length s in 349 if p < 0 || p > l then invalid_arg "Rx.rep"; 350 let l0 = if rx.initial.final then p else -1 in 351 let l0 = 352 if l = p then l0 else 353 loop_pref s p l l0 rx rx.categ rx.initial 354 in 355 if l0 >= 0 then Some (l0 - p) else None 356 357 let mk_rx init categ ncat = 358 let states = ReTbl.create 97 in 359 { initial = find_state states ncat Pos_bol init; 360 categ = categ; 361 ncat = ncat; 362 states = states } 363 364 (**** Character sets ****) 365 366 let rec cunion l l' = 367 match l, l' with 368 _, [] -> l 369 | [], _ -> l' 370 | (c1, c2)::r, (c1', c2')::r' -> 371 if c2 + 1 < c1' then 372 (c1, c2)::cunion r l' 373 else if c2' + 1 < c1 then 374 (c1', c2')::cunion l r' 375 else if c2 < c2' then 376 cunion r ((min c1 c1', c2')::r') 377 else 378 cunion ((min c1 c1', c2)::r) r' 379 380 let rec cinter l l' = 381 match l, l' with 382 _, [] -> [] 383 | [], _ -> [] 384 | (c1, c2)::r, (c1', c2')::r' -> 385 if c2 < c1' then 386 cinter r l' 387 else if c2' < c1 then 388 cinter l r' 389 else if c2 < c2' then 390 (max c1 c1', c2)::cinter r l' 391 else 392 (max c1 c1', c2')::cinter l r' 393 394 let rec cnegate mi ma l = 395 match l with 396 [] -> 397 if mi <= ma then [(mi, ma)] else [] 398 | (c1, c2)::r when ma < c1 -> 399 if mi <= ma then [(mi, ma)] else [] 400 | (c1, c2)::r when mi < c1 -> 401 (mi, c1 - 1) :: cnegate c1 ma l 402 | (c1, c2)::r (* when c1 <= mi *) -> 403 cnegate (max mi (c2 + 1)) ma r 404 405 let csingle c = let i = Char.code c in [i, i] 406 407 let cadd c l = cunion (csingle c) l 408 409 let cseq c c' = 410 let i = Char.code c in let i' = Char.code c' in 411 if i <= i' then [i, i'] else [i', i] 412 413 let rec ctrans o l = 414 match l with 415 [] -> [] 416 | (c1, c2) :: r -> 417 if c2 + o < 0 || c1 + o > 255 then 418 ctrans o r 419 else 420 (c1 + o, c2 + o) :: ctrans o r 421 422 let cany = [0, 255] 423 424 type cset = (int * int) list 425 426 (**** Compilation of a regular expression ****) 427 428 type regexp = 429 Set of cset 430 | Sequence of regexp list 431 | Alternative of regexp list 432 | Repeat of regexp * int * int option 433 | Beg_of_line | End_of_line 434 | Intersection of regexp list 435 | Difference of regexp * regexp 436 437 let rec split s cm = 438 match s with 439 [] -> () 440 | (i, j)::r -> cm.(i) <- true; cm.(j + 1) <- true; split r cm 441 442 let rec colorize c regexp = 443 let rec colorize regexp = 444 match regexp with 445 Set s -> split s c 446 | Sequence l -> List.iter colorize l 447 | Alternative l -> List.iter colorize l 448 | Repeat (r, _, _) -> colorize r 449 | Beg_of_line | End_of_line -> split (csingle '\n') c 450 | Intersection l -> List.iter colorize l 451 | Difference (s, t) -> colorize s; colorize t 452 in 453 colorize regexp 454 455 let make_cmap () = Array.make 257 false 456 457 let flatten_cmap cm = 458 let c = Array.make 256 0 in 459 let v = ref 0 in 460 for i = 1 to 255 do 461 if cm.(i) then incr v; 462 c.(i) <- !v 463 done; 464 (c, !v + 1) 465 466 let rec interval i j = if i > j then [] else i :: interval (i + 1) j 467 468 let rec cset_hash_rec l = 469 match l with 470 [] -> 0 471 | (i, j)::r -> i + 13 * j + 257 * cset_hash_rec r 472 let cset_hash l = (cset_hash_rec l) land 0x3FFFFFFF 473 474 module CSetMap = 475 Map.Make 476 (struct 477 type t = int * (int * int) list 478 let compare (i, u) (j, v) = 479 let c = compare i j in if c <> 0 then c else compare u v 480 end) 481 482 let trans_set cache cm s = 483 match s with 484 [i, j] when i = j -> 485 [cm.(i)] 486 | _ -> 487 let v = (cset_hash_rec s, s) in 488 try 489 CSetMap.find v !cache 490 with Not_found -> 491 let l = 492 List.fold_right (fun (i, j) l -> cunion [cm.(i), cm.(j)] l) s [] 493 in 494 let res = 495 List.flatten (List.map (fun (i, j) -> interval i j) l) 496 in 497 cache := CSetMap.add v res !cache; 498 res 499 500 let rec trans_seq cache c r rem = 501 match r with 502 Sequence l -> List.fold_right (trans_seq cache c) l rem 503 | _ -> seq2 (translate cache c r) rem 504 505 and translate cache c r = 506 match r with 507 Set s -> make (Cst (trans_set cache c s)) 508 | Alternative l -> alt (List.map (translate cache c) l) 509 | Sequence l -> trans_seq cache c r epsilon 510 | Repeat (r', i, j) -> rep (translate cache c r') i j 511 | Beg_of_line -> make Bol 512 | End_of_line -> make Eol 513 | Intersection l -> int (List.map (translate cache c) l) 514 | Difference (r', r'') -> dif (translate cache c r') (translate cache c r'') 515 516 let compile regexp = 517 let c = make_cmap () in 518 colorize c regexp; 519 let (cat, ncat) = flatten_cmap c in 520 let r = translate (ref (CSetMap.empty)) cat regexp in 521 mk_rx r cat ncat 522 523 (**** Regexp type ****) 524 525 type t = {def : regexp; mutable comp: rx option; mutable comp': rx option} 526 527 let force r = 528 match r.comp with 529 Some r' -> r' 530 | None -> let r' = compile r.def in r.comp <- Some r'; r' 531 532 let anything = Repeat (Set [0, 255], 0, None) 533 let force' r = 534 match r.comp' with 535 Some r' -> r' 536 | None -> 537 let r1 = Sequence [anything; r.def; anything] in 538 let r' = compile r1 in r.comp' <- Some r'; r' 539 540 let wrap r = {def = r; comp = None; comp' = None} 541 let def r = r.def 542 543 let alt rl = wrap (Alternative (List.map def rl)) 544 let seq rl = wrap (Sequence (List.map def rl)) 545 let empty = alt [] 546 let epsilon = seq [] 547 let rep r i j = 548 if i < 0 then invalid_arg "Rx.rep"; 549 begin match j with Some j when j < i -> invalid_arg "Rx.rep" | _ -> () end; 550 wrap (Repeat (def r, i, j)) 551 let rep0 r = rep r 0 None 552 let rep1 r = rep r 1 None 553 let opt r = alt [epsilon; r] 554 let bol = wrap Beg_of_line 555 let eol = wrap End_of_line 556 let any = wrap (Set [0, 255]) 557 let notnl = wrap (Set (cnegate 0 255 (csingle '\n'))) 558 let inter rl = wrap (Intersection (List.map def rl)) 559 let diff r r' = wrap (Difference (def r, def r')) 560 561 let set str = 562 let s = ref [] in 563 for i = 0 to String.length str - 1 do 564 s := cunion (csingle str.[i]) !s 565 done; 566 wrap (Set !s) 567 568 let str s = 569 let l = ref [] in 570 for i = String.length s - 1 downto 0 do 571 l := Set (csingle s.[i]) :: !l 572 done; 573 wrap (Sequence !l) 574 575 let match_string t s = match_str (force t) s 576 let match_substring t s = match_str (force' t) s 577 let match_prefix t s p = match_pref (force t) s p 578 579 let uppercase = 580 cunion (cseq 'A' 'Z') (cunion (cseq '\192' '\214') (cseq '\216' '\222')) 581 582 let lowercase = ctrans 32 uppercase 583 584 let rec case_insens r = 585 match r with 586 Set s -> 587 Set (cunion s (cunion (ctrans 32 (cinter s uppercase)) 588 (ctrans (-32) (cinter s lowercase)))) 589 | Sequence l -> 590 Sequence (List.map case_insens l) 591 | Alternative l -> 592 Alternative (List.map case_insens l) 593 | Repeat (r, i, j) -> 594 Repeat (case_insens r, i, j) 595 | Beg_of_line | End_of_line -> 596 r 597 | Intersection l -> 598 Intersection (List.map case_insens l) 599 | Difference (r, r') -> 600 Difference (case_insens r, case_insens r') 601 602 let case_insensitive r = 603 wrap (case_insens (def r)) 604 605 (**** Parser ****) 606 607 exception Parse_error 608 exception Not_supported 609 610 let parse s = 611 let i = ref 0 in 612 let l = String.length s in 613 let eos () = !i = l in 614 let test c = not (eos ()) && s.[!i] = c in 615 let accept c = let r = test c in if r then incr i; r in 616 let get () = let r = s.[!i] in incr i; r in 617 let unget () = decr i in 618 619 let rec regexp () = regexp' (branch ()) 620 and regexp' left = 621 if accept '|' then regexp' (Alternative [left; branch ()]) else left 622 and branch () = branch' (piece ()) 623 and branch' left = 624 if eos () || test '|' || test ')' then left 625 else branch' (Sequence [left; piece ()]) 626 and piece () = 627 let r = atom () in 628 if accept '*' then Repeat (r, 0, None) else 629 if accept '+' then Repeat (r, 1, None) else 630 if accept '?' then Alternative [Sequence []; r] else 631 if accept '{' then 632 match integer () with 633 Some i -> 634 let j = if accept ',' then integer () else Some i in 635 if not (accept '}') then raise Parse_error; 636 begin match j with 637 Some j when j < i -> raise Parse_error | _ -> () 638 end; 639 Repeat (r, i, j) 640 | None -> 641 unget (); r 642 else 643 r 644 and atom () = 645 if accept '.' then Set cany else 646 if accept '(' then begin 647 let r = regexp () in 648 if not (accept ')') then raise Parse_error; 649 r 650 end else 651 if accept '^' then Beg_of_line else 652 if accept '$' then End_of_line else 653 if accept '[' then begin 654 if accept '^' then 655 Set (cnegate 0 255 (bracket [])) 656 else 657 Set (bracket []) 658 end else 659 if accept '\\' then begin 660 if eos () then raise Parse_error; 661 match get () with 662 '|' | '(' | ')' | '*' | '+' | '?' 663 | '[' | '.' | '^' | '$' | '{' | '\\' as c -> Set (csingle c) 664 | _ -> raise Parse_error 665 end else begin 666 if eos () then raise Parse_error; 667 match get () with 668 '*' | '+' | '?' | '{' | '\\' -> raise Parse_error 669 | c -> Set (csingle c) 670 end 671 and integer () = 672 if eos () then None else 673 match get () with 674 '0'..'9' as d -> integer' (Char.code d - Char.code '0') 675 | _ -> unget (); None 676 and integer' i = 677 if eos () then Some i else 678 match get () with 679 '0'..'9' as d -> 680 let i' = 10 * i + (Char.code d - Char.code '0') in 681 if i' < i then raise Parse_error; 682 integer' i' 683 | _ -> 684 unget (); Some i 685 and bracket s = 686 if s <> [] && accept ']' then s else begin 687 let c = char () in 688 if accept '-' then begin 689 if accept ']' then (cadd c (cadd '-' s)) else begin 690 let c' = char () in 691 bracket (cunion (cseq c c') s) 692 end 693 end else 694 bracket (cadd c s) 695 end 696 and char () = 697 if eos () then raise Parse_error; 698 let c = get () in 699 if c = '[' then begin 700 if accept '=' || accept ':' then raise Not_supported; 701 if accept '.' then begin 702 if eos () then raise Parse_error; 703 let c = get () in 704 if not (accept '.') then raise Not_supported; 705 if not (accept ']') then raise Parse_error; 706 c 707 end else 708 c 709 end else 710 c 711 in 712 let res = regexp () in 713 if not (eos ()) then raise Parse_error; 714 res 715 716 let rx s = wrap (parse s) 717 718 (**** File globbing ****) 719 720 let gany = cnegate 0 255 (csingle '/') 721 let notdot = cnegate 0 255 (cunion (csingle '.') (csingle '/')) 722 let dot = csingle '.' 723 724 type loc = Beg | BegAny | Mid 725 726 let beg_start = 727 Alternative [Sequence []; Sequence [Set notdot; Repeat (Set gany, 0, None)]] 728 729 let beg_start' = 730 Sequence [Set notdot; Repeat (Set gany, 0, None)] 731 732 let glob_parse init s = 733 let i = ref 0 in 734 let l = String.length s in 735 let eos () = !i = l in 736 let test c = not (eos ()) && s.[!i] = c in 737 let accept c = let r = test c in if r then incr i; r in 738 let get () = let r = s.[!i] in incr i; r in 739 (* let unget () = decr i in *) 740 741 let rec expr () = expr' init (Sequence []) 742 and expr' beg left = 743 if eos () then 744 match beg with 745 Mid | Beg -> left 746 | BegAny -> Sequence [left; beg_start] 747 else 748 let (piec, beg) = piece beg in expr' beg (Sequence [left; piec]) 749 and piece beg = 750 if accept '*' then begin 751 if beg <> Mid then 752 (Sequence [], BegAny) 753 else 754 (Repeat (Set gany, 0, None), Mid) 755 end else if accept '?' then 756 (begin match beg with 757 Beg -> Set notdot 758 | BegAny -> Sequence [Set notdot; Repeat (Set gany, 0, None)] 759 | Mid -> Set gany 760 end, 761 Mid) 762 else if accept '[' then begin 763 (* let mask = if beg <> Mid then notdot else gany in *) 764 let set = 765 if accept '^' || accept '!' then 766 cnegate 0 255 (bracket []) 767 else 768 bracket [] 769 in 770 (begin match beg with 771 Beg -> Set (cinter notdot set) 772 | BegAny -> Alternative [Sequence [beg_start; Set (cinter notdot set)]; 773 Sequence [beg_start'; Set (cinter dot set)]] 774 | Mid -> Set (cinter gany set) 775 end, 776 Mid) 777 end else 778 let c = char () in 779 ((if beg <> BegAny then 780 Set (csingle c) 781 else if c = '.' then 782 Sequence [beg_start'; Set (csingle c)] 783 else 784 Sequence [beg_start; Set (csingle c)]), 785 if c = '/' then init else Mid) 786 and bracket s = 787 if s <> [] && accept ']' then s else begin 788 let c = char () in 789 if accept '-' then begin 790 if accept ']' then (cadd c (cadd '-' s)) else begin 791 let c' = char () in 792 bracket (cunion (cseq c c') s) 793 end 794 end else 795 bracket (cadd c s) 796 end 797 and char () = 798 ignore (accept '\\'); 799 if eos () then raise Parse_error; 800 get () 801 in 802 let res = expr () in 803 res 804 805 let rec mul l l' = 806 List.flatten (List.map (fun s -> List.map (fun s' -> s ^ s') l') l) 807 808 let explode str = 809 let l = String.length str in 810 let rec expl inner s i acc beg = 811 if i >= l then begin 812 if inner then raise Parse_error; 813 (mul beg [String.sub str s (i - s)], i) 814 end else 815 match str.[i] with 816 '\\' -> expl inner s (i + 2) acc beg 817 | '{' -> 818 let (t, i') = expl true (i + 1) (i + 1) [] [""] in 819 expl inner i' i' acc 820 (mul beg (mul [String.sub str s (i - s)] t)) 821 | ',' when inner -> 822 expl inner (i + 1) (i + 1) 823 (mul beg [String.sub str s (i - s)] @ acc) [""] 824 | '}' when inner -> 825 (mul beg [String.sub str s (i - s)] @ acc, i + 1) 826 | _ -> 827 expl inner s (i + 1) acc beg 828 in 829 List.rev (fst (expl false 0 0 [] [""])) 830 831 let glob' nodot s = wrap (glob_parse (if nodot then Beg else Mid) s) 832 let glob s = glob' true s 833 let globx' nodot s = alt (List.map (glob' nodot) (explode s)) 834 let globx s = globx' true s