unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

rx.ml (23635B)


      1 (* Unison file synchronizer: src/ubase/rx.ml *)
      2 (* Copyright 1999-2020, Benjamin C. Pierce
      3 
      4     This program is free software: you can redistribute it and/or modify
      5     it under the terms of the GNU General Public License as published by
      6     the Free Software Foundation, either version 3 of the License, or
      7     (at your option) any later version.
      8 
      9     This program is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12     GNU General Public License for more details.
     13 
     14     You should have received a copy of the GNU General Public License
     15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16 *)
     17 
     18 (*
     19   Inspired by some code and algorithms from Mark William Hopkins
     20   (regexp.tar.gz, available in the comp.compilers file archive)
     21 *)
     22 
     23 (*
     24 Missing POSIX features
     25 ----------------------
     26 - Collating sequences
     27 *)
     28 
     29 type v =
     30     Cst of int list
     31   | Alt of u list
     32   | Seq of u list
     33   | Rep of u * int * int option
     34   | Bol | Eol
     35   | Int of u list
     36   | Dif of u * u
     37 
     38 and u = { desc : v; hash : int }
     39 
     40 (****)
     41 
     42 let hash x =
     43   match x with
     44     Cst l -> List.fold_left (fun h i -> h + 757 * i) 0 l
     45   | Alt l -> 199 * List.fold_left (fun h y -> h + 883 * y.hash) 0 l
     46   | Seq l -> 821 * List.fold_left (fun h y -> h + 883 * y.hash) 0 l
     47   | Rep (y, i, Some j) -> 197 * y.hash + 137 * i + j
     48   | Rep (y, i, None) -> 197 * y.hash + 137 * i + 552556457
     49   | Bol -> 165160782
     50   | Eol -> 152410806
     51   | Int l -> 71 * List.fold_left (fun h y -> h + 883 * y.hash) 0 l
     52   | Dif (y, z) -> 379 * y.hash + 563 * z.hash
     53 
     54 let make x = {desc = x; hash = hash x}
     55 
     56 let epsilon = make (Seq [])
     57 let empty = make (Alt [])
     58 
     59 (**** Printing ****)
     60 
     61 open Format
     62 
     63 let print_list sep print l =
     64   match l with
     65     [] -> ()
     66   | v::r -> print v; List.iter (fun v -> sep (); print v) r
     67 
     68 let rec print n t =
     69   match t.desc with
     70     Cst l ->
     71       open_box 1; print_string "[";
     72       print_list print_space print_int l;
     73       print_string "]"; close_box ()
     74   | Alt tl ->
     75       if n > 0 then begin open_box 1; print_string "(" end;
     76       print_list (fun () -> print_string "|"; print_cut ()) (print 1) tl;
     77       if n > 0 then begin print_string ")"; close_box () end
     78   | Seq tl ->
     79       if n > 1 then begin open_box 1; print_string "(" end;
     80       print_list (fun () -> print_cut ()) (print 2) tl;
     81       if n > 1 then begin print_string ")"; close_box () end
     82   | Rep (t, 0, None) ->
     83       print 2 t; print_string "*"
     84   | Rep (t, i, None) ->
     85       print 2 t; print_string "{"; print_int i; print_string ",}"
     86   | Rep (t, i, Some j) ->
     87       print 2 t;
     88       print_string "{"; print_int i; print_string ",";
     89       print_int j; print_string "}"
     90   | _ -> assert false
     91 
     92 (**** Constructors for regular expressions *)
     93 
     94 let seq2 x y =
     95   match x.desc, y.desc with
     96     Alt [], _ | _, Alt [] -> empty
     97   | Seq [], s             -> y
     98   | r, Seq []             -> x
     99   | Seq r, Seq s          -> make (Seq (r @ s))
    100   | Seq r, _              -> make (Seq (r @ [y]))
    101   | _, Seq s              -> make (Seq (x :: s))
    102   | r, s                  -> make (Seq [x; y])
    103 
    104 let seq l = List.fold_right seq2 l epsilon
    105 
    106 let seq' l = match l with [] -> epsilon | [x] -> x | _ -> make (Seq l)
    107 
    108 let rec alt_merge r s =
    109   match r, s with
    110     [], _ -> s
    111   | _, [] -> r
    112   | {desc = Seq (x::m)} :: s, {desc = Seq (y::n)} :: r when x = y ->
    113       alt_merge (seq2 x (alt2 (seq' m) (seq' n))::s) r
    114   | x :: r', y :: s' ->
    115       let c = compare x y in
    116       if c = 0 then x :: alt_merge r' s'
    117       else if c < 0 then x :: alt_merge r' s
    118       else (* if c > 0 then *) y :: alt_merge r s'
    119 
    120 and alt2 x y =
    121   let c = compare x y in
    122   if c = 0 then x else
    123   match x.desc, y.desc with
    124     Alt [], _             -> y
    125   | _, Alt []             -> x
    126   | Alt r, Alt s          -> make (Alt (alt_merge r s))
    127   | Alt [r], _ when r = y -> y
    128   | _, Alt [s] when x = s -> x
    129   | Alt r, _              -> make (Alt (alt_merge r [y]))
    130   | _, Alt s              -> make (Alt (alt_merge [x] s))
    131   | Seq (r::m), Seq (s::n) when r = s -> seq2 r (alt2 (seq' m) (seq' n))
    132   | _, _                  -> make (if c < 0 then Alt [x; y] else Alt [y; x])
    133 
    134 let alt l = List.fold_right alt2 l empty
    135 
    136 let rep x i j =
    137   match x.desc with
    138     Alt [] when i > 0 -> empty
    139   | Alt [] | Seq []   -> epsilon
    140   | _                 ->
    141       match i, j with
    142         _, Some 0 -> epsilon
    143       | 0, Some 1 -> alt2 epsilon x
    144       | 1, Some 1 -> x
    145       | _         -> make (Rep (x, i, j))
    146 
    147 let rec int2 x y =
    148   let c = compare x y in
    149   if c = 0 then x else
    150   match x.desc, y.desc with
    151     Int [], _             -> y
    152   | _, Int []             -> x
    153   | Int r, Int s          -> make (Int (alt_merge r s))
    154   | Int [r], _ when r = y -> y
    155   | _, Int [s] when s = x -> x
    156   | Int r, _              -> make (Int (alt_merge r [y]))
    157   | _, Int s              -> make (Int (alt_merge [x] s))
    158   | _, _                  -> make (if c < 0 then Int [x; y] else Int [y; x])
    159 
    160 let int l = List.fold_right int2 l empty
    161 
    162 let cst c = Cst [Char.code c]
    163 
    164 let rec dif x y =
    165   if x = y then empty else
    166   match x.desc, y.desc with
    167     Dif (x1, y1), _ -> dif x1 (alt2 y1 y)
    168   | Alt [], _       -> empty
    169   | _, Alt []       -> x
    170   | _               -> make (Dif (x, y))
    171 
    172 (**** Computation of the next states of an automata ****)
    173 
    174 type pos = Pos_bol | Pos_other
    175 let never = 0
    176 let always = (-1)
    177 let when_eol = 2
    178 
    179 let combine top bot op f l =
    180   let rec combine v l =
    181     match l with
    182       [] -> v
    183     | a::r ->
    184         let c = f a in
    185         if c = bot then c else combine (op v c) r
    186   in
    187   combine top l
    188 
    189 module ReTbl =
    190   Hashtbl.Make
    191     (struct
    192        type t = u
    193        let equal x y = x.hash = y.hash && x = y
    194        let hash x = x.hash
    195      end)
    196 
    197 let h = ReTbl.create 101
    198 let rec contains_epsilon pos x =
    199 try ReTbl.find h x with Not_found ->
    200 let res =
    201   match x.desc with
    202     Cst _         -> never
    203   | Alt l         -> combine never always (lor) (contains_epsilon pos) l
    204   | Seq l         -> combine always never (land) (contains_epsilon pos) l
    205   | Rep (_, 0, _) -> always
    206   | Rep (y, _, _) -> contains_epsilon pos y
    207   | Bol           -> if pos = Pos_bol then always else never
    208   | Eol           -> when_eol
    209   | Int l         -> combine always never (land) (contains_epsilon pos) l
    210   | Dif (y, z)    -> contains_epsilon pos y land
    211                      (lnot (contains_epsilon pos z))
    212 in
    213 ReTbl.add h x res; res
    214 
    215 module DiffTbl =
    216   Hashtbl.Make
    217     (struct
    218        type t = int * u
    219        let equal ((c : int), x) (d, y) = c = d && x.hash = y.hash && x = y
    220        let hash (c, x) = x.hash + 11 * c
    221      end)
    222 
    223 let diff_cache = DiffTbl.create 101
    224 
    225 let rec delta_seq nl pos c l =
    226   match l with
    227     [] ->
    228       empty
    229   | x::r ->
    230       let rdx = seq2 (delta nl pos c x) (seq' r) in
    231       let eps = contains_epsilon pos x in
    232       if eps land always = always then
    233         alt2 rdx (delta_seq nl pos c r)
    234       else if eps land when_eol = when_eol && c = nl then
    235         alt2 rdx (delta_seq nl pos c r)
    236       else
    237         rdx
    238 
    239 and delta nl pos c x =
    240 let p = (c, x) in
    241 try DiffTbl.find diff_cache p with Not_found ->
    242 let res =
    243   match x.desc with
    244     Cst l -> if List.mem c l then epsilon else empty
    245   | Alt l -> alt (List.map (delta nl pos c) l)
    246   | Seq l -> delta_seq nl pos c l
    247   | Rep (y, 0, None) -> seq2 (delta nl pos c y) x
    248   | Rep (y, i, None) -> seq2 (delta nl pos c y) (rep y (i - 1) None)
    249   | Rep (y, 0, Some j) -> seq2 (delta nl pos c y) (rep y 0 (Some (j - 1)))
    250   | Rep (y, i, Some j) -> seq2 (delta nl pos c y) (rep y (i - 1) (Some (j-1)))
    251   | Eol | Bol -> empty
    252   | Int l -> int (List.map (delta nl pos c) l)
    253   | Dif (y, z) -> dif (delta nl pos c y) (delta nl pos c z)
    254 in
    255 DiffTbl.add diff_cache p res;
    256 res
    257 
    258 (**** String matching ****)
    259 
    260 type state =
    261   { mutable valid : bool;
    262     mutable next : state array;
    263             pos : pos;
    264             final : bool;
    265             desc : u }
    266 
    267 type rx =
    268   { initial : state;
    269     categ   : int array;
    270     ncat    : int;
    271     states  : state ReTbl.t }
    272 
    273 let unknown =
    274   { valid = false; next = [||]; desc = empty ; pos = Pos_bol; final = false }
    275 
    276 let mk_state ncat pos desc =
    277   { valid = desc <> empty;
    278     next = Array.make ncat unknown;
    279     pos = pos;
    280     desc = desc;
    281     final = contains_epsilon pos desc <> 0 }
    282 
    283 let find_state states ncat pos desc =
    284   try
    285     ReTbl.find states desc
    286   with Not_found ->
    287     let st = mk_state ncat pos desc in
    288     ReTbl.add states desc st;
    289     st
    290 
    291 let rec validate s i l rx cat st c =
    292   let nl = cat.(Char.code '\n') in
    293   let desc = delta nl st.pos c st.desc in
    294   st.next.(c) <-
    295     find_state rx.states rx.ncat (if c = nl then Pos_bol else Pos_other) desc;
    296   loop s i l rx cat st
    297 
    298 and loop s i l rx cat st =
    299   let rec loop i st =
    300     let c = Array.unsafe_get cat (Char.code (String.unsafe_get s i)) in
    301     let st' = Array.unsafe_get st.next c in
    302     if st'.valid then begin
    303       let i = i + 1 in
    304       if i < l then
    305         loop i st'
    306       else
    307         st'.final
    308     end else if st' != unknown then
    309       false
    310     else
    311       validate s i l rx cat st c
    312   in
    313   loop i st
    314 
    315 let match_str rx s =
    316   let l = String.length s in
    317   if l = 0 then rx.initial.final else
    318   loop s 0 l rx rx.categ rx.initial
    319 
    320 (* Combining the final and valid fields may make things slightly faster
    321    (one less memory access) *)
    322 let rec validate_pref s i l l0 rx cat st c =
    323   let nl = cat.(Char.code '\n') in
    324   let desc = delta nl st.pos c st.desc in
    325   st.next.(c) <-
    326     find_state rx.states rx.ncat (if c = nl then Pos_bol else Pos_other) desc;
    327   loop_pref s i l l0 rx cat st
    328 
    329 and loop_pref s i l l0 rx cat st =
    330   let rec loop i l0 st =
    331     let c = Array.unsafe_get cat (Char.code (String.unsafe_get s i)) in
    332     let st' = Array.unsafe_get st.next c in
    333     if st'.valid then begin
    334       let i = i + 1 in
    335       let l0 = if st'.final then i else l0 in
    336       if i < l then
    337         loop i l0 st'
    338       else
    339         l0
    340     end else if st' != unknown then
    341       l0
    342     else
    343       validate_pref s i l l0 rx cat st c
    344   in
    345   loop i l0 st
    346 
    347 let match_pref rx s p =
    348   let l = String.length s in
    349   if p < 0 || p > l then invalid_arg "Rx.rep";
    350   let l0 = if rx.initial.final then p else -1 in
    351   let l0 =
    352     if l = p then l0 else
    353     loop_pref s p l l0 rx rx.categ rx.initial
    354   in
    355   if l0 >= 0 then Some (l0 - p) else None
    356 
    357 let mk_rx init categ ncat =
    358   let states = ReTbl.create 97 in
    359   { initial = find_state states ncat Pos_bol init;
    360     categ = categ;
    361     ncat = ncat;
    362     states = states }
    363 
    364 (**** Character sets ****)
    365 
    366 let rec cunion l l' =
    367   match l, l' with
    368     _, [] -> l
    369   | [], _ -> l'
    370   | (c1, c2)::r, (c1', c2')::r' ->
    371       if c2 + 1 < c1' then
    372         (c1, c2)::cunion r l'
    373       else if c2' + 1 < c1 then
    374         (c1', c2')::cunion l r'
    375       else if c2 < c2' then
    376         cunion r ((min c1 c1', c2')::r')
    377       else
    378         cunion ((min c1 c1', c2)::r) r'
    379 
    380 let rec cinter l l' =
    381   match l, l' with
    382     _, [] -> []
    383   | [], _ -> []
    384   | (c1, c2)::r, (c1', c2')::r' ->
    385       if c2 < c1' then
    386         cinter r l'
    387       else if c2' < c1 then
    388         cinter l r'
    389       else if c2 < c2' then
    390         (max c1 c1', c2)::cinter r l'
    391       else
    392         (max c1 c1', c2')::cinter l r'
    393 
    394 let rec cnegate mi ma l =
    395   match l with
    396     [] ->
    397       if mi <= ma then [(mi, ma)] else []
    398   | (c1, c2)::r when ma < c1 ->
    399       if mi <= ma then [(mi, ma)] else []
    400   | (c1, c2)::r when mi < c1 ->
    401       (mi, c1 - 1) :: cnegate c1 ma l
    402   | (c1, c2)::r (* when c1 <= mi *) ->
    403       cnegate (max mi (c2 + 1)) ma r
    404 
    405 let csingle c = let i = Char.code c in [i, i]
    406 
    407 let cadd c l = cunion (csingle c) l
    408 
    409 let cseq c c' =
    410   let i = Char.code c in let i' = Char.code c' in
    411   if i <= i' then [i, i'] else [i', i]
    412 
    413 let rec ctrans o l =
    414   match l with
    415     [] -> []
    416   | (c1, c2) :: r ->
    417       if c2 + o < 0 || c1 + o > 255 then
    418         ctrans o r
    419       else
    420         (c1 + o, c2 + o) :: ctrans o r
    421 
    422 let cany = [0, 255]
    423 
    424 type cset = (int * int) list
    425 
    426 (**** Compilation of a regular expression ****)
    427 
    428 type regexp =
    429     Set of cset
    430   | Sequence of regexp list
    431   | Alternative of regexp list
    432   | Repeat of regexp * int * int option
    433   | Beg_of_line | End_of_line
    434   | Intersection of regexp list
    435   | Difference of regexp * regexp
    436 
    437 let rec split s cm =
    438   match s with
    439     []    -> ()
    440   | (i, j)::r -> cm.(i) <- true; cm.(j + 1) <- true; split r cm
    441 
    442 let rec colorize c regexp =
    443   let rec colorize regexp =
    444     match regexp with
    445       Set s                     -> split s c
    446     | Sequence l                -> List.iter colorize l
    447     | Alternative l             -> List.iter colorize l
    448     | Repeat (r, _, _)          -> colorize r
    449     | Beg_of_line | End_of_line -> split (csingle '\n') c
    450     | Intersection l            -> List.iter colorize l
    451     | Difference (s, t)         -> colorize s; colorize t
    452   in
    453   colorize regexp
    454 
    455 let make_cmap () = Array.make 257 false
    456 
    457 let flatten_cmap cm =
    458   let c = Array.make 256 0 in
    459   let v = ref 0 in
    460   for i = 1 to 255 do
    461     if cm.(i) then incr v;
    462     c.(i) <- !v
    463   done;
    464   (c, !v + 1)
    465 
    466 let rec interval i j = if i > j then [] else i :: interval (i + 1) j
    467 
    468 let rec cset_hash_rec l =
    469   match l with
    470     []        -> 0
    471   | (i, j)::r -> i + 13 * j + 257 * cset_hash_rec r
    472 let cset_hash l = (cset_hash_rec l) land 0x3FFFFFFF
    473 
    474 module CSetMap =
    475   Map.Make
    476   (struct
    477     type t = int * (int * int) list
    478     let compare (i, u) (j, v) =
    479       let c = compare i j in if c <> 0 then c else compare u v
    480    end)
    481 
    482 let trans_set cache cm s =
    483   match s with
    484     [i, j] when i = j ->
    485       [cm.(i)]
    486   | _ ->
    487       let v = (cset_hash_rec s, s) in
    488       try
    489         CSetMap.find v !cache
    490       with Not_found ->
    491         let l =
    492           List.fold_right (fun (i, j) l -> cunion [cm.(i), cm.(j)] l) s []
    493         in
    494         let res =
    495           List.flatten (List.map (fun (i, j) -> interval i j) l)
    496         in
    497         cache := CSetMap.add v res !cache;
    498         res
    499 
    500 let rec trans_seq cache c r rem =
    501   match r with
    502     Sequence l -> List.fold_right (trans_seq cache c) l rem
    503   | _ -> seq2 (translate cache c r) rem
    504 
    505 and translate cache c r =
    506   match r with
    507     Set s -> make (Cst (trans_set cache c s))
    508   | Alternative l -> alt (List.map (translate cache c) l)
    509   | Sequence l -> trans_seq cache c r epsilon
    510   | Repeat (r', i, j) -> rep (translate cache c r') i j
    511   | Beg_of_line -> make Bol
    512   | End_of_line -> make Eol
    513   | Intersection l -> int (List.map (translate cache c) l)
    514   | Difference (r', r'') -> dif (translate cache c r') (translate cache c r'')
    515 
    516 let compile regexp =
    517   let c = make_cmap () in
    518   colorize c regexp;
    519   let (cat, ncat) = flatten_cmap c in
    520   let r = translate (ref (CSetMap.empty)) cat regexp in
    521   mk_rx r cat ncat
    522 
    523 (**** Regexp type ****)
    524 
    525 type t = {def : regexp; mutable comp: rx option; mutable comp': rx option}
    526 
    527 let force r =
    528   match r.comp with
    529     Some r' -> r'
    530   | None -> let r' = compile r.def in r.comp <- Some r'; r'
    531 
    532 let anything = Repeat (Set [0, 255], 0, None)
    533 let force' r =
    534   match r.comp' with
    535     Some r' -> r'
    536   | None ->
    537       let r1 = Sequence [anything; r.def; anything] in
    538       let r' = compile r1 in r.comp' <- Some r'; r'
    539 
    540 let wrap r = {def = r; comp = None; comp' = None}
    541 let def r = r.def
    542 
    543 let alt rl = wrap (Alternative (List.map def rl))
    544 let seq rl = wrap (Sequence (List.map def rl))
    545 let empty = alt []
    546 let epsilon = seq []
    547 let rep r i j =
    548   if i < 0 then invalid_arg "Rx.rep";
    549   begin match j with Some j when j < i -> invalid_arg "Rx.rep" | _ -> () end;
    550   wrap (Repeat (def r, i, j))
    551 let rep0 r = rep r 0 None
    552 let rep1 r = rep r 1 None
    553 let opt r = alt [epsilon; r]
    554 let bol = wrap Beg_of_line
    555 let eol = wrap End_of_line
    556 let any = wrap (Set [0, 255])
    557 let notnl = wrap (Set (cnegate 0 255 (csingle '\n')))
    558 let inter rl = wrap (Intersection (List.map def rl))
    559 let diff r r' = wrap (Difference (def r, def r'))
    560 
    561 let set str =
    562   let s = ref [] in
    563   for i = 0 to String.length str - 1 do
    564     s := cunion (csingle str.[i]) !s
    565   done;
    566   wrap (Set !s)
    567 
    568 let str s =
    569   let l = ref [] in
    570   for i = String.length s - 1 downto 0 do
    571     l := Set (csingle s.[i]) :: !l
    572   done;
    573   wrap (Sequence !l)
    574 
    575 let match_string t s = match_str (force t) s
    576 let match_substring t s = match_str (force' t) s
    577 let match_prefix t s p = match_pref (force t) s p
    578 
    579 let uppercase =
    580   cunion (cseq 'A' 'Z') (cunion (cseq '\192' '\214') (cseq '\216' '\222'))
    581 
    582 let lowercase = ctrans 32 uppercase
    583 
    584 let rec case_insens r =
    585   match r with
    586     Set s ->
    587       Set (cunion s (cunion (ctrans 32 (cinter s uppercase))
    588                             (ctrans (-32) (cinter s lowercase))))
    589   | Sequence l ->
    590       Sequence (List.map case_insens l)
    591   | Alternative l ->
    592       Alternative (List.map case_insens l)
    593   | Repeat (r, i, j) ->
    594       Repeat (case_insens r, i, j)
    595   | Beg_of_line | End_of_line ->
    596       r
    597   | Intersection l ->
    598       Intersection (List.map case_insens l)
    599   | Difference (r, r') ->
    600       Difference (case_insens r, case_insens r')
    601 
    602 let case_insensitive r =
    603   wrap (case_insens (def r))
    604 
    605 (**** Parser ****)
    606 
    607 exception Parse_error
    608 exception Not_supported
    609 
    610 let parse s =
    611   let i = ref 0 in
    612   let l = String.length s in
    613   let eos () = !i = l in
    614   let test c = not (eos ()) && s.[!i] = c in
    615   let accept c = let r = test c in if r then incr i; r in
    616   let get () = let r = s.[!i] in incr i; r in
    617   let unget () = decr i in
    618 
    619   let rec regexp () = regexp' (branch ())
    620   and regexp' left =
    621     if accept '|' then regexp' (Alternative [left; branch ()]) else left
    622   and branch () = branch' (piece ())
    623   and branch' left =
    624     if eos () || test '|' || test ')' then left
    625     else branch' (Sequence [left; piece ()])
    626   and piece () =
    627     let r = atom () in
    628     if accept '*' then Repeat (r, 0, None) else
    629     if accept '+' then Repeat (r, 1, None) else
    630     if accept '?' then Alternative [Sequence []; r] else
    631     if accept '{' then
    632       match integer () with
    633         Some i ->
    634           let j = if accept ',' then integer () else Some i in
    635           if not (accept '}') then raise Parse_error;
    636           begin match j with
    637             Some j when j < i -> raise Parse_error | _ -> ()
    638           end;
    639           Repeat (r, i, j)
    640       | None ->
    641           unget (); r
    642     else
    643       r
    644   and atom () =
    645     if accept '.' then Set cany else
    646     if accept '(' then begin
    647       let r = regexp () in
    648       if not (accept ')') then raise Parse_error;
    649       r
    650     end else
    651     if accept '^' then Beg_of_line else
    652     if accept '$' then End_of_line else
    653     if accept '[' then begin
    654       if accept '^' then
    655         Set (cnegate 0 255 (bracket []))
    656       else
    657         Set (bracket [])
    658     end else
    659     if accept '\\' then begin
    660       if eos () then raise Parse_error;
    661       match get () with
    662         '|' | '(' | ')' | '*' | '+' | '?'
    663       | '[' | '.' | '^' | '$' | '{' | '\\' as c -> Set (csingle c)
    664       |                 _                       -> raise Parse_error
    665     end else begin
    666       if eos () then raise Parse_error;
    667       match get () with
    668         '*' | '+' | '?' | '{' | '\\' -> raise Parse_error
    669       |                 c            -> Set (csingle c)
    670     end
    671   and integer () =
    672     if eos () then None else
    673     match get () with
    674       '0'..'9' as d -> integer' (Char.code d - Char.code '0')
    675     |     _        -> unget (); None
    676   and integer' i =
    677     if eos () then Some i else
    678     match get () with
    679       '0'..'9' as d ->
    680         let i' = 10 * i + (Char.code d - Char.code '0') in
    681         if i' < i then raise Parse_error;
    682         integer' i'
    683     | _ ->
    684         unget (); Some i
    685   and bracket s =
    686     if s <> [] && accept ']' then s else begin
    687       let c = char () in
    688       if accept '-' then begin
    689         if accept ']' then (cadd c (cadd '-' s)) else begin
    690           let c' = char () in
    691           bracket (cunion (cseq c c') s)
    692         end
    693       end else
    694         bracket (cadd c s)
    695     end
    696   and char () =
    697     if eos () then raise Parse_error;
    698     let c = get () in
    699     if c = '[' then begin
    700       if accept '=' || accept ':' then raise Not_supported;
    701       if accept '.' then begin
    702         if eos () then raise Parse_error;
    703         let c = get () in
    704         if not (accept '.') then raise Not_supported;
    705         if not (accept ']') then raise Parse_error;
    706         c
    707       end else
    708         c
    709     end else
    710       c
    711   in
    712   let res = regexp () in
    713   if not (eos ()) then raise Parse_error;
    714   res
    715 
    716 let rx s = wrap (parse s)
    717 
    718 (**** File globbing ****)
    719 
    720 let gany = cnegate 0 255 (csingle '/')
    721 let notdot = cnegate 0 255 (cunion (csingle '.') (csingle '/'))
    722 let dot = csingle '.'
    723 
    724 type loc = Beg | BegAny | Mid
    725 
    726 let beg_start =
    727   Alternative [Sequence []; Sequence [Set notdot; Repeat (Set gany, 0, None)]]
    728 
    729 let beg_start' =
    730   Sequence [Set notdot; Repeat (Set gany, 0, None)]
    731 
    732 let glob_parse init s =
    733   let i = ref 0 in
    734   let l = String.length s in
    735   let eos () = !i = l in
    736   let test c = not (eos ()) && s.[!i] = c in
    737   let accept c = let r = test c in if r then incr i; r in
    738   let get () = let r = s.[!i] in incr i; r in
    739   (* let unget () = decr i in *)
    740 
    741   let rec expr () = expr' init (Sequence [])
    742   and expr' beg left =
    743     if eos () then
    744       match beg with
    745         Mid | Beg -> left
    746       | BegAny -> Sequence [left; beg_start]
    747     else
    748       let (piec, beg) = piece beg in expr' beg (Sequence [left; piec])
    749   and piece beg =
    750     if accept '*' then begin
    751       if beg <> Mid then
    752         (Sequence [], BegAny)
    753       else
    754         (Repeat (Set gany, 0, None), Mid)
    755     end else if accept '?' then
    756       (begin match beg with
    757          Beg    -> Set notdot
    758        | BegAny -> Sequence [Set notdot; Repeat (Set gany, 0, None)]
    759        | Mid    -> Set gany
    760        end,
    761        Mid)
    762     else if accept '[' then begin
    763       (* let mask = if beg <> Mid then notdot else gany in *)
    764       let set =
    765         if accept '^' || accept '!' then
    766           cnegate 0 255 (bracket [])
    767         else
    768           bracket []
    769       in
    770       (begin match beg with
    771          Beg -> Set (cinter notdot set)
    772        | BegAny -> Alternative [Sequence [beg_start; Set (cinter notdot set)];
    773                                 Sequence [beg_start'; Set (cinter dot set)]]
    774        | Mid -> Set (cinter gany set)
    775        end,
    776        Mid)
    777     end else
    778       let c = char () in
    779       ((if beg <> BegAny then
    780           Set (csingle c)
    781         else if c = '.' then
    782           Sequence [beg_start'; Set (csingle c)]
    783         else
    784           Sequence [beg_start; Set (csingle c)]),
    785        if c = '/' then init else Mid)
    786   and bracket s =
    787     if s <> [] && accept ']' then s else begin
    788       let c = char () in
    789       if accept '-' then begin
    790         if accept ']' then (cadd c (cadd '-' s)) else begin
    791           let c' = char () in
    792           bracket (cunion (cseq c c') s)
    793         end
    794       end else
    795         bracket (cadd c s)
    796     end
    797   and char () =
    798     ignore (accept '\\');
    799     if eos () then raise Parse_error;
    800     get ()
    801   in
    802   let res = expr () in
    803   res
    804 
    805 let rec mul l l' =
    806   List.flatten (List.map (fun s -> List.map (fun s' -> s ^ s') l') l)
    807 
    808 let explode str =
    809   let l = String.length str in
    810   let rec expl inner s i acc beg =
    811     if i >= l then begin
    812       if inner then raise Parse_error;
    813       (mul beg [String.sub str s (i - s)], i)
    814     end else
    815     match str.[i] with
    816       '\\' -> expl inner s (i + 2) acc beg
    817     | '{' ->
    818         let (t, i') = expl true (i + 1) (i + 1) [] [""] in
    819         expl inner i' i' acc
    820           (mul beg (mul [String.sub str s (i - s)] t))
    821     | ',' when inner ->
    822         expl inner (i + 1) (i + 1)
    823           (mul beg [String.sub str s (i - s)] @ acc) [""]
    824     | '}' when inner ->
    825         (mul beg [String.sub str s (i - s)] @ acc, i + 1)
    826     | _ ->
    827         expl inner s (i + 1) acc beg
    828   in
    829   List.rev (fst (expl false 0 0 [] [""]))
    830 
    831 let glob' nodot s = wrap (glob_parse (if nodot then Beg else Mid) s)
    832 let glob s = glob' true s
    833 let globx' nodot s = alt (List.map (glob' nodot) (explode s))
    834 let globx s = globx' true s