unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

watcher.ml (13843B)


      1 (* Unison file synchronizer: src/fsmonitor/solaris/watcher.ml *)
      2 (* Copyright 2021, Tõivo Leedjärv
      3 
      4     This program is free software: you can redistribute it and/or modify
      5     it under the terms of the GNU General Public License as published by
      6     the Free Software Foundation, either version 3 of the License, or
      7     (at your option) any later version.
      8 
      9     This program is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12     GNU General Public License for more details.
     13 
     14     You should have received a copy of the GNU General Public License
     15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16 *)
     17 
     18 (* A brief overview of the File Event Notification (FEN) interface
     19  *
     20  * Events are delivered via ports. A port is created by [port_create].
     21  * Each file or directory to be watched must be individually associated
     22  * with the port by [port_associate].
     23  *
     24  * When associating an object with a port, the stat times of the object
     25  * can be passed in. During association, the FEN system will compare these
     26  * times with their current values to detect if there has been a change
     27  * between the stat() call and the association. If yes, then an event is
     28  * delivered immediately. If the times have not changed, or they are all
     29  * passed in as zero, then the next event is monitored and delivered when
     30  * it occurs.
     31  *
     32  * Objects are associated and events monitored per vnode. It is due to this
     33  * approach that all files must be watched individually. For example, while
     34  * adding and deleting a file will be detected as a modification event on the
     35  * parent directory, modifying an existing file within the same directory
     36  * will not be detected by watching the directory.
     37  *
     38  * For each association, only one event is delivered. The object is then
     39  * automatically dissociated (equivalent of [port_dissociate]) and must
     40  * be associated again to receive further events.
     41  *
     42  * Events are polled and retrieved by [port_get].
     43  *
     44  * A port has a limit to the number of objects associated with it. As all
     45  * files must be watched individually, the number of objects can grow very
     46  * large. This implementation maintains a pool of ports and automatically
     47  * creates new ports as needed, and closes ports that are no longer needed.
     48  *)
     49 
     50 let (>>=) = Lwt.bind
     51 
     52 let () = Gc.set { (Gc.get()) with space_overhead = 40 }
     53 
     54 (****)
     55 
     56 type port = int
     57 
     58 (* event_objects are allocated in C heap and are not GC'd by OCaml. Care must
     59  * be taken to free each event_object explicitly and not use them after having
     60  * been freed.
     61  *
     62  * event_objects have released their backing system resources and must be freed
     63  * with [free_event_object] in the following cases:
     64  *  - after [port_get] when having received an event and event_object is not
     65  *    re-associated, and
     66  *  - after calling [port_dissociate] (exceptions here must be fatal or
     67  *    ensure that [free_event_object] still gets called).
     68  *
     69  * Current implementation builds on the following assumption:
     70  *  - A Watchercommon watch (= value that keeps track of live event_objects) is
     71  *    not discarded without explicitly closing it.
     72  *
     73  *    All watches are closed explicitly by either [release_watch] or
     74  *    in [cleanup_watch]. These functions take care of releasing the resources
     75  *    if needed and freeing the objects properly after they've been released.
     76  *    This also means that exceptions must be fatal or take care not to discard
     77  *    event_objects without properly releasing and freeing them.
     78  *
     79  * If the above assumption changes or can no longer be guaranteed then the
     80  * implementation may have to be changed. There are a few ways to make
     81  * event_objects GC'd. All of these solutions carry a rather high memory
     82  * overhead as FEN requires each file to be monitored individually.
     83  *
     84  * One possible way is to change the type event_object to nativeint and use
     85  * [Gc.finalise] to attach a [free_event_object] to the returned event_object
     86  * after every successful [port_associate].
     87  *
     88  * The other is to in C stub wrap the event object in a Custom block with
     89  * [free_event_object] as the finalizer. Ultimately, this could be the safest
     90  * solution, as in addition to enabling GC on event_objects, it makes it
     91  * possible to prevent use-after-free by setting the value to NULL after free.
     92  *)
     93 
     94 type event_object = int
     95 
     96 let string_of_eo eo = Format.sprintf "%#x" (eo * 2)
     97 
     98 type assocs = (event_object, string) Hashtbl.t
     99 
    100 type watch_t = (port, assocs) Hashtbl.t * bool
    101 
    102 module M = Watchercommon.F (struct type watch = watch_t end)
    103 include M
    104 
    105 (****)
    106 
    107 module Solaris = struct
    108 
    109 let clear_event_memory () = ()
    110 
    111 (****)
    112 
    113 type cookie = int
    114 
    115 type fen_event =
    116   | FILE_ACCESS | FILE_MODIFIED | FILE_ATTRIB | FILE_DELETE | FILE_RENAME_TO
    117   | FILE_RENAME_FROM | FILE_TRUNC | FILE_NOFOLLOW | UNMOUNTED | MOUNTEDOVER
    118 
    119 let print_event ev =
    120   let print_ev ev =
    121     let s = match ev with
    122     | FILE_ACCESS -> "FILE_ACCESS"
    123     | FILE_MODIFIED -> "FILE_MODIFIED"
    124     | FILE_ATTRIB -> "FILE_ATTRIB"
    125     | FILE_DELETE -> "FILE_DELETE"
    126     | FILE_RENAME_TO -> "FILE_RENAME_TO"
    127     | FILE_RENAME_FROM -> "FILE_RENAME_FROM"
    128     | FILE_TRUNC -> "FILE_TRUNC"
    129     | FILE_NOFOLLOW -> "FILE_NOFOLLOW"
    130     | UNMOUNTED -> "UNMOUNTED"
    131     | MOUNTEDOVER -> "MOUNTEDOVER"
    132     in
    133     Format.eprintf "%s " s
    134   in
    135   List.iter print_ev ev;
    136   Format.eprintf "@."
    137 
    138 let event_kind =
    139   let kind = function
    140   | FILE_ACCESS -> `OTHER
    141   | FILE_MODIFIED -> `MODIF
    142   | FILE_ATTRIB -> `MODIF
    143   | FILE_DELETE -> `DEL
    144   | FILE_RENAME_TO -> `CREAT
    145   | FILE_RENAME_FROM -> `DEL
    146   | FILE_TRUNC -> `MODIF
    147   | FILE_NOFOLLOW -> `OTHER
    148   | UNMOUNTED -> `OTHER
    149   | MOUNTEDOVER -> `OTHER
    150   in
    151   List.fold_left (fun k v -> if k = `OTHER then kind v else k) `OTHER
    152 
    153 (****)
    154 
    155 external port_create : unit -> port = "unsn_port_create"
    156 external port_close : port -> unit = "unsn_port_close"
    157 external port_associate : port -> string -> bool -> cookie -> event_object = "unsn_port_associate"
    158 external port_reassociate : port -> event_object -> bool -> bool = "unsn_port_reassociate"
    159 external port_dissociate : port -> event_object -> unit = "unsn_port_dissociate"
    160 external port_get : port -> (port * event_object * cookie * (fen_event list)) list = "unsn_port_get"
    161 external free_event_object : event_object -> unit = "unsn_free_event_object"
    162 
    163 (****)
    164 
    165 let max_ev_per_port = 65000 (* A safe max. The OS limit should be at 64k. *)
    166      (* The number of ports is limited at 8k per process, so not a worry. *)
    167 
    168 let ports = ref []
    169 
    170 let allocate_port () =
    171   let avail_port, _ =
    172     try
    173       List.find (fun (_, count) -> count < max_ev_per_port) !ports
    174     with Not_found ->
    175       let p = port_create (), 0 in ports := p :: !ports; p
    176   in
    177   ports := List.map (fun p' ->
    178     let port, count = p' in
    179     if port <> avail_port then
    180       p'
    181     else
    182       port, count + 1
    183   ) !ports;
    184   avail_port
    185 
    186 let release_port p =
    187   ports := List.fold_left (fun nl p' ->
    188     let port, count = p' in
    189     if p <> port then
    190       p' :: nl
    191     else begin
    192       if count > 1 then
    193         (port, count - 1) :: nl
    194       else begin
    195         let () = port_close port in
    196         nl
    197       end
    198     end
    199   ) [] !ports
    200 
    201 (****)
    202 
    203 let is_directory path follow =
    204   let st = match follow with
    205   | false -> Unix.lstat path
    206   | true -> begin
    207       try
    208         Unix.stat path
    209       with Unix.Unix_error ((Unix.ENOENT | Unix.ENOTDIR), _, _) ->
    210         Watchercommon.error (Format.sprintf
    211           "Unable to follow link '%s' because its target is missing" path)
    212     end
    213   in
    214   Unix.S_DIR = st.st_kind
    215 
    216 let associate is_child wh id follow absname name =
    217   if not is_child || not (is_directory absname follow) then begin
    218     let port = allocate_port () in
    219     let wh_p = try Hashtbl.find wh port with Not_found ->
    220       let wh_p = Hashtbl.create (if is_child then 1 else 1024) in
    221       Hashtbl.add wh port wh_p;
    222       wh_p
    223     in
    224     let eo = port_associate port absname follow id in
    225     Hashtbl.add wh_p eo name
    226   end
    227 
    228 let add_watch_children path assoc_f =
    229   let rec loop dir =
    230     match Unix.readdir dir with
    231     | exception End_of_file -> ()
    232     | "." | ".." -> loop dir
    233     | name ->
    234         let () = assoc_f name in
    235         loop dir
    236   in
    237   let dir = Unix.opendir path in
    238   try
    239     let () = loop dir in
    240     Unix.closedir dir
    241   with Unix.Unix_error _ as e ->
    242     begin try
    243       Unix.closedir dir
    244     with Unix.Unix_error _ -> () end;
    245     raise e
    246 
    247 let rec add_watch path file follow =
    248   match get_watch file with
    249   | Some (_, follow') when follow = follow' ->
    250       ()
    251   | Some _ ->
    252       release_watch file;
    253       add_watch path file follow
    254   | None ->
    255       let id = get_id file
    256       and wh = Hashtbl.create 1 in
    257       let () = set_watch file (Some (wh, follow)) in
    258       try
    259         let () = associate false wh id follow path "" in
    260         if is_directory path follow then add_watch_children path
    261           (fun nm -> associate true wh id follow (Filename.concat path nm) nm)
    262       with
    263       | Unix.Unix_error (ENOENT, _, _) ->
    264           raise Watchercommon.Already_lost
    265       | Unix.Unix_error (EACCES, _, _)
    266       | Unix.Unix_error (ENOTDIR, _, _)
    267       | Unix.Unix_error (ELOOP, _, _) ->
    268           (* These are handled well by Unison *)
    269           ()
    270       | Unix.Unix_error _ as e ->
    271           Watchercommon.error
    272             (Format.sprintf
    273               "Error while starting to watch for changes: [%s] %s"
    274               path (Watchercommon.format_exc e))
    275 
    276 and release_watch file =
    277   match get_watch file with
    278   | None -> ()
    279   | Some (wh, _) ->
    280       set_watch file None;
    281       let unwatch port eo name =
    282         port_dissociate port eo;
    283         free_event_object eo;
    284         release_port port
    285       in
    286       Hashtbl.iter (fun port wh_p -> Hashtbl.iter (unwatch port) wh_p) wh
    287 
    288 (* Once an event is delivered, the FEN automatically dissociates the object.
    289  *
    290  * The object must be re-associated in the following cases:
    291  *  - It was not requested by [add_watch] but was implicitly added by
    292  *    [add_watch_children]. In other words, the name is not "".
    293  *
    294  * When the object is not to be re-associated or re-association did not
    295  * succeed then the following must be done:
    296  *  - The associated port must be released.
    297  *  - The event object must be freed and then discarded (event object must
    298  *    no longer be referenced or used in any way).
    299  *  - The watch must be released completely by calling [release_watch],
    300  *    even if it was an implicitly added child that failed re-association.
    301  *
    302  * Unison and Watchercommon will associate the path again if and when needed.
    303  *
    304  * This releasing and associating can potentially be terrible for performance
    305  * on large directories (with several tens or hundreds of thousands of files)
    306  * but it is the easiest way to guarantee that all children in a directory are
    307  * watched.
    308  *)
    309 let cleanup_watch file name port eo id ev =
    310   match get_watch file with
    311   | None -> ()
    312   | Some (wh, follow) ->
    313       let reassoc =
    314         try
    315           let wh_p = Hashtbl.find wh port in
    316           let r =
    317             match name with
    318             | "" -> false
    319             | _ -> port_reassociate port eo follow
    320           in
    321           if not r then begin
    322             Hashtbl.remove wh_p eo;
    323             free_event_object eo;
    324             release_port port
    325           end;
    326           r
    327         with Not_found -> false
    328       in
    329       if not reassoc then release_watch file
    330       (* [release_watch] here is safe because even if some events within the
    331        * watch may not have been processed yet, all event objects in a watch
    332        * will be dissociated, freed and the entire watch discarded.
    333        *
    334        * Dissocating an already dissociated object is a noop.
    335        *
    336        * Since the watch is discarded, there will not be any use-after-free
    337        * or double free possible as event objects are always looked up from
    338        * a watch before any processing. *)
    339 
    340 let process_ev time ((file, name), (port, eo, id, ev)) =
    341   if !Watchercommon.debug then begin
    342     Format.eprintf " %i: [%s] %s \"%s\": " port (string_of_eo eo)
    343       (dir_path file "") name;
    344     print_event ev
    345   end;
    346   let () = cleanup_watch file name port eo id ev in
    347   let name = match name with
    348   | "" -> None
    349   | _ -> Some name
    350   in
    351   signal_change time file name (event_kind ev)
    352 
    353 (* Always process events on children first and on parents last because
    354  * the cleanup procedure clears out children together with the parent. *)
    355 let compare_event e e' =
    356   match e, e' with
    357   | ((_, ""), _), ((_, ""), _) -> 0
    358   | ((_, ""), _), ((_, n), _) -> 1
    359   | ((_, n), _), ((_, ""), _) -> -1
    360   | ((_, n), _), ((_, n'), _) -> 0
    361 
    362 let process_ev_list ev_list =
    363   let time = Unix.gettimeofday () in
    364   let ev_list = List.fold_left
    365     (fun k ((port, eo, id, _) as o) ->
    366       try
    367         let file = Hashtbl.find file_by_id id in
    368         match get_watch file with
    369         | None ->
    370             k
    371         | Some (wh, _) ->
    372             let wh_p = Hashtbl.find wh port in
    373             let name = Hashtbl.find wh_p eo in
    374             ((file, name), o) :: k
    375       with Not_found ->
    376         k
    377     ) [] ev_list
    378   in
    379   let ev_list = List.sort compare_event ev_list in
    380   List.iter (process_ev time) ev_list
    381 
    382 let rec read_events () =
    383   (* FIXME: List.concat_map is available since OCaml 4.10.0 *)
    384   let ev_list = List.map (fun (port, _) -> port_get port) !ports in
    385   let ev_list = Safelist.concat ev_list in
    386   if List.length ev_list > 0 then
    387     Lwt_unix.yield () >>= fun () ->
    388     Lwt.return ev_list
    389   else
    390     Lwt_unix.sleep 1.5 >>=
    391     read_events
    392 
    393 let watch () =
    394   let rec watch_rec () =
    395     read_events () >>= fun ev_list ->
    396     let () = process_ev_list ev_list in
    397     watch_rec ()
    398   in
    399   ignore
    400     (Lwt.catch watch_rec
    401        (fun e ->
    402           Watchercommon.error
    403             ("error while handling events: " ^ Watchercommon.format_exc e)))
    404 
    405 end
    406 
    407 (****)
    408 
    409 include F(Solaris)