watcher.ml (13843B)
1 (* Unison file synchronizer: src/fsmonitor/solaris/watcher.ml *) 2 (* Copyright 2021, Tõivo Leedjärv 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. 16 *) 17 18 (* A brief overview of the File Event Notification (FEN) interface 19 * 20 * Events are delivered via ports. A port is created by [port_create]. 21 * Each file or directory to be watched must be individually associated 22 * with the port by [port_associate]. 23 * 24 * When associating an object with a port, the stat times of the object 25 * can be passed in. During association, the FEN system will compare these 26 * times with their current values to detect if there has been a change 27 * between the stat() call and the association. If yes, then an event is 28 * delivered immediately. If the times have not changed, or they are all 29 * passed in as zero, then the next event is monitored and delivered when 30 * it occurs. 31 * 32 * Objects are associated and events monitored per vnode. It is due to this 33 * approach that all files must be watched individually. For example, while 34 * adding and deleting a file will be detected as a modification event on the 35 * parent directory, modifying an existing file within the same directory 36 * will not be detected by watching the directory. 37 * 38 * For each association, only one event is delivered. The object is then 39 * automatically dissociated (equivalent of [port_dissociate]) and must 40 * be associated again to receive further events. 41 * 42 * Events are polled and retrieved by [port_get]. 43 * 44 * A port has a limit to the number of objects associated with it. As all 45 * files must be watched individually, the number of objects can grow very 46 * large. This implementation maintains a pool of ports and automatically 47 * creates new ports as needed, and closes ports that are no longer needed. 48 *) 49 50 let (>>=) = Lwt.bind 51 52 let () = Gc.set { (Gc.get()) with space_overhead = 40 } 53 54 (****) 55 56 type port = int 57 58 (* event_objects are allocated in C heap and are not GC'd by OCaml. Care must 59 * be taken to free each event_object explicitly and not use them after having 60 * been freed. 61 * 62 * event_objects have released their backing system resources and must be freed 63 * with [free_event_object] in the following cases: 64 * - after [port_get] when having received an event and event_object is not 65 * re-associated, and 66 * - after calling [port_dissociate] (exceptions here must be fatal or 67 * ensure that [free_event_object] still gets called). 68 * 69 * Current implementation builds on the following assumption: 70 * - A Watchercommon watch (= value that keeps track of live event_objects) is 71 * not discarded without explicitly closing it. 72 * 73 * All watches are closed explicitly by either [release_watch] or 74 * in [cleanup_watch]. These functions take care of releasing the resources 75 * if needed and freeing the objects properly after they've been released. 76 * This also means that exceptions must be fatal or take care not to discard 77 * event_objects without properly releasing and freeing them. 78 * 79 * If the above assumption changes or can no longer be guaranteed then the 80 * implementation may have to be changed. There are a few ways to make 81 * event_objects GC'd. All of these solutions carry a rather high memory 82 * overhead as FEN requires each file to be monitored individually. 83 * 84 * One possible way is to change the type event_object to nativeint and use 85 * [Gc.finalise] to attach a [free_event_object] to the returned event_object 86 * after every successful [port_associate]. 87 * 88 * The other is to in C stub wrap the event object in a Custom block with 89 * [free_event_object] as the finalizer. Ultimately, this could be the safest 90 * solution, as in addition to enabling GC on event_objects, it makes it 91 * possible to prevent use-after-free by setting the value to NULL after free. 92 *) 93 94 type event_object = int 95 96 let string_of_eo eo = Format.sprintf "%#x" (eo * 2) 97 98 type assocs = (event_object, string) Hashtbl.t 99 100 type watch_t = (port, assocs) Hashtbl.t * bool 101 102 module M = Watchercommon.F (struct type watch = watch_t end) 103 include M 104 105 (****) 106 107 module Solaris = struct 108 109 let clear_event_memory () = () 110 111 (****) 112 113 type cookie = int 114 115 type fen_event = 116 | FILE_ACCESS | FILE_MODIFIED | FILE_ATTRIB | FILE_DELETE | FILE_RENAME_TO 117 | FILE_RENAME_FROM | FILE_TRUNC | FILE_NOFOLLOW | UNMOUNTED | MOUNTEDOVER 118 119 let print_event ev = 120 let print_ev ev = 121 let s = match ev with 122 | FILE_ACCESS -> "FILE_ACCESS" 123 | FILE_MODIFIED -> "FILE_MODIFIED" 124 | FILE_ATTRIB -> "FILE_ATTRIB" 125 | FILE_DELETE -> "FILE_DELETE" 126 | FILE_RENAME_TO -> "FILE_RENAME_TO" 127 | FILE_RENAME_FROM -> "FILE_RENAME_FROM" 128 | FILE_TRUNC -> "FILE_TRUNC" 129 | FILE_NOFOLLOW -> "FILE_NOFOLLOW" 130 | UNMOUNTED -> "UNMOUNTED" 131 | MOUNTEDOVER -> "MOUNTEDOVER" 132 in 133 Format.eprintf "%s " s 134 in 135 List.iter print_ev ev; 136 Format.eprintf "@." 137 138 let event_kind = 139 let kind = function 140 | FILE_ACCESS -> `OTHER 141 | FILE_MODIFIED -> `MODIF 142 | FILE_ATTRIB -> `MODIF 143 | FILE_DELETE -> `DEL 144 | FILE_RENAME_TO -> `CREAT 145 | FILE_RENAME_FROM -> `DEL 146 | FILE_TRUNC -> `MODIF 147 | FILE_NOFOLLOW -> `OTHER 148 | UNMOUNTED -> `OTHER 149 | MOUNTEDOVER -> `OTHER 150 in 151 List.fold_left (fun k v -> if k = `OTHER then kind v else k) `OTHER 152 153 (****) 154 155 external port_create : unit -> port = "unsn_port_create" 156 external port_close : port -> unit = "unsn_port_close" 157 external port_associate : port -> string -> bool -> cookie -> event_object = "unsn_port_associate" 158 external port_reassociate : port -> event_object -> bool -> bool = "unsn_port_reassociate" 159 external port_dissociate : port -> event_object -> unit = "unsn_port_dissociate" 160 external port_get : port -> (port * event_object * cookie * (fen_event list)) list = "unsn_port_get" 161 external free_event_object : event_object -> unit = "unsn_free_event_object" 162 163 (****) 164 165 let max_ev_per_port = 65000 (* A safe max. The OS limit should be at 64k. *) 166 (* The number of ports is limited at 8k per process, so not a worry. *) 167 168 let ports = ref [] 169 170 let allocate_port () = 171 let avail_port, _ = 172 try 173 List.find (fun (_, count) -> count < max_ev_per_port) !ports 174 with Not_found -> 175 let p = port_create (), 0 in ports := p :: !ports; p 176 in 177 ports := List.map (fun p' -> 178 let port, count = p' in 179 if port <> avail_port then 180 p' 181 else 182 port, count + 1 183 ) !ports; 184 avail_port 185 186 let release_port p = 187 ports := List.fold_left (fun nl p' -> 188 let port, count = p' in 189 if p <> port then 190 p' :: nl 191 else begin 192 if count > 1 then 193 (port, count - 1) :: nl 194 else begin 195 let () = port_close port in 196 nl 197 end 198 end 199 ) [] !ports 200 201 (****) 202 203 let is_directory path follow = 204 let st = match follow with 205 | false -> Unix.lstat path 206 | true -> begin 207 try 208 Unix.stat path 209 with Unix.Unix_error ((Unix.ENOENT | Unix.ENOTDIR), _, _) -> 210 Watchercommon.error (Format.sprintf 211 "Unable to follow link '%s' because its target is missing" path) 212 end 213 in 214 Unix.S_DIR = st.st_kind 215 216 let associate is_child wh id follow absname name = 217 if not is_child || not (is_directory absname follow) then begin 218 let port = allocate_port () in 219 let wh_p = try Hashtbl.find wh port with Not_found -> 220 let wh_p = Hashtbl.create (if is_child then 1 else 1024) in 221 Hashtbl.add wh port wh_p; 222 wh_p 223 in 224 let eo = port_associate port absname follow id in 225 Hashtbl.add wh_p eo name 226 end 227 228 let add_watch_children path assoc_f = 229 let rec loop dir = 230 match Unix.readdir dir with 231 | exception End_of_file -> () 232 | "." | ".." -> loop dir 233 | name -> 234 let () = assoc_f name in 235 loop dir 236 in 237 let dir = Unix.opendir path in 238 try 239 let () = loop dir in 240 Unix.closedir dir 241 with Unix.Unix_error _ as e -> 242 begin try 243 Unix.closedir dir 244 with Unix.Unix_error _ -> () end; 245 raise e 246 247 let rec add_watch path file follow = 248 match get_watch file with 249 | Some (_, follow') when follow = follow' -> 250 () 251 | Some _ -> 252 release_watch file; 253 add_watch path file follow 254 | None -> 255 let id = get_id file 256 and wh = Hashtbl.create 1 in 257 let () = set_watch file (Some (wh, follow)) in 258 try 259 let () = associate false wh id follow path "" in 260 if is_directory path follow then add_watch_children path 261 (fun nm -> associate true wh id follow (Filename.concat path nm) nm) 262 with 263 | Unix.Unix_error (ENOENT, _, _) -> 264 raise Watchercommon.Already_lost 265 | Unix.Unix_error (EACCES, _, _) 266 | Unix.Unix_error (ENOTDIR, _, _) 267 | Unix.Unix_error (ELOOP, _, _) -> 268 (* These are handled well by Unison *) 269 () 270 | Unix.Unix_error _ as e -> 271 Watchercommon.error 272 (Format.sprintf 273 "Error while starting to watch for changes: [%s] %s" 274 path (Watchercommon.format_exc e)) 275 276 and release_watch file = 277 match get_watch file with 278 | None -> () 279 | Some (wh, _) -> 280 set_watch file None; 281 let unwatch port eo name = 282 port_dissociate port eo; 283 free_event_object eo; 284 release_port port 285 in 286 Hashtbl.iter (fun port wh_p -> Hashtbl.iter (unwatch port) wh_p) wh 287 288 (* Once an event is delivered, the FEN automatically dissociates the object. 289 * 290 * The object must be re-associated in the following cases: 291 * - It was not requested by [add_watch] but was implicitly added by 292 * [add_watch_children]. In other words, the name is not "". 293 * 294 * When the object is not to be re-associated or re-association did not 295 * succeed then the following must be done: 296 * - The associated port must be released. 297 * - The event object must be freed and then discarded (event object must 298 * no longer be referenced or used in any way). 299 * - The watch must be released completely by calling [release_watch], 300 * even if it was an implicitly added child that failed re-association. 301 * 302 * Unison and Watchercommon will associate the path again if and when needed. 303 * 304 * This releasing and associating can potentially be terrible for performance 305 * on large directories (with several tens or hundreds of thousands of files) 306 * but it is the easiest way to guarantee that all children in a directory are 307 * watched. 308 *) 309 let cleanup_watch file name port eo id ev = 310 match get_watch file with 311 | None -> () 312 | Some (wh, follow) -> 313 let reassoc = 314 try 315 let wh_p = Hashtbl.find wh port in 316 let r = 317 match name with 318 | "" -> false 319 | _ -> port_reassociate port eo follow 320 in 321 if not r then begin 322 Hashtbl.remove wh_p eo; 323 free_event_object eo; 324 release_port port 325 end; 326 r 327 with Not_found -> false 328 in 329 if not reassoc then release_watch file 330 (* [release_watch] here is safe because even if some events within the 331 * watch may not have been processed yet, all event objects in a watch 332 * will be dissociated, freed and the entire watch discarded. 333 * 334 * Dissocating an already dissociated object is a noop. 335 * 336 * Since the watch is discarded, there will not be any use-after-free 337 * or double free possible as event objects are always looked up from 338 * a watch before any processing. *) 339 340 let process_ev time ((file, name), (port, eo, id, ev)) = 341 if !Watchercommon.debug then begin 342 Format.eprintf " %i: [%s] %s \"%s\": " port (string_of_eo eo) 343 (dir_path file "") name; 344 print_event ev 345 end; 346 let () = cleanup_watch file name port eo id ev in 347 let name = match name with 348 | "" -> None 349 | _ -> Some name 350 in 351 signal_change time file name (event_kind ev) 352 353 (* Always process events on children first and on parents last because 354 * the cleanup procedure clears out children together with the parent. *) 355 let compare_event e e' = 356 match e, e' with 357 | ((_, ""), _), ((_, ""), _) -> 0 358 | ((_, ""), _), ((_, n), _) -> 1 359 | ((_, n), _), ((_, ""), _) -> -1 360 | ((_, n), _), ((_, n'), _) -> 0 361 362 let process_ev_list ev_list = 363 let time = Unix.gettimeofday () in 364 let ev_list = List.fold_left 365 (fun k ((port, eo, id, _) as o) -> 366 try 367 let file = Hashtbl.find file_by_id id in 368 match get_watch file with 369 | None -> 370 k 371 | Some (wh, _) -> 372 let wh_p = Hashtbl.find wh port in 373 let name = Hashtbl.find wh_p eo in 374 ((file, name), o) :: k 375 with Not_found -> 376 k 377 ) [] ev_list 378 in 379 let ev_list = List.sort compare_event ev_list in 380 List.iter (process_ev time) ev_list 381 382 let rec read_events () = 383 (* FIXME: List.concat_map is available since OCaml 4.10.0 *) 384 let ev_list = List.map (fun (port, _) -> port_get port) !ports in 385 let ev_list = Safelist.concat ev_list in 386 if List.length ev_list > 0 then 387 Lwt_unix.yield () >>= fun () -> 388 Lwt.return ev_list 389 else 390 Lwt_unix.sleep 1.5 >>= 391 read_events 392 393 let watch () = 394 let rec watch_rec () = 395 read_events () >>= fun ev_list -> 396 let () = process_ev_list ev_list in 397 watch_rec () 398 in 399 ignore 400 (Lwt.catch watch_rec 401 (fun e -> 402 Watchercommon.error 403 ("error while handling events: " ^ Watchercommon.format_exc e))) 404 405 end 406 407 (****) 408 409 include F(Solaris)