fpcache.ml (11026B)
1 (* Unison file synchronizer: src/fpcache.ml *) 2 (* Copyright 1999-2020, Benjamin C. Pierce 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. 16 *) 17 18 let debug = Trace.debug "fpcache" 19 20 (* In-memory cache *) 21 22 module PathTbl = 23 Hashtbl.Make 24 (struct 25 type t = string 26 let equal (s1 : string) (s2 : string) = s1 = s2 27 let hash = Hashtbl.hash 28 end) 29 30 let tbl = PathTbl.create 101 31 32 (* Information for writing to the on-disk cache *) 33 34 type entry = 35 int * string * (Props.t * Os.fullfingerprint * Fileinfo.stamp * Osx.ressStamp) 36 37 let mentry = Umarshal.(prod3 int string 38 (prod4 Props.m Os.mfullfingerprint Fileinfo.mstamp Osx.mressStamp id id) 39 id id) 40 41 let mentry_list = Umarshal.list mentry 42 43 type state = 44 { oc : out_channel; 45 mutable count : int; 46 mutable size : Uutil.Filesize.t; 47 mutable last : string; 48 mutable queue : entry list } 49 50 let state = ref None 51 52 (****) 53 54 (* Path compression and decompression (use delta from previous path for 55 compression) *) 56 57 let decompress st i path = 58 let l = String.length path in 59 let s = Bytes.create (l + i) in 60 String.blit !st 0 s 0 i; 61 String.blit path 0 s i l; 62 let s = Bytes.to_string s in 63 st := s; 64 s 65 66 let compress state path = 67 let s = state.last in 68 let p = Path.toString path in 69 let l = min (String.length p) (String.length s) in 70 let i = ref 0 in 71 while !i < l && p.[!i] = s.[!i] do incr i done; 72 state.last <- p; 73 (!i, String.sub p !i (String.length p - !i)) 74 75 (*****) 76 77 (* Read and write a chunk of file fingerprints from the cache *) 78 79 let read st ic = 80 (* I/O errors are dealt with at a higher level *) 81 let fp1 = Digest.input ic in 82 let fp2 = Digest.input ic in 83 let headerSize = Umarshal.header_size in 84 let header = Bytes.create headerSize in 85 really_input ic header 0 headerSize; 86 if fp1 <> Digest.bytes header then begin 87 debug (fun () -> Util.msg "bad header checksum\n"); 88 raise End_of_file 89 end; 90 let dataSize = Umarshal.data_size header 0 in 91 let s = Bytes.create (headerSize + dataSize) in 92 Bytes.blit header 0 s 0 headerSize; 93 really_input ic s headerSize dataSize; 94 if fp2 <> Digest.bytes s then begin 95 debug (fun () -> Util.msg "bad chunk checksum\n"); 96 raise End_of_file 97 end; 98 let q = 99 try Umarshal.from_bytes mentry_list s 0 with 100 | Umarshal.Error _ -> 101 debug (fun () -> Util.msg ("Umarshal error when reading from file, " 102 ^^ "ignoring and continuing\n")); 103 [] 104 in 105 debug (fun () -> Util.msg "read chunk of %d files\n" (List.length q)); 106 List.iter (fun (l, p, i) -> PathTbl.add tbl (decompress st l p) i) q 107 108 let closeOut st = 109 state := None; 110 try 111 close_out st.oc 112 with Sys_error error -> 113 debug (fun () -> Util.msg "error in closing cache file: %s\n" error) 114 115 let write state = 116 let q = Safelist.rev state.queue in 117 let s = Umarshal.to_string mentry_list q in 118 let fp1 = Digest.substring s 0 Umarshal.header_size in 119 let fp2 = Digest.string s in 120 begin try 121 Digest.output state.oc fp1; Digest.output state.oc fp2; 122 output_string state.oc s; flush state.oc 123 with Sys_error error -> 124 debug (fun () -> Util.msg "error in writing to cache file: %s\n" error); 125 closeOut state 126 end; 127 state.count <- 0; 128 state.size <- Uutil.Filesize.zero; 129 state.queue <- [] 130 131 (****) 132 133 (* Start and finish dealing with the cache *) 134 135 let finish () = 136 PathTbl.clear tbl; 137 match !state with 138 Some st -> if st.queue <> [] then write st; 139 closeOut st 140 | None -> () 141 142 let magic = "Unison fingerprint cache format 3" 143 144 let init fastCheck ignorearchives fspath = 145 finish (); 146 if fastCheck && not ignorearchives then begin 147 begin try 148 debug (fun () -> Util.msg "opening cache file %s for input\n" 149 (System.fspathToDebugString fspath)); 150 let ic = System.open_in_bin fspath in 151 begin try 152 let header = input_line ic in 153 if header <> magic then raise (Sys_error "wrong header"); 154 let st = ref "" in 155 while true do read st ic done 156 with 157 Sys_error error -> 158 debug (fun () -> Util.msg "error in loading cache file %s: %s\n" 159 (System.fspathToDebugString fspath) error) 160 | End_of_file -> 161 () 162 end; 163 begin try 164 close_in ic 165 with Sys_error error -> 166 debug (fun () -> Util.msg "error in closing cache file %s: %s\n" 167 (System.fspathToDebugString fspath) error) 168 end; 169 with Sys_error error -> 170 debug (fun () -> Util.msg "could not open cache file %s: %s\n" 171 (System.fspathToDebugString fspath) error) 172 end; 173 begin try 174 debug (fun () -> Util.msg "opening cache file %s for output\n" 175 (System.fspathToDebugString fspath)); 176 let oc = 177 System.open_out_gen 178 [Open_wronly; Open_creat; Open_trunc; Open_binary] 0o600 fspath in 179 output_string oc magic; output_string oc "\n"; flush oc; 180 state := 181 Some { oc = oc; count = 0; size = Uutil.Filesize.zero; 182 last = ""; queue = [] } 183 with Sys_error error -> 184 debug (fun () -> Util.msg "could not open cache file %s: %s\n" 185 (System.fspathToDebugString fspath) error) 186 end 187 end 188 189 (****) 190 191 (* Enqueue a fingerprint to be written to disk. *) 192 193 let maxCount = 5000 194 let maxSize = Uutil.Filesize.ofInt (100 * 1024 * 1024) 195 196 let save path v = 197 match !state with 198 None -> 199 () 200 | Some state -> 201 let (desc, _, _, _) = v in 202 let l = Props.length desc in 203 state.size <- Uutil.Filesize.add state.size l; 204 state.count <- state.count + 1; 205 let (l, s) = compress state path in 206 state.queue <- (l, s, v) :: state.queue; 207 if state.count > maxCount || state.size > maxSize then write state 208 209 (****) 210 211 (* Check whether a fingerprint is in the in-memory cache and store it 212 to the on-disk cache in any case. *) 213 214 (* HACK: we disable fastcheck for Excel (and MPP) files, as Excel 215 sometimes modifies a file without updating the time stamp. *) 216 let excelFile path = 217 let s = Path.toString path in 218 Util.endswith s ".xls" 219 || Util.endswith s ".mpp" 220 221 let dataClearlyUnchanged fastCheck path info desc stamp = 222 fastCheck 223 && 224 Props.same_time info.Fileinfo.desc desc 225 && 226 Props.length info.Fileinfo.desc = Props.length desc 227 && 228 not (excelFile path) 229 && 230 match stamp with 231 Fileinfo.InodeStamp inode -> 232 info.Fileinfo.inode = inode 233 | Fileinfo.NoStamp -> 234 true 235 | Fileinfo.RescanStamp -> 236 false 237 238 let ressClearlyUnchanged fastCheck info ress dataClearlyUnchanged = 239 fastCheck 240 && 241 Osx.ressUnchanged ress info.Fileinfo.osX.Osx.ressInfo 242 None dataClearlyUnchanged 243 244 let clearlyUnchanged fastCheck path newInfo oldDesc oldStamp oldRess = 245 let du = 246 dataClearlyUnchanged fastCheck path newInfo oldDesc oldStamp 247 in 248 du && ressClearlyUnchanged fastCheck newInfo oldRess du 249 250 let fastercheckUNSAFE = 251 Prefs.createBool "fastercheckUNSAFE" false 252 ~category:`Expert 253 "skip computing fingerprints for new files (experts only!)" 254 ( "THIS FEATURE IS STILL EXPERIMENTAL AND SHOULD BE USED WITH EXTREME CAUTION. " 255 ^ "\n\n" 256 ^ "When this flag is set to {\\tt true}, Unison will compute a 'pseudo-" 257 ^ "fingerprint' the first time it sees a file (either because the file is " 258 ^ "new or because Unison is running for the first time). This enormously " 259 ^ "speeds update detection, but it must be used with care, as it can cause " 260 ^ "Unison to miss conflicts: If " 261 ^ "a given path in the filesystem contains files on {\\em both} sides that " 262 ^ "Unison has not yet seen, and if those files have the same length but different " 263 ^ "contents, then Unison will not notice the presence of a conflict. If, later, one " 264 ^ "of the files is changed, the changed file will be propagated, overwriting " 265 ^ "the other. " 266 ^ "\n\n" 267 ^ "Moreover, even when the files are initially identical, setting this flag can lead " 268 ^ "to potentially confusing behavior: " 269 ^ "if a newly created file is later touched without being modified, Unison will " 270 ^ "treat this " 271 ^ "conservatively as a potential change (since it has no record of the earlier " 272 ^ "contents) and show it as needing to be propagated to the other replica. " 273 ^ "\n\n" 274 ^ "Most users should leave this flag off -- the small time savings of not " 275 ^ "fingerprinting new files is not worth the cost in terms of safety. However, " 276 ^ "it can be very useful for power users with huge replicas that are known to " 277 ^ "be already synchronized (e.g., because one replica is a newly created duplicate " 278 ^ "of the other, or because they have previously been synchronized with Unison but " 279 ^ "Unison's archives need to be rebuilt). In such situations, it is recommended " 280 ^ "that this flag be set only for the initial run of Unison, so that new archives " 281 ^ "can be created quickly, and then turned off for normal use.") 282 283 let fingerprint ?(newfile=false) fastCheck currfspath path info optFp = 284 let res = 285 try 286 let (cachedDesc, cachedFp, cachedStamp, cachedRess) = 287 PathTbl.find tbl (Path.toString path) in 288 if 289 not (clearlyUnchanged 290 fastCheck path info cachedDesc cachedStamp cachedRess) 291 then 292 raise Not_found; 293 debug (fun () -> Util.msg "cache hit for path %s\n" 294 (Path.toDebugString path)); 295 (info.Fileinfo.desc, cachedFp, Fileinfo.stamp info, 296 Fileinfo.ressStamp info) 297 with Not_found -> 298 if fastCheck then 299 debug (fun () -> Util.msg "cache miss for path %s\n" 300 (Path.toDebugString path)); 301 let (info, dig) = 302 if Prefs.read fastercheckUNSAFE && newfile then begin 303 debug (fun()-> Util.msg "skipping initial fingerprint of %s\n" 304 (Fspath.toDebugString (Fspath.concat currfspath path))); 305 (Fileinfo.get ~archProps:info.desc false currfspath path, 306 Os.pseudoFingerprint path (Props.length info.Fileinfo.desc)) 307 end else begin 308 Os.safeFingerprint currfspath path info optFp 309 end in 310 (info.Fileinfo.desc, dig, Fileinfo.stamp info, Fileinfo.ressStamp info) 311 in 312 save path res; 313 res