unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

fpcache.ml (11026B)


      1 (* Unison file synchronizer: src/fpcache.ml *)
      2 (* Copyright 1999-2020, Benjamin C. Pierce
      3 
      4     This program is free software: you can redistribute it and/or modify
      5     it under the terms of the GNU General Public License as published by
      6     the Free Software Foundation, either version 3 of the License, or
      7     (at your option) any later version.
      8 
      9     This program is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12     GNU General Public License for more details.
     13 
     14     You should have received a copy of the GNU General Public License
     15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16 *)
     17 
     18 let debug = Trace.debug "fpcache"
     19 
     20 (* In-memory cache *)
     21 
     22 module PathTbl =
     23   Hashtbl.Make
     24     (struct
     25        type t = string
     26        let equal (s1 : string) (s2 : string) = s1 = s2
     27        let hash = Hashtbl.hash
     28      end)
     29 
     30 let tbl = PathTbl.create 101
     31 
     32 (* Information for writing to the on-disk cache *)
     33 
     34 type entry =
     35   int * string * (Props.t * Os.fullfingerprint * Fileinfo.stamp * Osx.ressStamp)
     36 
     37 let mentry = Umarshal.(prod3 int string
     38                          (prod4 Props.m Os.mfullfingerprint Fileinfo.mstamp Osx.mressStamp id id)
     39                          id id)
     40 
     41 let mentry_list = Umarshal.list mentry
     42 
     43 type state =
     44   { oc : out_channel;
     45     mutable count : int;
     46     mutable size : Uutil.Filesize.t;
     47     mutable last : string;
     48     mutable queue : entry list }
     49 
     50 let state = ref None
     51 
     52 (****)
     53 
     54 (* Path compression and decompression (use delta from previous path for
     55    compression) *)
     56 
     57 let decompress st i path =
     58   let l = String.length path in
     59   let s = Bytes.create (l + i) in
     60   String.blit !st 0 s 0 i;
     61   String.blit path 0 s i l;
     62   let s = Bytes.to_string s in
     63   st := s;
     64   s
     65 
     66 let compress state path =
     67   let s = state.last in
     68   let p = Path.toString path in
     69   let l = min (String.length p) (String.length s) in
     70   let i = ref 0 in
     71   while !i < l && p.[!i] = s.[!i] do incr i done;
     72   state.last <- p;
     73   (!i, String.sub p !i (String.length p - !i))
     74 
     75 (*****)
     76 
     77 (* Read and write a chunk of file fingerprints from the cache *)
     78 
     79 let read st ic =
     80   (* I/O errors are dealt with at a higher level *)
     81   let fp1 = Digest.input ic in
     82   let fp2 = Digest.input ic in
     83   let headerSize = Umarshal.header_size in
     84   let header = Bytes.create headerSize in
     85   really_input ic header 0 headerSize;
     86   if fp1 <> Digest.bytes header then begin
     87     debug (fun () -> Util.msg "bad header checksum\n");
     88     raise End_of_file
     89   end;
     90   let dataSize = Umarshal.data_size header 0 in
     91   let s = Bytes.create (headerSize + dataSize) in
     92   Bytes.blit header 0 s 0 headerSize;
     93   really_input ic s headerSize dataSize;
     94   if fp2 <> Digest.bytes s then begin
     95     debug (fun () -> Util.msg "bad chunk checksum\n");
     96     raise End_of_file
     97   end;
     98   let q =
     99     try Umarshal.from_bytes mentry_list s 0 with
    100     | Umarshal.Error _ ->
    101         debug (fun () -> Util.msg ("Umarshal error when reading from file, "
    102                                 ^^ "ignoring and continuing\n"));
    103         []
    104   in
    105   debug (fun () -> Util.msg "read chunk of %d files\n" (List.length q));
    106   List.iter (fun (l, p, i) -> PathTbl.add tbl (decompress st l p) i) q
    107 
    108 let closeOut st =
    109   state := None;
    110   try
    111     close_out st.oc
    112   with Sys_error error ->
    113     debug (fun () -> Util.msg "error in closing cache file: %s\n" error)
    114 
    115 let write state =
    116   let q = Safelist.rev state.queue in
    117   let s = Umarshal.to_string mentry_list q in
    118   let fp1 = Digest.substring s 0 Umarshal.header_size in
    119   let fp2 = Digest.string s in
    120   begin try
    121     Digest.output state.oc fp1; Digest.output state.oc fp2;
    122     output_string state.oc s; flush state.oc
    123   with Sys_error error ->
    124     debug (fun () -> Util.msg "error in writing to cache file: %s\n" error);
    125     closeOut state
    126   end;
    127   state.count <- 0;
    128   state.size <- Uutil.Filesize.zero;
    129   state.queue <- []
    130 
    131 (****)
    132 
    133 (* Start and finish dealing with the cache *)
    134 
    135 let finish () =
    136   PathTbl.clear tbl;
    137   match !state with
    138     Some st -> if st.queue <> [] then write st;
    139                closeOut st
    140   | None    -> ()
    141 
    142 let magic = "Unison fingerprint cache format 3"
    143 
    144 let init fastCheck ignorearchives fspath =
    145   finish ();
    146   if fastCheck && not ignorearchives then begin
    147     begin try
    148       debug (fun () -> Util.msg "opening cache file %s for input\n"
    149                          (System.fspathToDebugString fspath));
    150       let ic = System.open_in_bin fspath in
    151       begin try
    152         let header = input_line ic in
    153         if header <> magic then raise (Sys_error "wrong header");
    154         let st = ref "" in
    155         while true do read st ic done
    156       with
    157         Sys_error error ->
    158           debug (fun () -> Util.msg "error in loading cache file %s: %s\n"
    159                              (System.fspathToDebugString fspath) error)
    160       | End_of_file ->
    161           ()
    162       end;
    163       begin try
    164         close_in ic
    165       with Sys_error error ->
    166         debug (fun () -> Util.msg "error in closing cache file %s: %s\n"
    167                              (System.fspathToDebugString fspath) error)
    168       end;
    169     with Sys_error error ->
    170       debug (fun () -> Util.msg "could not open cache file %s: %s\n"
    171                          (System.fspathToDebugString fspath) error)
    172     end;
    173     begin try
    174       debug (fun () -> Util.msg "opening cache file %s for output\n"
    175                          (System.fspathToDebugString fspath));
    176       let oc =
    177         System.open_out_gen
    178           [Open_wronly; Open_creat; Open_trunc; Open_binary] 0o600 fspath in
    179       output_string oc magic; output_string oc "\n"; flush oc;
    180       state :=
    181         Some { oc = oc; count = 0; size = Uutil.Filesize.zero;
    182                last = ""; queue = [] }
    183     with Sys_error error ->
    184       debug (fun () -> Util.msg "could not open cache file %s: %s\n"
    185                          (System.fspathToDebugString fspath) error)
    186     end
    187   end
    188 
    189 (****)
    190 
    191 (* Enqueue a fingerprint to be written to disk. *)
    192 
    193 let maxCount = 5000
    194 let maxSize = Uutil.Filesize.ofInt (100 * 1024 * 1024)
    195 
    196 let save path v =
    197   match !state with
    198     None ->
    199       ()
    200   | Some state ->
    201       let (desc, _, _, _) = v in
    202       let l = Props.length desc in
    203       state.size <- Uutil.Filesize.add state.size l;
    204       state.count <- state.count + 1;
    205       let (l, s) = compress state path in
    206       state.queue <- (l, s, v) :: state.queue;
    207       if state.count > maxCount || state.size > maxSize then write state
    208 
    209 (****)
    210 
    211 (* Check whether a fingerprint is in the in-memory cache and store it
    212    to the on-disk cache in any case. *)
    213 
    214 (* HACK: we disable fastcheck for Excel (and MPP) files, as Excel
    215    sometimes modifies a file without updating the time stamp. *)
    216 let excelFile path =
    217   let s = Path.toString path in
    218      Util.endswith s ".xls"
    219   || Util.endswith s ".mpp"
    220 
    221 let dataClearlyUnchanged fastCheck path info desc stamp =
    222   fastCheck
    223     &&
    224   Props.same_time info.Fileinfo.desc desc
    225     &&
    226   Props.length info.Fileinfo.desc = Props.length desc
    227     &&
    228   not (excelFile path)
    229     &&
    230   match stamp with
    231     Fileinfo.InodeStamp inode ->
    232       info.Fileinfo.inode = inode
    233   | Fileinfo.NoStamp ->
    234       true
    235   | Fileinfo.RescanStamp ->
    236       false
    237 
    238 let ressClearlyUnchanged fastCheck info ress dataClearlyUnchanged =
    239   fastCheck
    240     &&
    241   Osx.ressUnchanged ress info.Fileinfo.osX.Osx.ressInfo
    242     None dataClearlyUnchanged
    243 
    244 let clearlyUnchanged fastCheck path newInfo oldDesc oldStamp oldRess =
    245   let du =
    246     dataClearlyUnchanged fastCheck path newInfo oldDesc oldStamp
    247   in
    248   du && ressClearlyUnchanged fastCheck newInfo oldRess du
    249 
    250 let fastercheckUNSAFE =
    251   Prefs.createBool "fastercheckUNSAFE" false
    252     ~category:`Expert
    253     "skip computing fingerprints for new files (experts only!)"
    254     (  "THIS FEATURE IS STILL EXPERIMENTAL AND SHOULD BE USED WITH EXTREME CAUTION.  "
    255        ^ "\n\n"
    256        ^ "When this flag is set to {\\tt true}, Unison will compute a 'pseudo-"
    257        ^ "fingerprint' the first time it sees a file (either because the file is "
    258        ^ "new or because Unison is running for the first time).  This enormously "
    259        ^ "speeds update detection, but it must be used with care, as it can cause "
    260        ^ "Unison to miss conflicts: If "
    261        ^ "a given path in the filesystem contains files on {\\em both} sides that "
    262        ^ "Unison has not yet seen, and if those files have the same length but different "
    263        ^ "contents, then Unison will not notice the presence of a conflict.  If, later, one "
    264        ^ "of the files is changed, the changed file will be propagated, overwriting  "
    265        ^ "the other.  "
    266        ^ "\n\n"
    267        ^ "Moreover, even when the files are initially identical, setting this flag can lead "
    268        ^ "to potentially confusing behavior: "
    269        ^ "if a newly created file is later touched without being modified, Unison will "
    270        ^ "treat this "
    271        ^ "conservatively as a potential change (since it has no record of the earlier "
    272        ^ "contents) and show it as needing to be propagated to the other replica. "
    273        ^ "\n\n"
    274        ^ "Most users should leave this flag off -- the small time savings of not "
    275        ^ "fingerprinting new files is not worth the cost in terms of safety.  However, "
    276        ^ "it can be very useful for power users with huge replicas that are known to "
    277        ^ "be already synchronized (e.g., because one replica is a newly created duplicate "
    278        ^ "of the other, or because they have previously been synchronized with Unison but "
    279        ^ "Unison's archives need to be rebuilt).  In such situations, it is recommended "
    280        ^ "that this flag be set only for the initial run of Unison, so that new archives "
    281        ^ "can be created quickly, and then turned off for normal use.")
    282 
    283 let fingerprint ?(newfile=false) fastCheck currfspath path info optFp =
    284   let res =
    285     try
    286       let (cachedDesc, cachedFp, cachedStamp, cachedRess) =
    287         PathTbl.find tbl (Path.toString path) in
    288       if
    289         not (clearlyUnchanged
    290                fastCheck path info cachedDesc cachedStamp cachedRess)
    291       then
    292         raise Not_found;
    293       debug (fun () -> Util.msg "cache hit for path %s\n"
    294                          (Path.toDebugString path));
    295       (info.Fileinfo.desc, cachedFp, Fileinfo.stamp info,
    296        Fileinfo.ressStamp info)
    297     with Not_found ->
    298       if fastCheck then
    299         debug (fun () -> Util.msg "cache miss for path %s\n"
    300                            (Path.toDebugString path));
    301       let (info, dig) =
    302         if Prefs.read fastercheckUNSAFE && newfile then begin
    303           debug (fun()-> Util.msg "skipping initial fingerprint of %s\n"
    304                             (Fspath.toDebugString (Fspath.concat currfspath path)));
    305           (Fileinfo.get ~archProps:info.desc false currfspath path,
    306            Os.pseudoFingerprint path (Props.length info.Fileinfo.desc))
    307         end else begin
    308           Os.safeFingerprint currfspath path info optFp
    309         end in
    310       (info.Fileinfo.desc, dig, Fileinfo.stamp info, Fileinfo.ressStamp info)
    311   in
    312   save path res;
    313   res