unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

checksum.ml (2990B)


      1 (* Unison file synchronizer: src/checksum.ml *)
      2 (* Copyright 1999-2020, Benjamin C. Pierce
      3 
      4     This program is free software: you can redistribute it and/or modify
      5     it under the terms of the GNU General Public License as published by
      6     the Free Software Foundation, either version 3 of the License, or
      7     (at your option) any later version.
      8 
      9     This program is distributed in the hope that it will be useful,
     10     but WITHOUT ANY WARRANTY; without even the implied warranty of
     11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12     GNU General Public License for more details.
     13 
     14     You should have received a copy of the GNU General Public License
     15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16 *)
     17 
     18 
     19 (* The checksum (or fast fingerprinting) algorithm must be fast and has to   *)
     20 (* be called in a rolling fashion (i.e. we must be able to calculate a new   *)
     21 (* checksum when provided the current checksum, the outgoing character and   *)
     22 (* the incoming one).                                                        *)
     23 
     24 (* Definition: cksum([c_n, c_{n-1}, ..., c_0]) = Sum c_i * 16381 ^ i         *)
     25 
     26 type t = int
     27 
     28 type u = int array
     29 
     30 (* [power v n] computes [v ^ n]                                              *)
     31 let rec power v n =
     32   if n = 0 then 1 else
     33   let v' = power v (n / 2) in
     34   let v'' = v' * v' in
     35   if n land 1 <> 0 then v * v'' else v''
     36 
     37 (* Takes the block length and returns a pre-computed table for the function  *)
     38 (* roll: If [init l] => I, then I_n = n * 16381 ^ (l + 1), for 0 <= n < 256  *)
     39 (* NB: 256 is the upper-bound of ASCII code returned by Char.code            *)
     40 
     41 let init l =
     42   let p = power 16381 (l + 1) in
     43   Array.init 256 (fun i -> (i * p) land 0x7fffffff)
     44 
     45 (* Function [roll] computes fixed-length checksum from previous checksum     *)
     46 (* Roughly: given the pre-computed table, compute the new checksum from the  *)
     47 (* old one along with the outgoing and incoming characters, i.e.,            *)
     48 (*                                                                         - *)
     49 (* [roll cksum([c_n, ..., c_0]) c_n c'] => cksum([c_{n-1}, ..., c_0, c'])    *)
     50 (*                                                                         - *)
     51 let roll init cksum cout cin =
     52   let v = cksum + Char.code cin in
     53   (v lsl 14 - (v + v + v) (* v * 16381 *)
     54     - Array.unsafe_get init (Char.code cout)) land 0x7fffffff
     55 
     56 (* Function [substring] computes checksum for a given substring in one batch *)
     57 (* process: [substring s p l] => cksum([s_p, ..., s_{p + l - 1}])            *)
     58 
     59 let substring s p l =
     60   let cksum = ref 0 in
     61   for i = p to p + l - 1 do
     62     let v = !cksum + Char.code (String.unsafe_get s i) in
     63     cksum := (v lsl 14 - (v + v + v)) (* v * 16381 *)
     64   done;
     65   !cksum land 0x7fffffff
     66 
     67 let subbytes s p l =
     68   let cksum = ref 0 in
     69   for i = p to p + l - 1 do
     70     let v = !cksum + Char.code (Bytes.unsafe_get s i) in
     71     cksum := (v lsl 14 - (v + v + v)) (* v * 16381 *)
     72   done;
     73   !cksum land 0x7fffffff