checksum.ml (2990B)
1 (* Unison file synchronizer: src/checksum.ml *) 2 (* Copyright 1999-2020, Benjamin C. Pierce 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. 16 *) 17 18 19 (* The checksum (or fast fingerprinting) algorithm must be fast and has to *) 20 (* be called in a rolling fashion (i.e. we must be able to calculate a new *) 21 (* checksum when provided the current checksum, the outgoing character and *) 22 (* the incoming one). *) 23 24 (* Definition: cksum([c_n, c_{n-1}, ..., c_0]) = Sum c_i * 16381 ^ i *) 25 26 type t = int 27 28 type u = int array 29 30 (* [power v n] computes [v ^ n] *) 31 let rec power v n = 32 if n = 0 then 1 else 33 let v' = power v (n / 2) in 34 let v'' = v' * v' in 35 if n land 1 <> 0 then v * v'' else v'' 36 37 (* Takes the block length and returns a pre-computed table for the function *) 38 (* roll: If [init l] => I, then I_n = n * 16381 ^ (l + 1), for 0 <= n < 256 *) 39 (* NB: 256 is the upper-bound of ASCII code returned by Char.code *) 40 41 let init l = 42 let p = power 16381 (l + 1) in 43 Array.init 256 (fun i -> (i * p) land 0x7fffffff) 44 45 (* Function [roll] computes fixed-length checksum from previous checksum *) 46 (* Roughly: given the pre-computed table, compute the new checksum from the *) 47 (* old one along with the outgoing and incoming characters, i.e., *) 48 (* - *) 49 (* [roll cksum([c_n, ..., c_0]) c_n c'] => cksum([c_{n-1}, ..., c_0, c']) *) 50 (* - *) 51 let roll init cksum cout cin = 52 let v = cksum + Char.code cin in 53 (v lsl 14 - (v + v + v) (* v * 16381 *) 54 - Array.unsafe_get init (Char.code cout)) land 0x7fffffff 55 56 (* Function [substring] computes checksum for a given substring in one batch *) 57 (* process: [substring s p l] => cksum([s_p, ..., s_{p + l - 1}]) *) 58 59 let substring s p l = 60 let cksum = ref 0 in 61 for i = p to p + l - 1 do 62 let v = !cksum + Char.code (String.unsafe_get s i) in 63 cksum := (v lsl 14 - (v + v + v)) (* v * 16381 *) 64 done; 65 !cksum land 0x7fffffff 66 67 let subbytes s p l = 68 let cksum = ref 0 in 69 for i = p to p + l - 1 do 70 let v = !cksum + Char.code (Bytes.unsafe_get s i) in 71 cksum := (v lsl 14 - (v + v + v)) (* v * 16381 *) 72 done; 73 !cksum land 0x7fffffff