transfer.mli (5204B)
(* Unison file synchronizer: src/transfer.mli *)
(* Copyright 1999-2020, Benjamin C. Pierce (see COPYING for details) *)

(*
   Rsync : general algorithm description

   The rsync algorithm is a technique for reducing the cost of a file
   transfer by avoiding the transfer of blocks that are already at the
   destination.
   Imagine we have source and destination computers that have files X and
   Y respectively, where X and Y are similar. The algorithm proceeds as
   follows :
   - The destination computer divides file Y into blocks of an agreed-upon
     size N.
   - For each block, the destination computer computes two functions of the
     block's contents :
       - A 128-bit fingerprint of the block, which with very high
         probability is different from the fingerprints of different blocks.
       - A small checksum, which can be computed in a "rolling" fashion.
         More precisely, if we are given the checksum for the N-byte block
         at offset k, and we are given the bytes at offsets k and N+k, we
         can efficiently compute the checksum for the N-byte block at offset
         k+1.
   - The destination computer sends a list of fingerprints and checksums to
     the source computer. Blocks are identified implicitly by the order in
     which they appear in the list.
   - The source computer searches through file X to identify blocks that
     have the same fingerprints as blocks that appear in the list sent
     by the destination computer. The checksums are used to find candidate
     blocks in a single pass through file X. Blocks with identical
     fingerprints are presumed to be identical.
   - The source computer sends instructions for reconstructing file X at the
     destination. These instructions avoid transmitting blocks of X that are
     identical to blocks in Y by providing the numbers of identical
     blocks and the strings containing the differences.
*)


(* Transfer instruction giving data to build a file incrementally.
   NOTE(review): the two [int] components are presumably an offset and a
   length within the byte array -- confirm against the implementation. *)
type transfer_instruction = Bytearray.t * int * int

(* A transmitter is handed one transfer instruction and returns an Lwt
   promise of unit. *)
type transmitter = transfer_instruction -> unit Lwt.t


(*************************************************************************)
(*                         GENERIC TRANSMISSION                          *)
(*************************************************************************)

(* Send the whole source file encoded in transfer instructions *)
val send :
    in_channel           (* source file descriptor *)
 -> Uutil.Filesize.t     (* source file length *)
 -> (int -> unit)        (* progress report *)
 -> transmitter          (* transfer instruction transmitter *)
 -> unit Lwt.t

(* Handle one received transfer instruction for the destination file.
   The result tells whether the end of the file has been reached. *)
val receive :
    out_channel          (* destination file descriptor *)
 -> (int -> unit)        (* progress report *)
 -> transfer_instruction (* transfer instruction received *)
 -> bool                 (* Whether we have reached the end of the file *)


(*************************************************************************)
(*                          RSYNC TRANSMISSION                           *)
(*************************************************************************)

module Rsync :
  sig

    (*** DESTINATION HOST ***)

    (* The rsync compression can only be activated when the file size is
       greater than the threshold *)
    val aboveRsyncThreshold : Uutil.Filesize.t -> bool

    (* Built from the old file by the destination computer *)
    type rsync_block_info

    (* Marshalling description for [rsync_block_info] values *)
    val mrsync_block_info : rsync_block_info Umarshal.t

    (* Expected size of the [rsync_block_info] data structure (in KiB).
       NOTE(review): the two file sizes are not labelled in this signature;
       presumably the source and destination file lengths, in the same
       order as for [rsyncPreprocess] -- confirm. *)
    val memoryFootprint : Uutil.Filesize.t -> Uutil.Filesize.t -> int

    (* Compute block information from the old file.
       NOTE(review): the [int] in the result is not described here;
       presumably the block size expected by [rsyncDecompress] -- confirm. *)
    val rsyncPreprocess :
         in_channel           (* old file descriptor *)
      -> Uutil.Filesize.t     (* source file length *)
      -> Uutil.Filesize.t     (* destination file length *)
      -> rsync_block_info * int

    (* Interpret a transfer instruction.
       NOTE(review): the meaning of the [bool] result is not stated here;
       presumably, as for [receive], whether the end of the file has been
       reached -- confirm. *)
    val rsyncDecompress :
         int                  (* block size *)
      -> in_channel           (* old file descriptor *)
      -> out_channel          (* output file descriptor *)
      -> ?copyFn:             (* function for optimized copying *)
           (   Uutil.Filesize.t    (* input file offset *)
            -> Uutil.Filesize.t    (* data length *)
            -> fallback:           (* default function for copying *)
                 (Uutil.Filesize.t (* bytes copied before fallback *)
                  -> unit)
            -> unit)
      -> (int -> unit)        (* progress report *)
      -> transfer_instruction (* transfer instruction received *)
      -> bool

    (*** SOURCE HOST ***)

    (* Using block information, parse the new file and send transfer
       instructions accordingly *)
    val rsyncCompress :
         rsync_block_info
                              (* block info received from the destination *)
      -> in_channel           (* new file descriptor *)
      -> Uutil.Filesize.t     (* source file length *)
      -> (int -> unit)        (* progress report *)
      -> transmitter          (* transfer instruction transmitter *)
      -> unit Lwt.t

  end