unison

Fork of Unison, a bi-directional file synchronization tool
git clone git://git.laack.co/unison.git
Log | Files | Refs | README | LICENSE

transfer.mli (5204B)


      1 (* Unison file synchronizer: src/transfer.mli *)
      2 (* Copyright 1999-2020, Benjamin C. Pierce (see COPYING for details) *)
      3 
      4 (*
      5    Rsync : general algorithm description
      6 
      7      The rsync algorithm is a technique for reducing the cost of a file
      8    transfer by avoiding the transfer of blocks that are already at the
      9    destination.
     10      Imagine we have source and destination computers that have files X and
     11    Y respectively, where X and Y are similar. The algorithm proceeds as
     12    follows :
     13    - The destination computer divides file Y into blocks of an agreed-upon
     14      size N.
     15    - For each block, the destination computer computes two functions of the
     16      block's contents :
     17        - A 128-bit fingerprint of the block, which with very high
     18          probability is different from the fingerprints of different blocks.
     19        - A small checksum, which can be computed in a "rolling" fashion.
     20          More precisely, if we are given the checksum for the N-byte block
     21          at offset k, and we are given the bytes at offsets k and N+k, we
     22          can efficiently compute the checksum for the N-byte block at offset
     23          k+1.
     24    - The destination computer sends a list of fingerprints and checksums to
     25      the source computer. Blocks are identified implicitly by the order in
     26      which they appear in the list.
     27    - The source computer searches through file X to identify blocks that
     28      have the same fingerprints as blocks that appear in the list sent
     29      by the destination. The checksums are used to find candidate blocks
     30      in a single pass through file X. Blocks with identical fingerprints
     31      are presumed to be identical.
     32    - The source computer sends instructions for reconstructing file X at the
     33      destination. These instructions avoid transmitting blocks of X that are
     34      identical to blocks in Y by providing the numbers of the matching
     35      blocks and the strings containing the differences.
     36 *)
     37 
     38 
     39 (* Transfer instruction giving data to build a file incrementally.
           NOTE(review): the two ints are presumably an offset and a length
           into the byte array -- confirm against transfer.ml. *)
     40 type transfer_instruction = Bytearray.t * int * int
     41 
     42 type transmitter = transfer_instruction -> unit Lwt.t
        (* A transmitter takes one transfer instruction and returns an Lwt
           thread of type [unit Lwt.t]. *)
     43 
     44 
     45 (*************************************************************************)
     46 (* GENERIC TRANSMISSION                                                  *)
     47 (*************************************************************************)
     48 
     49 (* Send the whole source file encoded in transfer instructions, which
           are handed to the transmitter given as last argument.
           NOTE(review): the progress callback presumably receives a byte
           count -- confirm against transfer.ml. *)
     50 val send :
     51     in_channel             (* source file descriptor *)
     52  -> Uutil.Filesize.t       (* source file length *)
     53  -> (int -> unit)          (* progress report *)
     54  -> transmitter            (* transfer instruction transmitter *)
     55  -> unit Lwt.t
     56 
     57 val receive :
     58     out_channel            (* destination file descriptor *)
     59  -> (int -> unit)          (* progress report *)
     60  -> transfer_instruction   (* transfer instruction received *)
     61  -> bool                   (* Whether we have reached the end of the file *)
        (* Process one received transfer instruction for the destination
           file; the result tells the caller whether the end of the file has
           been reached. *)
     62 
     63 
     64 (*************************************************************************)
     65 (* RSYNC TRANSMISSION                                                    *)
     66 (*************************************************************************)
     67 
     68 module Rsync :
     69   sig
     70 
     71     (*** DESTINATION HOST ***)
     72 
     73     (* The rsync compression can only be activated when the file size is
     74        greater than the threshold *)
     75     val aboveRsyncThreshold : Uutil.Filesize.t -> bool
     76 
     77     (* Built from the old file by the destination computer *)
     78     type rsync_block_info
     79 
     80     val mrsync_block_info : rsync_block_info Umarshal.t
                (* NOTE(review): presumably the marshalling description used
                   to transmit [rsync_block_info] values -- confirm against
                   Umarshal *)
     81 
     82     (* Expected size of the [rsync_block_info] data structure (in KiB).
               NOTE(review): the two sizes are presumably the source and
               destination file lengths, in the same order as for
               [rsyncPreprocess] -- confirm against transfer.ml *)
     83     val memoryFootprint : Uutil.Filesize.t -> Uutil.Filesize.t -> int
     84 
     85     (* Compute block information from the old file *)
     86     val rsyncPreprocess :
     87            in_channel            (* old file descriptor *)
     88         -> Uutil.Filesize.t      (* source file length *)
     89         -> Uutil.Filesize.t      (* destination file length *)
     90         -> rsync_block_info * int
                                         (* NOTE(review): the int is presumably
                                            the block size later passed to
                                            [rsyncDecompress] -- confirm *)
     91 
     92     (* Interpret a transfer instruction *)
     93     val rsyncDecompress :
     94            int                   (* block size *)
     95         -> in_channel            (* old file descriptor *)
     96         -> out_channel           (* output file descriptor *)
     97         -> ?copyFn:              (* function for optimized copying *)
     98            (   Uutil.Filesize.t  (* input file offset *)
     99             -> Uutil.Filesize.t  (* data length *)
    100             -> fallback:         (* default function for copying *)
    101                (Uutil.Filesize.t (* bytes copied before fallback *)
    102                 -> unit)
    103             -> unit)
    104         -> (int -> unit)         (* progress report *)
    105         -> transfer_instruction  (* transfer instruction received *)
    106         -> bool                  (* NOTE(review): presumably true once
                                            the end of the file is reached, as
                                            for [receive] -- confirm *)
    107 
    108     (*** SOURCE HOST ***)
    109 
    110     (* Using block information, parse the new file and send transfer
    111        instructions accordingly *)
    112     val rsyncCompress :
    113            rsync_block_info
    114                               (* block info received from the destination *)
    115         -> in_channel         (* new file descriptor *)
    116         -> Uutil.Filesize.t   (* source file length *)
    117         -> (int -> unit)      (* progress report *)
    118         -> transmitter        (* transfer instruction transmitter *)
    119         -> unit Lwt.t
    120 
    121   end