While playing around with the bistro stuff, i've been slowly investigating other concepts that have been floating around over the last few years. One of them is F# (start here and go from there). I've spent a few weeks wrapping my brain around the rather orthogonal concept of functional programming, and followed someone's advice in trying to learn about Haskell first (the exact quote that sold me on the idea was "trying to learn F# without knowing Haskell is like trying to learn VB without knowing C#"). That approach has been immensely helpful, as it's introduced me to the purer concepts of functional programming without mixing things with the all-too-accessible .NET framework.

So. The first real experiment for me has been the most mathematical application I could easily wrap my head around - the rolling checksum in RSync. I've managed to cobble together an implementation that... at the very least behaves as the algorithm expects. I'll be filing this in as I go, hopefully making a full-fledged file synchronization app in F# that uses WCF. I am not planning on supporting the original C# port, or making this implementation protocol-compatible with the original RSync, as the protocol itself is not documented, and reverse-engineering it out of the port that we have would be a nightmare. I'll move that piece off to SourceForge in the next few weeks and make it open there.

Anywho, here's what i've got so far:

 
#light
 
open System
open System.IO
 
/// the rsync constant
let M = 2 |> pown <| 16
 
/// the a(k,l) in rsync, with k = 0. defined as a(k,l) = (sum (i=k:l) X[i]) mod M
let a_initial l (data: int[]) = (data |> Array.fold_left (+) 0) % M
 
/// the b(k,l) in rsync, with k = 0. defined as b(k,l) = (sum (i=k:l) (l-i+1) * X[i]) mod M
/// uses the stateful fold function to carry through two states - one is the to-date sum, the other
/// is a simple position counter, since our sum is a function of position. so in \l (v,i) l is l
/// from b_initial, v is the computed to-date sum, and i is the index
let b_initial l (data: int[]) = ((data |> Array.fold_left ((fun l (v, i) a -> ((l-i+1) * a + v, i+1)) l) (0,0)) |> fst) % M
 
/// the rolling a(k,l) defined as a(k+1,l+1) = (a(k,l) - X[k] + X[l+1]) mod M
/// note that the implementation of this function is actually done as a((k-1)+1,
/// (l-1)+1) so that it can work in a scanning, as opposed to a recursive fashion
let a1 a xk1 xl = (a - xk1 + xl) % M
 
/// the rolling b(k,l) defined as b(k+1,l+1) = (b(k,l) - (l-k+1) * X[k] + a(k+1,l+1)) mod M
/// note that the implementation of this function is actually done as b((k-1)+1, (l-1)+1) so
/// that it can work in a scanning, as opposed to a recursive fashion
let b1 a b xk1 k l = (b - (l - k + 1) * xk1 + a) % M
 
/// the final summation of a(k,l),b(k,l). defined as s(k,l) = a(k,l) + M * b(k,l)
let s1 a b = a + M * b
 
/// computes the hash sums with the given file
let computeSums (stream: Stream) (s_out: int -> int -> unit) =
    let block_size = int (Math.Round(Math.Sqrt(float stream.Length)))
    let (_buffer: byte[]) = Array.create block_size (byte 0)
 
    let mutable bytesRead = 0
    while bytesRead < _buffer.Length do
        bytesRead <- stream.Read(_buffer, bytesRead, _buffer.Length )
 
    let buffer = Array.map (fun a -> int a) _buffer
    let mutable prev_a = a_initial (block_size - 1) buffer
    let mutable prev_b = b_initial (block_size - 1) buffer
 
    let mutable k = 0
    let mutable l = buffer.Length-1
 
    // scanWindow should always be k:(l+1) or (k-1):l bytes, as we will peak ahead/look behind at one offset value
    let mutable scanWindow = (Array.to_list buffer) @ [stream.ReadByte()]
 
    while k + l < (int stream.Length) do
        s_out k (s1 prev_a prev_b)
        prev_a <- a1 prev_a (List.hd scanWindow) scanWindow.[scanWindow.Length-1]
        prev_b <- b1 prev_a prev_b (List.hd scanWindow) k l
        k <- k+1
        l <- l+1
        scanWindow <- (List.tl scanWindow) @ [stream.ReadByte()]
 
let outStream = new StreamWriter(new FileStream(@"c:\test.out", FileMode.Create))
 
let stream1 = new FileStream(@"C:\ph.jpg", FileMode.Open)
let sums1 = Array.create ((int stream1.Length) + 1) 0
computeSums stream1 (fun a b -> sums1.[a] <- b)
 
let stream2 = new FileStream(@"C:\ph1.jpg", FileMode.Open)
let sums2 = Array.create ((int stream2.Length) + 1) 0
computeSums stream2 (fun a b -> sums2.[a] <- b)
 
Array.iter2 (fun a b -> outStream.WriteLine((a,b, if a=b then "" else "neq"))) sums1 sums2
outStream.Close()
 

This should work and compile with the latest CTP of F#. As you can see, the last block invokes the computeSums function, with a stream pointed to a local file and a function that's used to then spit sum values back out. It also saves off two consequtive passes comparing the sum values. The idea there is that the two files are identical except for a few bytes in the middle, so this way i can track easily where the sums differ and where they don't. I welcome any "you're doing it wrong!" comments, as the whole functional thing is new to me.