large-scale programming with F#
Inspired by F# asynchronous workflows
Express distributed computation.
1: 2: 3: 4:
/// A cloud workflow that yields a constant greeting string.
let hello = cloud { return "Hello World!" }
val hello: Cloud<string>
Express distribution and parallelism patterns
1: 2: 3: 4: 5: 6: 7: 8:
/// Cloud workflow yielding the constant 15.
let first = cloud { return 15 }

/// Cloud workflow yielding the constant 17.
let second = cloud { return 17 }

// Binds the result of `first`, then the result of `second`,
// and returns their sum.
cloud {
    let! a = first
    let! b = second
    return a + b
}
1: 2: 3: 4: 5: 6: 7: 8:
val (<||>): Cloud<'T> -> Cloud<'S> -> Cloud<'T * 'S>
// Combines two cloud workflows with `<||>`, destructures the resulting
// pair and returns the sum of both components.
cloud {
    let fifteen = cloud { return 15 }
    let seventeen = cloud { return 17 }
    let! a, b = fifteen <||> seventeen
    return a + b
}
1: 2: 3: 4: 5: 6: 7: 8:
val Cloud.Parallel: seq<Cloud<'T>> -> Cloud<'T []>
// Builds one cloud workflow per integer in 1..10000 (each returning the
// square of its input), hands them all to Cloud.Parallel and adds up the
// resulting array.
cloud {
    let sqr x = cloud { return x * x }
    let jobs = List.map sqr [1..10000]
    let! sqrs = Cloud.Parallel jobs
    // Each square fits in a 32-bit int, but the total (333,383,335,000)
    // does not, and unchecked F# int arithmetic wraps silently.
    // Widen every element to int64 while summing.
    return Array.sumBy int64 sqrs
}
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15:
/// Naive divide-and-conquer map/reduce expressed as a cloud workflow.
///
/// mapF    - applied to the element of a single-element list.
/// reduceF - combines the results of the two halves.
/// id      - value returned for an empty list.
/// input   - the list to process.
///
/// NOTE(review): `List.split` is not part of FSharp.Core; it is presumably
/// a helper defined elsewhere that cuts the list into two smaller halves.
/// Confirm it before relying on termination.
let rec mapReduce (mapF: 'T -> 'R)
                  (reduceF: 'R -> 'R -> 'R)
                  (id: 'R)
                  (input: 'T list) =
    // Same recursive call is needed for both halves; name it once.
    let recurse = mapReduce mapF reduceF id
    cloud {
        match input with
        | [] -> return id
        | [ single ] -> return mapF single
        | _ ->
            let firstHalf, secondHalf = List.split input
            // Both halves are composed with `<||>` and their results paired.
            let! leftResult, rightResult = recurse firstHalf <||> recurse secondHalf
            return reduceF leftResult rightResult
    }
It's a naive implementation.
Can you spot potential issues/problems?
Processed data is needlessly copied and passed around between worker machines.
Azure, SQL, Filesystem
1: 2: 3:
CloudRef.New: 'T -> Cloud<CloudRef<'T>>
CloudRef.Read: CloudRef<'T> -> Cloud<'T>
1: 2: 3: 4:
/// Binary tree whose child links are cloud references rather than
/// in-memory pointers: a branch holds two ICloudRef handles to subtrees.
type CloudTree<'T> =
/// A leaf carrying no value.
| EmptyLeaf
/// A leaf carrying a single value.
| Leaf of 'T
/// An inner node referring to its left and right subtrees by cloud reference.
| Branch of ICloudRef<CloudTree<'T>> * ICloudRef<CloudTree<'T>>
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11:
/// Builds a CloudTree from a list and returns a cloud reference to its root.
/// An empty list becomes EmptyLeaf, a single element becomes a Leaf, and a
/// longer list is split in two, with each half built recursively and joined
/// under a Branch.
///
/// NOTE(review): `List.split` is not part of FSharp.Core; it is presumably
/// a helper defined elsewhere that halves the list. Confirm.
let rec createTree (input: 'T list) =
    cloud {
        match input with
        | [] -> return! CloudRef.New EmptyLeaf
        | [ single ] -> return! CloudRef.New(Leaf single)
        | _ ->
            let firstHalf, secondHalf = List.split input
            // Subtrees are composed with `<||>`; each yields a cloud reference.
            let! leftRef, rightRef = createTree firstHalf <||> createTree secondHalf
            return! CloudRef.New(Branch(leftRef, rightRef))
    }
1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15:
/// Map/reduce over a CloudTree that is reached through a cloud reference.
///
/// mapF    - applied to the value stored in a Leaf.
/// reduceF - combines the results of the two subtrees of a Branch.
/// id      - value returned for an EmptyLeaf.
/// input   - cloud reference to the root of the tree to process.
let rec mapReduceTree (mapF: 'T -> 'R)
                      (reduceF: 'R -> 'R -> 'R)
                      (id: 'R)
                      (input: ICloudRef<CloudTree<'T>>) =
    // Same recursive call is needed for both subtrees; name it once.
    let recurse = mapReduceTree mapF reduceF id
    cloud {
        // Dereference the node before inspecting its shape.
        let! node = CloudRef.Read input
        match node with
        | EmptyLeaf -> return id
        | Leaf payload -> return mapF payload
        | Branch(leftRef, rightRef) ->
            // Only the references are handed to the recursive calls.
            let! leftResult, rightResult = recurse leftRef <||> recurse rightRef
            return reduceF leftResult rightResult
    }
Distributed binary blob
1: 2: 3:
CloudFile.New: (Stream -> unit) -> Cloud<CloudFile>
CloudFile.Read: CloudFile -> (Stream -> 'T) -> Cloud<'T>
1:
Cloud.GetWorkerCount: unit -> Cloud<int>
1:
Cloud.ToLocal: Cloud<'T> -> Cloud<'T>
A lightweight F#/C# library for efficient functional-style pipelines on streams of data.
1:
source |> inter |> inter |> inter |> terminal
1:
source |> inter |> inter |> inter |> terminal
1: 2: 3: 4: 5:
// Ten million integers, converted to int64 for the pipeline below.
let data = [| 1..10000000 |] |> Array.map int64

// Each pipeline stage sits on its own line: a `//` comment runs to the end
// of the line, so on a single line the first comment would swallow every
// later stage.
Stream.ofArray data //source
|> Stream.filter (fun i -> i % 2L = 0L) //lazy
|> Stream.map (fun i -> i + 1L) //lazy
|> Stream.sum //eager, forcing evaluation