add context and fip samples

samosica · Jan 13, 2024 · 5db2c0f · 5db2c0f
1 parent a2f31c5
commit 5db2c0f
Show file tree

Hide file tree

Showing 6 changed files with 273 additions and 6 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@
 .vs/
 .vscode/
 support/vscode/koka.language-koka/whatsnew.md
+src/Syntax/Lexer.hs.gen
 node_modules/
 out/
 bundle/

diff --git a/samples/all.kk b/samples/all.kk
@@ -10,6 +10,8 @@ import syntax/handler
 import syntax/with
 import syntax/qualifiers
 import syntax/implicits
+import syntax/contexts
+import syntax/fip
 
 import handlers/ambient
 import handlers/basic
@@ -36,6 +38,8 @@ pub fun main()
   run("fibonacci",fibonacci/main)
   run("garsia-wachs",garsia-wachs/main)
   run("rbtree",rbtree/main)
+  run("contexts",contexts/main)
+  run("fip",fip/main)
 
   run("unify",unify/main)
   run("heap",heap/main)

diff --git a/samples/syntax/contexts.kk b/samples/syntax/contexts.kk
@@ -0,0 +1,156 @@
+// First-class constructor contexts allow specifying efficient
+// top-down algorithms directly in a purely functional style.
+//
+// See the paper for a more in-depth discussion:
+//  "The Functional Essence of Imperative Binary Search Trees",
+//  Anton Lorenzen, Daan Leijen, Wouter Swierstra, and Sam Lindley,
+//  <https://www.microsoft.com/en-us/research/publication/fiptree-tr/>
+//
+// Prerequisites:
+//   - `syntax/basic`
+//   - `syntax/with`
+module syntax/contexts
+
+// Consider the well-known `map` function that maps a function over a list
+fun map-std( xs : list<a>, ^f : a -> e b ) : e list<b>
+  match xs
+    Cons(x,xx) -> Cons( f(x), xx.map-std(f) )
+    Nil        -> Nil
+
+// note: we use `^` on the `f` parameter to borrow it, which means it is
+// considered live for the entire duration of the function and does not need
+// to be reference counted or drop'd.
+
+// Naively, the `map-std` function would use stack space linear in the size
+// of the first list as it is not directly tail recursive. It turns out
+// that Koka can actually optimize tail recursive calls under a series
+// of constructors and `map-std` will be compiled in an efficient way.
+// See the paper:
+//   "Tail Recursion Modulo Context: An Equational Approach",
+//   Daan Leijen and Anton Lorenzen, POPL'23,
+//   <https://www.microsoft.com/en-us/research/publication/tail-recursion-modulo-context-an-equational-approach-2/>
+//
+// To be sure this optimization happens, we can write `tail fun` to get
+// a warning if our function is not quite TRMC.
+tail fun map-tail( xs : list<a>, ^f : a -> e b ) : e list<b>
+  match xs
+    Cons(x,xx) -> Cons( f(x), xx.map-tail(f) )
+    Nil        -> Nil
+
+pub fun test-tail()
+  [1,2,3].map(inc).println
+
+// The `map-tail` function is the recommended way to write functions
+// like `map` in Koka -- clean, functional, and efficient!
+
+// However, the TRMC optimization cannot always apply. How would we
+// then write an efficient `map` function? Commonly, functional programmers
+// use an accumulator to write a tail-recursive version of `map`.
+tail fun map-acc( xs : list<a>, f : a -> e b, acc : list<b> ) : e list<b>
+  match xs
+    Cons(x,xx) -> xx.map-acc(f, Cons(f(x), acc))
+    Nil        -> acc.reverse
+
+fun map-acc-entry(xs,f)
+  map-acc(xs,f,[])
+
+pub fun test-acc()
+  [1,2,3].map-acc-entry(inc).println
+
+// But `map-acc` is still not great since we build the accumulation list
+// in reverse, and need to `reverse` it again in the end
+// (_bottom-up_, doing two passes over the list).
+// A C programmer would instead write a _top-down_ algorithm
+// traversing the list once and returning the (remembered) root
+// of the list at the end. To do this in a purely functional way
+// we need first-class constructor contexts.
+
+// We can create a context using the `ctx` keyword. A context must
+// consist of constructors and have a single _hole_ (written as `_`).
+// For example, `ctx Cons(1,Cons(2,_))` or `ctx Bin(Bin(Leaf 1, _),Leaf 3)`
+// There are two operations on context, append and apply:
+//
+// `(++)  : (cctx<a,b>, cctx<b,c>) -> cctx<a,c>`  // append
+// `(++.) : (cctx<a,b>, b) -> a`                  // apply
+//
+// Append takes a context of type `:a` with hole `:b` and writes a
+// a context `cctx<b,c>` into the hole to get a new context of type
+// `:a` with hole `:c`. Apply `(.++)` plugs the hole `:b` in a
+// context `cctx<a,b>` and returns the whole structure `:a`.
+// Often the hole and the structure are of the same type, and
+// we have the `alias ctx<a> = cctx<a,a>`.
+//
+// Now we can write a top-down `map` ourselves: (note: we don't need `tail` here as `fip` implies `tail` already)
+fip fun map-td( xs : list<a>, ^f : a -> e b, acc : ctx<list<b>> ) : e list<b>
+  match xs
+    Cons(x,xx) -> xx.map-td( f, acc ++ ctx Cons(f(x), _) )  // plug `Cons(f(x),_)` into the hole in `acc`
+    Nil        -> acc ++. Nil                               // plug the hole with `Nil`
+
+fip fun map-td-entry(xs,^f)
+  map-td(xs,f,ctx _)     // start with an empty context
+
+pub fun test-td()
+  [1,2,3].map-td-entry(inc).println
+
+// The top-down version is efficient and in the fast path (where the
+// list is unique at runtime and not used persistently) executes
+// much like the loop that the C programmer would write: traversing the
+// list once and updating the elements in place (due to Perceus). When the arguments
+// are unique, the append and apply of contexts are constant time operations.
+
+// The TRMC version `map-tail` is still preferred as it is more clear and
+// internally optimizes to the `map-td` version. However, the optimization
+// can not alway be done and in such case we need first-class constructor contexts.
+// Consider the `flatten` function that concatenates a list of lists:
+
+fip fun concat(xs : list<a>, ys : list<a>) : list<a>   // also defined as `std/core/list/(++)`
+  match xs
+    Cons(x,xx) -> Cons(x, concat(xx,ys))
+    Nil        -> ys
+
+fbip fun flatten( xss : list<list<a>> ) : list<a>
+  match xss
+    Cons(xs,xxs) -> concat( xs, flatten(xxs) )
+    Nil          -> Nil
+
+// Here, `concat` is TRMC, but `flatten` is not (since the recursive call is inside a function call)
+// With constructor contexts though we can represent the flattened list as a context
+// with a hole in the tail element:
+fip fun concat-td( xs : list<a>, acc : ctx<list<a>> ) : ctx<list<a>>
+  match xs
+    Cons(x,xx) -> concat-td( xx, acc ++ ctx Cons(x,_) )
+    Nil        -> acc
+
+fbip fun flatten-td( xss : list<list<a>>, acc : ctx<list<a>> ) : ctx<list<a>>
+  match xss
+    Cons(xs,xxs) -> flatten-td( xxs, concat-td( xs, acc ) )
+    Nil          -> acc
+
+fbip fun flatten-entry( xss : list<list<a>> ) : list<a>
+  flatten-td( xss, ctx _) ++. Nil
+
+pub fun example-flatten()
+  [[1,2],[3,4],[]].flatten-entry.println
+
+// Another example is the `partition` function where we
+// need two contexts to build the two result lists in one
+// traversal.
+tail fun partition-td(xs : list<a>, ^pred : a -> e bool, acc1 : ctx<list<a>>, acc2 : ctx<list<a>>): e (list<a>, list<a>)
+  match xs
+    Nil -> (acc1 ++. Nil, acc2 ++. Nil)
+    Cons(x,xx) -> if pred(x)
+      then partition-td(xx, pred, acc1 ++ ctx Cons(x,_), acc2)
+      else partition-td(xx, pred, acc1,                  acc2 ++ ctx Cons(x,_))
+
+fun partition-entry( xs : list<a>, ^pred : a -> e bool ) : e (list<a>,list<a>)
+  partition-td(xs, pred, ctx _, ctx _)
+
+pub fun example-partition()
+  [1,2,3,4,5].partition-entry(is-odd)
+
+
+// See <https://www.microsoft.com/en-us/research/uploads/prod/2023/07/fiptree-tr-v4.pdf>
+// for more complex examples on balanced tree insertion algorithms.
+
+pub fun main()
+  example-partition().println()
diff --git a/samples/syntax/fip.kk b/samples/syntax/fip.kk
@@ -0,0 +1,104 @@
+// Examples of "Fully in-place" functional programming
+//
+// Work in progress.
+// For a nice overview, see the introduction of:
+//  "FP^2: Fully in-Place Functional Programming",
+//  Anton Lorenzen, Daan Leijen, and Wouter Swierstra, ICFP'23.
+//  <https://www.microsoft.com/en-us/research/uploads/prod/2023/05/fip-tr-v2.pdf>
+//
+// And also:
+//  "The Functional Essence of Imperative Binary Search Trees",
+//  Anton Lorenzen, Daan Leijen, Wouter Swierstra, and Sam Lindley,
+//  <https://www.microsoft.com/en-us/research/publication/fiptree-tr/>
+//
+// Prerequisites:
+//   `syntax/basic`
+module syntax/fip
+
+// We can use the `fip` keyword to ensure that,
+// _if the (non-borrowed) parameters are unique at runtime_,
+// the function does not allocate or deallocate memory, and uses constant stack space.
+// For example:
+fip fun rev-acc( xs : list<a>, acc : list<a> ) : list<a>
+  match xs
+    Cons(x,xx) -> rev-acc(xx, Cons(x,acc))
+    Nil        -> acc
+
+fip fun rev( xs : list<a> ) : list<a>
+  rev-acc(xs,[])
+
+pub fun example-rev()
+  [1,2,3].rev.println
+
+// The `rev` function is fip: due to Perceus reference counting,
+// if the argument list `xs` is unique at runtime, each `Cons` cell
+// is reused and updated _in-place_ for the reversed accumulator:
+// no memory is (de)allocated and constant stack space is used (as it
+// is tail-recursive).
+//
+// There are severe restrictions on `fip` functions to make this guarantee.
+// See the paper for details. In essence, all owned parameters must be
+// used linearly, and the function can only call `fip` functions itself.
+
+// We can still use `rev` persistently as well and have a full
+// functional semantics where the argument list is copied when needed.
+// The best of both worlds: we can write a purely functional version
+// but get in-place update when possible without having to write multiple
+// versions of the same functionality, e.g. an in-place updating set and a persistent set
+pub fun example-persistent()
+  val xs = list(1,5)
+  xs ++ rev(xs)
+
+// Sometimes, this copying can be unnecessary .. we are working on providing
+// better warnings for these situations
+pub fun example-bad()
+  val xs = list(1,5)
+  val ys = rev(xs)        // `xs` gets copied as it used later on for its `length`
+  val n  = xs.length      // if we would swap the `ys` and `n` definitions we would avoid a copy of `xs`
+  (ys,n).println
+
+// Similarly, we can write the weaker `fbip` keyword for a function that does not allocate any memory,
+// but is allowed to deallocate and use arbitrary stack space.
+fbip fun filter-odd( xs : list<int> ) : list<int>
+  match xs
+    Cons(x,xx) -> if is-odd(x) then Cons(x,xx.filter-odd) else xx.filter-odd
+    Nil        -> Nil
+
+
+// We can write `fip(n)` (and `fbip(n)`), where `n` is a constant, to allow the function
+// to allocate at most `n` constructors. This is useful for example to write in-place
+// tree insertion where we may need to allocate a leaf node for a newly inserted element
+type tree<a>
+  Bin( left : tree<a>, value : a, right : tree<a> )
+  Tip
+
+fip(1) fun insert( t : tree<int>, k : int ) : tree<int>
+  match t
+    Bin(l,x,r) -> if (x==k) then Bin(l,k,r)
+                  elif (x < k) then Bin(l,x,insert(r,k))
+                  else Bin(insert(l,k),x,r)
+    Tip        -> Bin(Tip,k,Tip)
+
+
+// Unfortunately, we cannot quite check a recursive polymorphic fip version of `insert` yet
+// since we cannot (yet) express second-rank borrow information where the compare
+// function does not only need to be borrowed, but borrow its arguments as well
+// (e.g. we need `^?cmp : (^a,^a) -> order`).
+//
+// The `insert-poly`` actually _does_ execute in-place at runtime like a `fip(1)`
+// function, we just can't check it statically (at this time).
+fun insert-poly( t : tree<a>, k : a, ^?cmp : (a,a) -> order ) : tree<a>
+  match t
+    Bin(l,x,r) -> if (x==k) then Bin(l,k,r)
+                  elif (x < k) then Bin(l,x,insert-poly(r,k))
+                  else Bin(insert-poly(l,k),x,r)
+    Tip        -> Bin(Tip,k,Tip)
+
+
+// Todo.. more examples with zippers:
+// - Tree mapping
+// - Morris traversal
+// - Splay tree restructuring
+
+pub fun main()
+  example-persistent().println()
diff --git a/samples/syntax/implicits.kk b/samples/syntax/implicits.kk
@@ -92,7 +92,7 @@ fun list/eq( xs : list<a>, ys : list<a>, ?eq : (a,a) -> bool ) : bool
 // the second `eq(xx,yy)` resolves to `list/eq(xx,yy)`, bypassing
 // the local `?eq` parameter (as its fully qualified name is `?eq` (==`@implicit/eq`)).
 // The implicit parameter to `list/eq` is resolved to the local parameter `?eq` again.
-// (these are just the regular rules for disambiguating qualified names, not specific to implicits)
+// (these are just the regular rules for disambiguating qualified names based on types -- not specific to implicits)
 pub fun example4()
   eq([1,2],[1,2])
 
@@ -105,8 +105,10 @@ pub fun example5()
   eq( [[1,2],[]], [[1,2],[]] )
 
 
+// # Experimental territory
+
 // When resolving an implicit parameter, the compiler also considers
-// unit functions that only take implicit parameters themselves.
+// matching functions that take further implicit parameters themselves.
 // These are called _conversions_. For example, given a `compare` function,
 // we could provide an `eq` function:
 fun compare/eq( x : a, y : a, ?compare : (a,a) -> order )

diff --git a/test/overload/eq1.kk b/test/overload/eq1.kk
@@ -2,18 +2,18 @@
 fun int/eq( x : int, y : int ) : bool
   (x == y)
 
-fun float64/eq( x : float64, y : float64 ) : bool
+fun char/eq( x : char, y : char ) : bool
   (x == y)
 
-fun list/eq<a>( xs : list<a>, ys : list<a>, ?eq : (a,a) -> bool ) : bool
+fun list/eq( xs : list<a>, ys : list<a>, ?eq : (a,a) -> bool ) : bool
   match xs
     Cons(x,xx) -> match ys
       Cons(y,yy) | eq(x,y) -> eq(xx,yy)
       _ -> False
     Nil -> ys.is-nil
 
-fun choose(x,y)
-  if True then x else y
+fun example()
+  eq([1,2],[1,3])
 
 
 fun test1()