Level grinding

We incrementally improve our compiler over and over again, which is like grinding levels in a computer role-playing game. There is even a skill tree of sorts. Do we want to add language features? Or optimize the generated code? Or improve error reporting? And so on.

We must also prepare for an an upcoming boss battle.

Stringy

Algorithm 4.1 of Kiselyov’s paper; strings and character constants.

------------------------------------------------------------------------
-- Uppercase letters. Strings. Chars.
-- Bracket abstraction optimization.
------------------------------------------------------------------------

or f g x y = f x (g x y);
and f g x y = @C f y (g x y);
pair x y f = f x y;
just x f g = g x;
foldr = @Y \r c n l -> l n (\h t -> c h(r c n t));
lsteq = @Y \r xs ys a b -> xs (ys a (\u u -> b)) (\x xt -> ys b (\y yt -> x(y @=) (r xt yt a b) b));

pure x inp = just (pair x inp);
bind f m = m @K (\x -> x f);
ap x y = \inp -> bind (\a t -> bind (\b u -> pure (a b) u) (y t)) (x inp);
fmap f x = ap (pure f) x;
alt x y = \inp -> (x inp) (y inp) just;
liftaa f x y = ap (fmap f x) y;
many = @Y \r p -> alt (liftaa @: p (r p)) (pure @K);
some p = liftaa @: p (many p);
liftki = liftaa (@K @I);
liftk = liftaa @K;
between x y p = liftki x (liftk p y);
sat f inp = inp @K (\h t -> f h (pure h t) @K);
char c = sat (\x -> x(c @=));

lcr s   = \a b c d -> a s;
lcv v   = \a b c d -> b v;
lca x y = \a b c d -> c x y;
lcl x y = \a b c d -> d x y;
raw x = lcr (@: x);

com = liftki (char #-) (liftk (char #-) (many (sat (\c -> @C (c(#
@=))))));
sp = many (alt (char # ) (alt (char #
) com));
spc f = liftk f sp;
spch = @B spc char;
var = spc (some (sat (\x -> or
  (and (#z(x @L)) (x(#a @L)))
  (and (#Z(x @L)) (x(#A @L)))
  )));
pre = liftki (char #@) (fmap (@:) (spc (sat (@K @K))));
lam r = liftki (spch #\) (liftaa (@C (foldr lcl)) (some var) (liftki (char #-) (liftki (spch #>) r)));
listify = fmap (foldr (\h t -> lca (lca (raw #:) h) t) (raw #K));
escchar = liftki (char #\) (alt (sat (\c -> or (c(#" @=)) (or (c(#\ @=)) (c(#' @=))))) (fmap (@K #
) (char #n)));
litone delim = fmap (\x -> lcr (@B (@: ##) (@: x))) (alt escchar (sat (\c -> @C (delim(c @=)))));
litstr = listify (between (char #") (spch #") (many (litone #")));
litchar = between (char #') (spch #') (litone #');
lit = alt litstr litchar;
atom r = alt (fmap lcv var) (alt (between (spch #() (spch #)) r) (alt (fmap lcr pre) (alt (lam r) lit)));

apps = @Y \f r -> alt (liftaa @T (atom r) (fmap (\vs v x -> vs (lca x v)) (f r))) (pure @I);
expr = @Y \r -> liftaa @T (atom r) (apps r);
def = liftaa pair var (liftaa (@C (foldr lcl)) (many var) (liftki (spch #=) expr));
program = liftki sp (some (liftk def (spch #;)));

ze   = \    a b c d e -> a;
su   = \x   a b c d e -> b x;
pass = \x   a b c d e -> c x;
la   = \x   a b c d e -> d x;
app  = \x y a b c d e -> e x y;

debruijn = @Y \r n e -> e
  (\s -> pass (lcr s))
  (\v -> foldr (\h m -> lsteq h v ze (su m)) (pass (lcv v)) n)
  (\x y -> app (r n x) (r n y))
  (\s t -> la (r (@: s n) t))
  ;

defer = \a b c d -> a;
closed = \t a b c d -> b t;
need = \x a b c d -> c x;
weak = \x a b c d -> d x;

ldef = \r y -> y
  (need (closed (lca (lca (raw #S) (raw #I)) (raw #I))))
  (\d -> need (closed (lca (raw #T) d)))
  (\e -> need (r (closed (lca (raw #S) (raw #I))) e))
  (\e -> need (r (closed (raw #T)) e))
  ;

lclo = \r d y -> y
  (need (closed d))
  (\dd -> closed (lca d dd))
  (\e -> need (r (closed (lca (raw #B) d)) e))
  (\e -> weak (r (closed d) e))
  ;

lnee = \r e y -> y
  (need (r (r (closed (raw #S)) e) (closed (raw #I))))
  (\d -> need (r (closed (lca (raw #R) d)) e))
  (\ee -> need (r (r (closed (raw #S)) e) ee))
  (\ee -> need (r (r (closed (raw #C)) e) ee))
  ;

lwea = \r e y -> y
  (need e)
  (\d -> weak (r e (closed d)))
  (\ee -> need (r (r (closed (raw #B)) e) ee))
  (\ee -> weak (r e ee))
  ;

babsa = @Y \r x y -> x
  (ldef r y)
  (\d -> lclo r d y)
  (\e -> lnee r e y)
  (\e -> lwea r e y)
  ;

babs = @Y \r t -> t
  defer
  (@B weak r)
  closed
  (\t -> r t
    (closed (raw #I))
    (\d -> closed (lca (raw #K) d))
    @I
    (babsa (closed (raw #K))))
  (\x y -> babsa (r x) (r y))
  ;

nolam x = babs (debruijn @K x) @? @I @? @?;

-- Code generator.
rank ds v = foldr (\d t -> lsteq v (d @K) (\n -> @B (@:#@) (@:n)) (@B t \n -> #0(#1 @-)(n @+))) @? ds # ;
shows f = @Y \r t -> t @I f (\x y -> @B (@B (@:#`) (r x)) (r y)) @?;
dump tab = foldr (\h t -> shows (rank tab) (nolam (h (@K @I))) (@:#;t)) @K tab;
main s = program s (@:#?@K) (@B dump (@T @K));

Binary

Lists; binary operators on the right-hand side.

We can now write xs <> ys in expressions, though the function itself must be defined with (<>) xs ys = …​.

------------------------------------------------------------------------
-- Operators. Lists.
------------------------------------------------------------------------

or f g x y = f x (g x y);
and f g x y = @C f y (g x y);
lstEq = @Y \r xs ys a b -> xs (ys a (\u u -> b)) (\x xt -> ys b (\y yt -> x(y(@=)) (r xt yt a b) b));
pair x y f = f x y;
Just x f g = g x;
Nothing f g = f;
foldr = @Y \r c n l -> l n (\h t -> c h(r c n t));
foldl = \f a bs -> foldr (\b g x -> g (f x b)) @I bs a;
foldlOne = \f bs -> bs @? (\h t -> foldl f h t);
elem k xs = \a b -> foldr (\x t -> (x(k(@=))) a t) b xs;
append = @C (foldr @:);

pure x inp = Just (pair x inp);
bind f m = m @K (\x -> x f);
ap x y = \inp -> bind (\a t -> bind (\b u -> pure (a b) u) (y t)) (x inp);
fmap f x = ap (pure f) x;
alt x y = \inp -> (x inp) (y inp) Just;
liftaa f x y = ap (fmap f x) y;
many = @Y \r p -> alt (liftaa @: p (r p)) (pure @K);
some p = liftaa @: p (many p);
liftKI = liftaa (@K @I);
liftK = liftaa @K;
between x y p = liftKI x (liftK p y);
sat f inp = inp @K (\h t -> f h (pure h t) @K);
char c = sat (\x -> x(c(@=)));

R s   = \a b c d -> a s;
V v   = \a b c d -> b v;
A x y = \a b c d -> c x y;
L x y = \a b c d -> d x y;
raw s = R (append s);

com = between (liftKI (char '-') (char '-')) (char '\n') (many (sat (\c -> @C (c('\n' @=)))));
sp = many (alt (char ' ') (alt (char '\n') com));
spc f = liftK f sp;
spch = @B spc char;
paren = between (spch '(') (spch ')');
letter = sat (\x -> or
  (and ('z'(x(@L))) (x('a'(@L))))
  (and ('Z'(x(@L))) (x('A'(@L))))
  );
digit = sat (\x -> and ('9'(x @L)) (x('0' @L)));
varId = spc (liftaa @: letter (many (alt letter digit)));
varSym = spc (some (sat (@C elem ":!#$%&*+./<=>?@\\^|-~")));
op = alt varSym (between (spch '`') (spch '`') varId);
var = alt varId (paren varSym);
pre = liftKI (char '@') (fmap (@:) (spc (sat (@K @K))));
lam r = liftKI (spch '\\') (liftaa (@C (foldr L)) (some var) (liftKI (char '-') (liftKI (spch '>') r)));
listify = fmap (foldr (\h t -> A (A (R (@:':')) h) t) (R (@:'K')));
escchar = liftKI (char '\\') (alt (sat (\c -> or (c('"'(@=))) (or (c('\\'(@=))) (c('\''(@=)))))) (fmap (@K '\n') (char 'n')));
litOne delim = fmap (\c -> R (@B (@:'#') (@:c))) (alt escchar (sat (\c -> @C (delim(c @=)))));
litStr = listify (between (char '"') (spch '"') (many (litOne '"')));
litChar = between (char '\'') (spch '\'') (litOne '\'');
lit = alt litStr litChar;
sepByOne p sep = liftaa (@:) p (many (liftKI sep p));
sepBy p sep = alt (sepByOne p sep) (pure @K);
sqLst r = listify (between (spch '[') (spch ']') (sepBy r (spch ',')));
atom r = alt (paren r) (alt (fmap V var) (alt (fmap R pre) (alt (lam r) (alt lit (sqLst r)))));
aexp r = fmap (foldlOne A) (some (atom r));
expr = @Y \r -> liftaa (foldl @T) (aexp r) (many (liftaa (\f b a -> A (A (V f) a) b) op (aexp r)));
def = liftaa pair var (liftaa (@C (foldr L)) (many var) (liftKI (spch '=') expr));
program = liftKI sp (some (liftK def (spch ';')));

Ze   = \    a b c d e -> a;
Su   = \x   a b c d e -> b x;
Pass = \x   a b c d e -> c x;
La   = \x   a b c d e -> d x;
App  = \x y a b c d e -> e x y;

debruijn = @Y \r n e -> e
  (\s -> Pass (R s))
  (\v -> foldr (\h m -> lstEq h v Ze (Su m)) (Pass (V v)) n)
  (\x y -> App (r n x) (r n y))
  (\s t -> La (r (@: s n) t))
  ;

Defer = \a b c d -> a;
Closed = \t a b c d -> b t;
Need = \x a b c d -> c x;
Weak = \x a b c d -> d x;

ldef = \r y -> y
  (Need (Closed (raw "``SII")))
  (\d -> Need (Closed (A (raw "T") d)))
  (\e -> Need (r (Closed (raw "`SI")) e))
  (\e -> Need (r (Closed (raw "T")) e))
  ;

lclo = \r d y -> y
  (Need (Closed d))
  (\dd -> Closed (A d dd))
  (\e -> Need (r (Closed (A (raw "B") d)) e))
  (\e -> Weak (r (Closed d) e))
  ;

lnee = \r e y -> y
  (Need (r (r (Closed (raw "S")) e) (Closed (raw "I"))))
  (\d -> Need (r (Closed (A (raw "R") d)) e))
  (\ee -> Need (r (r (Closed (raw "S")) e) ee))
  (\ee -> Need (r (r (Closed (raw "C")) e) ee))
  ;

lwea = \r e y -> y
  (Need e)
  (\d -> Weak (r e (Closed d)))
  (\ee -> Need (r (r (Closed (raw "B")) e) ee))
  (\ee -> Weak (r e ee))
  ;

babsa = @Y \r x y -> x
  (ldef r y)
  (\d -> lclo r d y)
  (\e -> lnee r e y)
  (\e -> lwea r e y)
  ;

babs = @Y \r t -> t
  Defer
  (@B Weak r)
  Closed
  (\t -> r t
    (Closed (raw "I"))
    (\d -> Closed (A (raw "K") d))
    @I
    (babsa (Closed (raw "K"))))
  (\x y -> babsa (r x) (r y))
  ;

nolam x = babs (debruijn @K x) @? @I @? @?;
insPrim = @B (@: (pair ":" (raw ":"))) (@B (@: (pair "<=" (raw "``BT`TL"))) (@: (pair "==" (raw "``BT`T="))));
rank ds v = foldr (\d t -> lstEq v (d @K) (\n -> @B (@:'@') (@:n)) (@B t \n -> '0'('1'@-)(n @+))) @? ds ' ';
shows f = @Y \r t -> t @I f (\x y -> @B (@B (@:'`') (r x)) (r y)) @?;
dump tab = foldr (\h t -> shows (rank tab) (nolam (h (@K @I))) (@:';'t)) "" tab;
main s = program s "?" (@B (@B dump insPrim) (@T @K));

Algebraically

Algebraic data types, sections, case expressions, recursive definitions, but not mutually recursive definitions.

Because of the simplistic way we convert case expressions to lambda calculus, our compiler expects case expressions to list out each data constructor in the order they are given in their data declaration.

We pay a heavy price for simplicity. When a case expression is evaluated, we copy the address of each alternative to the stack, only to eventually eliminate all but one. Furthermore, we copy and delete these addresses by evaluating intricate sequences of B and K combinators.

------------------------------------------------------------------------
-- ADTs. Recursion. Case expressions. Sections.
------------------------------------------------------------------------

(.) f g x = f (g x);
(||) f g x y = f x (g x y);
(&&) f g x y = @C f y (g x y);
lstEq = @Y \r xs ys a b -> xs (ys a (\u u -> b)) (\x xt -> ys b (\y yt -> (x == y) (r xt yt a b) b));
pair x y f = f x y;
fst p = p (\x y -> x);
snd p = p (\x y -> y);
Just x f g = g x;
Nothing f g = f;
foldr = @Y \r c n l -> l n (\h t -> c h(r c n t));
foldl = \f a bs -> foldr (\b g x -> g (f x b)) @I bs a;
foldl1 f bs = bs @? (\h t -> foldl f h t);
elem k xs = \a b -> foldr (\x t -> (x == k) a t) b xs;
(++) = @C (foldr @:);
concat = foldr (++) [];
wrap c = c:[];

R s   = \a b c d -> a s;
V v   = \a b c d -> b v;
A x y = \a b c d -> c x y;
L x y = \a b c d -> d x y;

pure x inp = Just (pair x inp);
bind f m = m @K (\x -> x f);
ap x y = \inp -> bind (\a t -> bind (\b u -> pure (a b) u) (y t)) (x inp);
(<*>) = ap;
fmap f x = ap (pure f) x;
(<$>) = fmap;
(<|>) x y = \inp -> (x inp) (y inp) Just;
liftA2 f x y = ap (fmap f x) y;
(*>) = liftA2 (@K @I);
(<*) = liftA2 @K;
many = @Y \r p -> liftA2 (:) p (r p) <|> pure @K;
some p = liftA2 (:) p (many p);
sepBy1 p sep = liftA2 (:) p (many (sep *> p));
sepBy p sep = sepBy1 p sep <|> pure [];
between x y p = x *> (p <* y);
sat f inp = inp @K (\h t -> f h (pure h t) @K);

char c = sat \x -> x == c;
com = char '-' *> char '-' <* many (sat (\c -> @C (c == '\n')));
sp = many (char ' ' <|> char '\n' <|> com);
spc f = f <* sp;
spch = @B spc char;
satHelper f = \h t -> f h (pure h t) Nothing;
wantWith pred f inp = bind (satHelper pred) (f inp);
want f s inp = wantWith (lstEq s) f inp;  -- bind (satHelper (lstEq s)) (f inp);

paren = between (spch '(') (spch ')');
letter = sat \x -> ((x <= 'z') && ('a' <= x)) || ((x <= 'Z') && ('A' <= x));
digit = sat \x -> (x <= '9') && ('0' <= x);
varLex = liftA2 (:) letter (many (letter <|> digit));
keyword s = spc (want varLex s);
varId = spc (wantWith (@C . lstEq "of") varLex);
opLex = some (sat (@C elem ":!#$%&*+./<=>?@\\^|-~"));
op = spc opLex <|> between (spch '`') (spch '`') varId;
var = varId <|> paren (spc opLex);
pre = char '@' *> fmap wrap (spc (sat (@K @K)));
lam r = spch '\\' *> liftA2 (@C (foldr L)) (some varId) (char '-' *> (spch '>' *> r));
listify = fmap (foldr (\h t -> A (A (R ":") h) t) (R "K"));
escChar = char '\\' *> ((sat (\c -> elem c "'\"\\")) <|> (@K '\n' <$> char 'n'));
litOne delim = fmap (@B R (@B (@: '#') wrap)) (escChar <|> sat (\c -> @C (c == delim)));
litStr = listify (between (char '"') (spch '"') (many (litOne '"')));
litChar = between (char '\'') (spch '\'') (litOne '\'');
lit = litStr <|> litChar;
sqLst r = listify (between (spch '[') (spch ']') (sepBy r (spch ',')));
alt r = (var <|> sqLst r) *> (@C (foldr L) <$> many varId <*> (want op "->" *> r));
alts r = between (spch '{') (spch '}') (sepBy (alt r) (spch ';'));
altize h t = foldl A h t;
cas r = altize <$> between (keyword "case") (keyword "of") r <*> alts r;

singleOrLeftSect r = @T <$> r <*> (((\v a -> A (V v) a) <$> op) <|> pure @I);
rightSect r = (\v a -> A (A (R "C") (V v)) a) <$> op <*> r;
section r = paren (singleOrLeftSect r <|> rightSect r);

atom r = sqLst r <|> section r <|> cas r <|> lam r <|> fmap R pre <|> fmap V var <|> lit;
aexp r = fmap (foldl1 A) (some (atom r));
expr = @Y \r -> liftA2 (foldl @T) (aexp r) (many (liftA2 (\f b a -> A (A (V f) a) b) op (aexp r)));

isFree = @Y \r v expr -> expr
  (\s -> @K @I)
  (\s -> lstEq s v)
  (\x y -> r v x || r v y)
  (\w t -> @C ((lstEq v w) || @C (r v t)))
  ;
maybeFix s x = pair s (isFree s x (A (R "Y") (L s x)) x);
def = liftA2 maybeFix var (liftA2 (@C (foldr L)) (many var) (spch '=' *> expr));

aType = paren (some var) <|> var <|> between (spch '[') (spch ']') aType;
map = @C (foldr . ((.) (:))) [];
dataDefs cs = map (\cas -> cas (\c as -> pair c (foldr L (foldl (\a b -> A a (V b)) (V c) as) (as ++ map fst cs)))) cs;

dataArgs = (snd . foldl (\p u -> p (\s l -> pair ('x':s) (s : l))) (pair "x" [])) <$> many aType;
adt = between (keyword "data") (spch '=') (some var) *> (dataDefs <$> (sepBy (pair <$> var <*> dataArgs) (spch '|')));
program = sp *> (concat <$> sepBy (adt <|> (wrap <$> def)) (spch ';'));
lstLookup s = foldr (\h t -> h (\k v -> lstEq s k (Just v) t)) Nothing;
second f p = p \x y -> pair x (f y);

Ze   = \    a b c d e -> a;
Su   = \x   a b c d e -> b x;
Pass = \x   a b c d e -> c x;
La   = \x   a b c d e -> d x;
App  = \x y a b c d e -> e x y;

debruijn = @Y \r n e -> e
  (\s -> Pass (R s))
  (\v -> foldr (\h m -> lstEq h v Ze (Su m)) (Pass (V v)) n)
  (\x y -> App (r n x) (r n y))
  (\s t -> La (r (s:n) t))
  ;

Defer = \a b c d -> a;
Closed = \t a b c d -> b t;
Need = \x a b c d -> c x;
Weak = \x a b c d -> d x;

ldef = \r y -> y
  (Need (Closed (A (A (R "S") (R "I")) (R "I"))))
  (\d -> Need (Closed (A (R "T") d)))
  (\e -> Need (r (Closed (A (R "S") (R "I"))) e))
  (\e -> Need (r (Closed (R "T")) e))
  ;

lclo = \r d y -> y
  (Need (Closed d))
  (\dd -> Closed (A d dd))
  (\e -> Need (r (Closed (A (R "B") d)) e))
  (\e -> Weak (r (Closed d) e))
  ;

lnee = \r e y -> y
  (Need (r (r (Closed (R "S")) e) (Closed (R "I"))))
  (\d -> Need (r (Closed (A (R "R") d)) e))
  (\ee -> Need (r (r (Closed (R "S")) e) ee))
  (\ee -> Need (r (r (Closed (R "C")) e) ee))
  ;

lwea = \r e y -> y
  (Need e)
  (\d -> Weak (r e (Closed d)))
  (\ee -> Need (r (r (Closed (R "B")) e) ee))
  (\ee -> Weak (r e ee))
  ;

babsa = @Y \r x y -> x
  (ldef r y)
  (\d -> lclo r d y)
  (\e -> lnee r e y)
  (\e -> lwea r e y)
  ;

babs = @Y \r t -> t
  Defer
  (@B Weak r)
  Closed
  (\t -> r t
    (Closed (R "I"))
    (\d -> Closed (A (R "K") d))
    @I
    (babsa (Closed (R "K"))))
  (\x y -> babsa (r x) (r y))
  ;

nolam x = babs (debruijn [] x) @? @I @? @?;
insPrim = (++) (map (second R) (pair ":" ":" : (pair "succ" "`T`(1)+"
  : map (second ((++) "``BT`T")) [pair "<=" "L", pair "==" "=", pair "-" "-", pair "+" "+"])));
rank ds v = foldr (\d t -> lstEq v (d @K) (\n -> (@:'@') . (@:n)) (@B t \n -> '0'('1'@-)(n @+))) @? ds ' ';
shows f = @Y \r t -> t (++) f (\x y -> (@:'`') . r x . r y) @?;
dump tab = foldr (\h t -> shows (rank tab) (nolam (h (@K @I))) (';':t)) "" tab;
main s = program s "?" (dump . insPrim . fst);

Parity

Achievement unlocked. GHC accepts our next compiler if we insert the following preamble:

{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
import Prelude ((+), (-), (*), Char, Int, String, succ)
import Data.Char (chr, ord)
import qualified Prelude
a <= b = if a Prelude.<= b then True else False
(/) = Prelude.div
(%) = Prelude.mod
class Eq a where { (==) :: a -> a -> Bool };
instance Eq Char where { (==) x y = if (x Prelude.== y) then True else False };
instance Eq Int where { (==) x y = if (x Prelude.== y) then True else False };

We can now develop using GHC with its powerful type checking and friendly error messages. Naturally, we switch back to our compiler when it all works, though we must be mindful that in our language, all operators have the same precedence, every identifier in an expression we’re parsing must have already been defined, and case expressions require all data constructors to appear exactly once and in order.

We drop support for the @ prefix. Our language has advanced enough that we no longer need direct access to primitive combinators.

This compiler supports integer constants. We’ve survived without them for so long because the succ function has been enough for our numerical needs so far.

------------------------------------------------------------------------
-- Accepted by GHC, with a small wrapper.
--
-- Integer constants.
------------------------------------------------------------------------
data Bool = True | False;
ife a b c = case a of { True -> b ; False -> c };
not a = case a of { True -> False; False -> True };
(.) f g x = f (g x);
(||) f g = ife f True g;
(&&) f g = ife f g False;
flst xs n c = case xs of { [] -> n; (:) h t -> c h t };
lstEq xs ys = case xs of
  { [] -> flst ys True (\h t -> False)
  ; (:) x xt -> flst ys False (\y yt -> ife (x == y) (lstEq xt yt) False)
  };
id x = x;
flip f x y = f y x;
(&) x f = f x;
foldr c n l = flst l n (\h t -> c h(foldr c n t));
foldl = \f a bs -> foldr (\b g x -> g (f x b)) (\x -> x) bs a;
undefined = undefined;
foldl1 f bs = flst bs undefined (\h t -> foldl f h t);
elem k xs = foldr (\x t -> ife (x == k) True t) False xs;
(++) = flip (foldr (:));
concat = foldr (++) [];
wrap c = c:[];
data Pair x y = Pair x y;
fpair p = \f -> case p of { Pair x y -> f x y };
fst p = case p of { Pair x y -> x };
snd p = case p of { Pair x y -> y };
second f p = fpair p \x y -> Pair x (f y);
data Maybe a = Nothing | Just a;
fmaybe m n j = case m of { Nothing -> n; Just x -> j x };

pure x = \inp -> Just (Pair x inp);
bind f m = case m of
  { Nothing -> Nothing
  ; Just x -> fpair x f
  };
ap x y = \inp -> bind (\a t -> bind (\b u -> pure (a b) u) (y t)) (x inp);
(<*>) = ap;
fmap f x = ap (pure f) x;
(<$>) = fmap;
(<|>) x y = \inp -> case x inp of
  { Nothing -> y inp
  ; Just x -> Just x
  };
liftA2 f x y = ap (fmap f x) y;
(*>) = liftA2 \x y -> y;
(<*) = liftA2 \x y -> x;
many p = liftA2 (:) p (many p) <|> pure [];
some p = liftA2 (:) p (many p);
sepBy1 p sep = liftA2 (:) p (many (sep *> p));
sepBy p sep = sepBy1 p sep <|> pure [];
between x y p = x *> (p <* y);
satHelper f = \h t -> ife (f h) (pure h t) Nothing;
sat f inp = flst inp Nothing (satHelper f);

data Ast = R String | V String | A Ast Ast | L String Ast;

char c = sat \x -> x == c;
com = char '-' *> between (char '-') (char '\n') (many (sat \c -> not (c == '\n')));
sp = many ((wrap <$> (sat (\c -> (c == ' ') || (c == '\n')))) <|> com);
spc f = f <* sp;
spch = spc . char;
wantWith pred f inp = bind (satHelper pred) (f inp);
want f s inp = wantWith (lstEq s) f inp;
paren = between (spch '(') (spch ')');
letter = sat \x -> ((x <= 'z') && ('a' <= x)) || ((x <= 'Z') && ('A' <= x));
digit = sat \x -> (x <= '9') && ('0' <= x);
varLex = liftA2 (:) letter (many (letter <|> digit));
keyword s = spc (want varLex s);
varId = spc (wantWith (not . lstEq "of") varLex);
opLex = some (sat (\c -> elem c ":!#$%&*+./<=>?@\\^|-~"));
op = spc opLex <|> between (spch '`') (spch '`') varId;
var = varId <|> paren (spc opLex);
lam r = spch '\\' *> liftA2 (flip (foldr L)) (some varId) (char '-' *> (spch '>' *> r));
listify = fmap (foldr (\h t -> A (A (R ":") h) t) (R "K"));
escChar = char '\\' *> ((sat (\c -> elem c "'\"\\")) <|> ((\c -> '\n') <$> char 'n'));
litOne delim = fmap (\c -> R ('#':wrap c)) (escChar <|> sat (\c -> not (c == delim)));
litInt = R . ('(':) . (++ ")") <$> spc (some digit);
litStr = listify (between (char '"') (spch '"') (many (litOne '"')));
litChar = between (char '\'') (spch '\'') (litOne '\'');
lit = litStr <|> litChar <|> litInt;
sqLst r = listify (between (spch '[') (spch ']') (sepBy r (spch ',')));
alt r = (var <|> (undefined <$> sqLst r) <|> (undefined <$> paren (spch ','))) *> (flip (foldr L) <$> many varId <*> (want op "->" *> r));
alts r = between (spch '{') (spch '}') (sepBy (alt r) (spch ';'));
altize h t = foldl A h t;
cas r = altize <$> between (keyword "case") (keyword "of") r <*> alts r;

thenComma r = spch ',' *> (((\x y -> A (A (V ",") y) x) <$> r) <|> pure (A (V ",")));
parenExpr r = (&) <$> r <*> (((\v a -> A (V v) a) <$> op) <|> thenComma r <|> pure id);
rightSect r = ((\v a -> A (A (R "C") (V v)) a) <$> (op <|> (wrap <$> spch ','))) <*> r;
section r = paren (parenExpr r <|> rightSect r);

atom r = sqLst r <|> section r <|> cas r <|> lam r <|> (paren (spch ',') *> pure (V ",")) <|> fmap V var <|> lit;
aexp r = fmap (foldl1 A) (some (atom r));
expr = liftA2 (foldl (&)) (aexp expr) (many (liftA2 (\f b a -> A (A (V f) a) b) op (aexp expr)));

isFree v expr = case expr of
  { R s -> False
  ; V s -> lstEq s v
  ; A x y -> isFree v x || isFree v y
  ; L w t -> not ((lstEq v w) || not (isFree v t))
  };
maybeFix s x = Pair s (ife (isFree s x) (A (R "Y") (L s x)) x);
def = liftA2 maybeFix var (liftA2 (flip (foldr L)) (many var) (spch '=' *> expr));

aType = paren (some var) <|> (undefined <$> var) <|> (undefined <$> between (spch '[') (spch ']') aType);
map = flip (foldr . ((.) (:))) [];
dataDefs cs = map (\cas -> fpair cas (\c as -> Pair c (foldr L (foldl (\a b -> A a (V b)) (V c) as) (as ++ map fst cs)))) cs;

dataArgs = (snd . foldl (\p u -> fpair p (\s l -> Pair ('x':s) (s : l))) (Pair "x" [])) <$> many aType;
adt = between (keyword "data") (spch '=') (some var) *> (dataDefs <$> (sepBy (Pair <$> var <*> dataArgs) (spch '|')));
program = sp *> (concat <$> sepBy (adt <|> (wrap <$> def)) (spch ';'));

data LC = Ze | Su LC | Pass Ast | La LC | App LC LC;

debruijn n e = case e of
  { R s -> Pass (R s)
  ; V v -> foldr (\h m -> ife (lstEq h v) Ze (Su m)) (Pass (V v)) n
  ; A x y -> App (debruijn n x) (debruijn n y)
  ; L s t -> La (debruijn (s:n) t)
  };

data Sem = Defer | Closed Ast | Need Sem | Weak Sem;

ldef = \r y -> case y of
  { Defer -> Need (Closed (A (A (R "S") (R "I")) (R "I")))
  ; Closed d -> Need (Closed (A (R "T") d))
  ; Need e -> Need (r (Closed (A (R "S") (R "I"))) e)
  ; Weak e -> Need (r (Closed (R "T")) e)
  };

lclo = \r d y -> case y of
  { Defer -> Need (Closed d)
  ; Closed dd -> Closed (A d dd)
  ; Need e -> Need (r (Closed (A (R "B") d)) e)
  ; Weak e -> Weak (r (Closed d) e)
  };

lnee = \r e y -> case y of
  { Defer -> Need (r (r (Closed (R "S")) e) (Closed (R "I")))
  ; Closed d -> Need (r (Closed (A (R "R") d)) e)
  ; Need ee -> Need (r (r (Closed (R "S")) e) ee)
  ; Weak ee -> Need (r (r (Closed (R "C")) e) ee)
  };

lwea = \r e y -> case y of
  { Defer -> Need e
  ; Closed d -> Weak (r e (Closed d))
  ; Need ee -> Need (r (r (Closed (R "B")) e) ee)
  ; Weak ee -> Weak (r e ee)
  };

babsa x y = case x of
  { Defer -> ldef babsa y
  ; Closed d -> lclo babsa d y
  ; Need e -> lnee babsa e y
  ; Weak e -> lwea babsa e y
  };

babs t = case t of
  { Ze -> Defer
  ; Su x -> Weak (babs x)
  ; Pass s -> Closed s
  ; La t -> case babs t of
    { Defer -> Closed (R "I")
    ; Closed d -> Closed (A (R "K") d)
    ; Need e -> e
    ; Weak e -> babsa (Closed (R "K")) e
    }
  ; App x y -> babsa (babs x) (babs y)
  };

nolam x = case babs (debruijn [] x) of
  { Defer -> undefined
  ; Closed d -> d
  ; Need e -> undefined
  ; Weak e -> undefined
  };

insPrim = (map (second R) ([Pair ":" ":", Pair "," "``BCT", Pair "ord" "I", Pair "succ" "`T`(1)+"] ++ map (second ("``BT`T" ++)) [Pair "<=" "L", Pair "==" "=", Pair "-" "-", Pair "+" "+", Pair "*" "*"]) ++);
rank ds v = foldr (\d t -> ife (lstEq v (fst d)) (\n -> ('@':) . (n:)) (t . succ)) undefined ds ' ';
shows f t = case t of
  { R s -> (s++)
  ; V v -> f v
  ; A x y -> ('`':) . shows f x . shows f y
  ; L w t -> undefined
  };
dump tab = foldr (\h t -> shows (rank tab) (nolam (snd h)) (';':t)) "" tab;
compile s = fmaybe (program s) "?" (dump . insPrim . fst);

Fixity

This compiler supports infix, infixl, infixr declarations at the beginning of the source.

------------------------------------------------------------------------
-- Supports infix declarations.
--
-- All infix declarations must precede the definitions.
--
-- Supports let expressions.
------------------------------------------------------------------------
data Bool = True | False;
data Maybe a = Nothing | Just a;
ife a b c = case a of { True -> b ; False -> c };
not a = case a of { True -> False; False -> True };
(.) f g x = f (g x);
(||) f g = ife f True g;
(&&) f g = ife f g False;
flst xs n c = case xs of { [] -> n; (:) h t -> c h t };
lstEq xs ys = case xs of
  { [] -> flst ys True (\h t -> False)
  ; (:) x xt -> flst ys False (\y yt -> ife (x == y) (lstEq xt yt) False)
  };
id x = x;
flip f x y = f y x;
(&) x f = f x;
foldr c n l = flst l n (\h t -> c h(foldr c n t));
foldl = \f a bs -> foldr (\b g x -> g (f x b)) (\x -> x) bs a;
undefined = undefined;
foldl1 f bs = flst bs undefined (\h t -> foldl f h t);
elem k xs = foldr (\x t -> ife (x == k) True t) False xs;
find f xs = foldr (\x t -> ife (f x) (Just x) t) Nothing xs;
(++) = flip (foldr (:));
concat = foldr (++) [];
wrap c = c:[];

fst p = case p of { (,) x y -> x };
snd p = case p of { (,) x y -> y };
uncurry f p = case p of { (,) x y -> f x y };
second f = uncurry \x y -> (x, f y);
maybe n j m = case m of { Nothing -> n; Just x -> j x };

pure x = \inp -> Just (x, inp);
bind f = maybe Nothing (uncurry f);
ap x y = \inp -> bind (\a t -> bind (\b u -> pure (a b) u) (y t)) (x inp);
(<*>) = ap;
fmap f x = ap (pure f) x;
(<$>) = fmap;
(>>=) x y = \inp -> bind (\a t -> y a t) (x inp);
(<|>) x y = \inp -> case x inp of
  { Nothing -> y inp
  ; Just x -> Just x
  };
liftA2 f x y = ap (fmap f x) y;
(*>) = liftA2 \x y -> y;
(<*) = liftA2 \x y -> x;
many p = liftA2 (:) p (many p) <|> pure [];
some p = liftA2 (:) p (many p);
sepBy1 p sep = liftA2 (:) p (many (sep *> p));
sepBy p sep = sepBy1 p sep <|> pure [];
between x y p = x *> (p <* y);
satHelper f = \h t -> ife (f h) (pure h t) Nothing;
sat f inp = flst inp Nothing (satHelper f);

char c = sat \x -> x == c;
com = char '-' *> char '-' <* many (sat \c -> not (c == '\n'));
sp = many ((wrap <$> (sat (\c -> (c == ' ') || (c == '\n')))) <|> com);
spc f = f <* sp;
spch = spc . char;
wantWith pred f inp = bind (satHelper pred) (f inp);
want f s inp = wantWith (lstEq s) f inp;
paren = between (spch '(') (spch ')');
small = sat \x -> ((x <= 'z') && ('a' <= x)) || (x == '_');
large = sat \x -> (x <= 'Z') && ('A' <= x);
digit = sat \x -> (x <= '9') && ('0' <= x);
varLex = liftA2 (:) small (many (small <|> large <|> digit <|> char '\''));
conId = spc (liftA2 (:) large (many (small <|> large <|> digit <|> char '\'')));
keyword s = spc (want varLex s);
varId = spc (wantWith (not . lstEq "of") varLex);
opLex = some (sat (\c -> elem c ":!#$%&*+./<=>?@\\^|-~"));
op = spc opLex <|> between (spch '`') (spch '`') varId;
var = varId <|> paren (spc opLex);

data Ast = R String | V String | A Ast Ast | L String Ast;

lam r = spch '\\' *> liftA2 (flip (foldr L)) (some varId) (char '-' *> (spch '>' *> r));
listify = fmap (foldr (\h t -> A (A (R ":") h) t) (R "K"));
escChar = char '\\' *> ((sat (\c -> elem c "'\"\\")) <|> ((\c -> '\n') <$> char 'n'));
litOne delim = fmap (\c -> R ('#':(wrap c))) (escChar <|> sat (\c -> not (c == delim)));
litInt = R . ('(':) . (++ ")") <$> spc (some digit);
litStr = listify (between (char '"') (spch '"') (many (litOne '"')));
litChar = between (char '\'') (spch '\'') (litOne '\'');
lit = litStr <|> litChar <|> litInt;
sqLst r = listify (between (spch '[') (spch ']') (sepBy r (spch ',')));
alt r = (conId <|> var <|> (undefined <$> sqLst r) <|> (undefined <$> paren (spch ','))) *> (flip (foldr L) <$> many varId <*> (want op "->" *> r));
braceSep f = between (spch '{') (spch '}') (sepBy f (spch ';'));
alts r = braceSep (alt r);
altize h t = foldl A h t;
cas r = altize <$> between (keyword "case") (keyword "of") r <*> alts r;

thenComma r = spch ',' *> (((\x y -> A (A (V ",") y) x) <$> r) <|> pure (A (V ",")));
parenExpr r = (&) <$> r <*> (((\v a -> A (V v) a) <$> op) <|> thenComma r <|> pure id);
rightSect r = ((\v a -> A (A (R "C") (V v)) a) <$> (op <|> (wrap <$> spch ','))) <*> r;
section r = paren (parenExpr r <|> rightSect r);

isFree v expr = case expr of
  { R s -> False
  ; V s -> lstEq s v
  ; A x y -> isFree v x || isFree v y
  ; L w t -> not ((lstEq v w) || not (isFree v t))
  };
maybeFix s x = (s, ife (isFree s x) (A (R "Y") (L s x)) x);
def r = liftA2 maybeFix var (liftA2 (flip (foldr L)) (many varId) (spch '=' *> r));
addLets ls x = foldr (\p t -> uncurry (\name def -> A (L name t) def) p) x ls;
letin r = addLets <$> between (keyword "let") (keyword "in") (braceSep (def r)) <*> r;

atom r = letin r <|> sqLst r <|> section r <|> cas r <|> lam r <|> (paren (spch ',') *> pure (V ",")) <|> fmap V (conId <|> var) <|> lit;
aexp r = fmap (foldl1 A) (some (atom r));
fix f = f (fix f);
lstLookup s = foldr (\h t -> uncurry (\k v -> ife (lstEq s k) (Just v) t) h) Nothing;

data Assoc = NAssoc | LAssoc | RAssoc;
eqAssoc x y = case x of
  { NAssoc -> case y of { NAssoc -> True  ; LAssoc -> False ; RAssoc -> False }
  ; LAssoc -> case y of { NAssoc -> False ; LAssoc -> True  ; RAssoc -> False }
  ; RAssoc -> case y of { NAssoc -> False ; LAssoc -> False ; RAssoc -> True }
  };
precOf s precTab = maybe 9 fst (lstLookup s precTab);
assocOf s precTab = maybe LAssoc snd (lstLookup s precTab);
opWithPrec precTab n = wantWith (\s -> n == precOf s precTab) op;
opFold precTab e xs = case xs of
  { [] -> e
  ; (:) x xt -> case find (\y -> not (eqAssoc (assocOf (fst x) precTab) (assocOf (fst y) precTab))) xt of
    { Nothing -> case assocOf (fst x) precTab of
      { NAssoc -> case xt of
        { [] -> uncurry (\op y -> A (A (V op) e) y) x
        ; (:) y yt -> undefined
        }
      ; LAssoc -> foldl (\a b -> uncurry (\op y -> A (A (V op) a) y) b) e xs
      ; RAssoc -> (foldr (\a b -> uncurry (\op y -> \e -> A (A (V op) e) (b y)) a) id xs) e
      }
    ; Just y -> undefined
    }
  };
expr precTab = fix \r n -> ife (n <= 9) (liftA2 (opFold precTab) (r (succ n)) (many (liftA2 (\a b -> (a,b)) (opWithPrec precTab n) (r (succ n))))) (aexp (r 0));

aType = (undefined <$> paren (some aType <* ((spch ',' *> some aType) <|> pure []) )) <|> (undefined <$> (conId <|> varId)) <|> (undefined <$> between (spch '[') (spch ']') aType);
map = flip (foldr . ((.) (:))) [];
dataDefs cs = map (\cas -> uncurry (\c as -> (c, foldr L (foldl (\a b -> A a (V b)) (V c) as) (as ++ map fst cs))) cas) cs;

dataArgs = (snd . foldl (\p u -> uncurry (\s l -> ('x':s, s : l)) p) ("x", [])) <$> many aType;
adt = between (keyword "data") (spch '=') (conId *> many varId) *> (dataDefs <$> (sepBy ((,) <$> conId <*> dataArgs) (spch '|')));

prec = (\c -> ord c - ord '0') <$> spc digit;
fixityList a n os = map (\o -> (o, (n, a))) os;
fixityDecl kw a = between (keyword kw) (spch ';') (fixityList a <$> prec <*> sepBy op (spch ','));
fixity = fixityDecl "infix" NAssoc <|> fixityDecl "infixl" LAssoc <|> fixityDecl "infixr" RAssoc;

funs precTab = concat <$> sepBy (adt <|> (wrap <$> def (expr precTab 0))) (spch ';');
program = sp *> (((":", (5, RAssoc)):) . concat <$> many fixity) >>= funs;

data LC = Ze | Su LC | Pass Ast | La LC | App LC LC;

debruijn n e = case e of
  { R s -> Pass (R s)
  ; V v -> foldr (\h m -> ife (lstEq h v) Ze (Su m)) (Pass (V v)) n
  ; A x y -> App (debruijn n x) (debruijn n y)
  ; L s t -> La (debruijn (s:n) t)
  };

data Sem = Defer | Closed Ast | Need Sem | Weak Sem;

ldef = \r y -> case y of
  { Defer -> Need (Closed (A (A (R "S") (R "I")) (R "I")))
  ; Closed d -> Need (Closed (A (R "T") d))
  ; Need e -> Need (r (Closed (A (R "S") (R "I"))) e)
  ; Weak e -> Need (r (Closed (R "T")) e)
  };

lclo = \r d y -> case y of
  { Defer -> Need (Closed d)
  ; Closed dd -> Closed (A d dd)
  ; Need e -> Need (r (Closed (A (R "B") d)) e)
  ; Weak e -> Weak (r (Closed d) e)
  };

lnee = \r e y -> case y of
  { Defer -> Need (r (r (Closed (R "S")) e) (Closed (R "I")))
  ; Closed d -> Need (r (Closed (A (R "R") d)) e)
  ; Need ee -> Need (r (r (Closed (R "S")) e) ee)
  ; Weak ee -> Need (r (r (Closed (R "C")) e) ee)
  };

lwea = \r e y -> case y of
  { Defer -> Need e
  ; Closed d -> Weak (r e (Closed d))
  ; Need ee -> Need (r (r (Closed (R "B")) e) ee)
  ; Weak ee -> Weak (r e ee)
  };

babsa x y = case x of
  { Defer -> ldef babsa y
  ; Closed d -> lclo babsa d y
  ; Need e -> lnee babsa e y
  ; Weak e -> lwea babsa e y
  };

babs t = case t of
  { Ze -> Defer
  ; Su x -> Weak (babs x)
  ; Pass s -> Closed s
  ; La t -> case babs t of
    { Defer -> Closed (R "I")
    ; Closed d -> Closed (A (R "K") d)
    ; Need e -> e
    ; Weak e -> babsa (Closed (R "K")) e
    }
  ; App x y -> babsa (babs x) (babs y)
  };

nolam x = case babs (debruijn [] x) of
  { Defer -> undefined
  ; Closed d -> d
  ; Need e -> undefined
  ; Weak e -> undefined
  };

insPrim = (map (second R) ([(":", ":"), (",", "``BCT"), ("chr", "I"), ("ord", "I"), ("succ", "`T`(1)+")] ++ map (second ("``BT`T" ++)) [("<=", "L"), ("==", "="), ("-", "-"), ("/", "/"), ("%", "%"), ("+", "+"), ("*", "*")]) ++);
rank ds v = foldr (\d t -> ife (lstEq v (fst d)) (\n -> ('@':) . (n:)) (t . (\n -> succ n))) undefined ds ' ';
shows f t = case t of
  { R s -> (s++)
  ; V v -> f v
  ; A x y -> ('`':) . shows f x . shows f y
  ; L w t -> undefined
  };
dump tab = foldr (\h t -> shows (rank tab) (nolam (snd h)) (';':t)) "" tab;
compile s = maybe "?" (dump . insPrim . fst) (program s);

Ben Lynn blynn@cs.stanford.edu 💡