> {-# LANGUAGE TypeSynonymInstances #-} > module HaLay where > import Control.Arrow > import Control.Applicative > import Data.Char > import Data.List > import Data.Traversable > import Control.Monad > import Control.Monad.State > import Debug.Trace > import Parsley ------------------------------------------------------------------------------ Glom a file like this ------------------------------------------------------------------------------ > ready :: String -> [[Tok]] > ready = map (munge exTyMu) . fst . getLines (Seek NoLay "") [] . > tokenize . (,) 0 ------------------------------------------------------------------------------ Stage 1 : lexing ------------------------------------------------------------------------------ > tokenize :: (Int, String) -> [(Int, Tok)] > tokenize = unfoldr (runStateT ((,) <$> gets fst <*> tokIn)) > data Tok > = Lit String > | Ope BK > | Clo BK > | Uid String > | Lid String > | KW String > | Sym String > | Semi > | Spc String > | Com String > | Urk Char > | NL > | B BK [Tok] -- a bracket > | L String [[Tok]] -- some layout > | T Tag [Tok] -- a tagged region > deriving (Show, Eq) > tokOut :: Tok -> String > tokOut t = case t of > Lit s -> s > Ope b -> ope b > Clo b -> clo b > Uid s -> s > Lid s -> s > KW s -> s > Sym s -> s > Semi -> ";" > Spc s -> s > Com s -> s > Urk c -> [c] > NL -> "\n" > B b ts -> ope b ++ toksOut ts ++ clo b > L s tss -> s ++ tokssOut tss > T _ ts -> toksOut ts > toksOut :: [Tok] -> String > toksOut ts = ts >>= tokOut > tokssOut :: [[Tok]] -> String > tokssOut tss = tss >>= toksOut > isSpcT :: Tok -> Bool > isSpcT (Spc _) = True > isSpcT NL = True > isSpcT (Com _) = True > isSpcT _ = False > tokIn :: L Tok > tokIn = Lit <$> ((:) <$> ch '\"' <*> slit) > <|> Com <$ stol <*> ((++) <$> traverse ch "#" <*> spa (not . isNL)) > <|> Sym <$> sym > <|> Com <$> ((++) <$> traverse ch "--" <*> spa (not . isNL)) > <|> Com <$> ((++) <$> traverse ch "{-" <*> comment 1) > <|> Ope Rnd <$ ch '(' > <|> Clo Rnd <$ ch ')' > <|> Ope Sqr <$ ch '[' > <|> Clo Sqr <$ ch ']' > <|> Ope Crl <$ ch '{' > <|> Clo Crl <$ ch '}' > <|> Semi <$ ch ';' > <|> Lit <$> ((:) <$> chk isDigit cha <*> spa isDigit) -- sod FP now > <|> Uid <$> ((:) <$> chk isUpper cha <*> spa isIddy) > <|> klid <$> ((:) <$> chk isLower cha <*> spa isIddy) > <|> Spc <$> ((:) <$> chk isHSpace cha <*> spa isHSpace) > <|> NL <$ chk isNL cha > <|> Urk <$> cha > where > slit = (:) <$> ch '\\' <*> ((:) <$> cha <*> slit) > <|> return <$> ch '\"' > <|> (:) <$> cha <*> slit > klid s = if elem s keywords then KW s else Lid s > comment 0 = pure "" > comment i = (++) <$> traverse ch "{-" <*> comment (i + 1) > <|> (++) <$> traverse ch "-}" <*> comment (i - 1) > <|> (++) <$> ((:) <$> ch '\"' <*> slit) <*> comment i > <|> (:) <$> cha <*> comment i ------------------------------------------------------------------------ Stage 2 : Group according to brackets and layout rules ------------------------------------------------------------------------ > data ChunkMode > = Lay String Int > | Bra BK > | NoLay > deriving (Show, Eq) > getChunks :: ChunkMode -> [Tok] -> [(Int, Tok)] -> ([Tok], [(Int, Tok)]) > getChunks _ acc [] = (reverse acc, []) > getChunks m acc its@((i, t) : its') = case (m, t) of > _ | isSpcT t -> getChunks m (t : acc) its' > (Lay _ j, _) | not (null acc) && i <= j -> (reverse acc, its) > (Lay _ _, Semi) -> (reverse (t : acc), its') > (Lay k _, KW e) | elem (k, e) layDKillaz -> (reverse acc, its) > (Lay _ _, Clo _) -> (reverse acc, its) > (Bra b, Clo b') | b == b' -> (reverse acc, its') > (m, Ope b) -> case getChunks (Bra b) [] its' of > (cs, its) -> getChunks m (B b cs : acc) its > (m, KW e) | elem e lakeys -> case getLines (Seek m e) [] its' of > (css, its) -> getChunks m ((L e css) : acc) its > _ -> getChunks m (t : acc) its' > data LineMode > = Bracing > | Seek ChunkMode String > | Edge String Int > deriving (Show, Eq) > getLines :: LineMode -> [[Tok]] -> [(Int, Tok)] -> ([[Tok]], [(Int, Tok)]) > getLines _ acc [] = (reverse acc, []) > getLines m acc ((_, s) : its) | isSpcT s = eat [s] its where > eat sacc ((_, s) : its) | isSpcT s = eat (s : sacc) its > eat sacc its = getLines m (reverse (splendid (reverse sacc)) ++ acc) its > getLines m acc its@((i, t) : its') = case (m, t) of > (Edge k j, _) > | i == j -> case getChunks (Lay k i) [] its of > ([], its) -> (reverse acc, its) > (cs, its) -> getLines (Edge k i) (reverse (splendid cs) ++ acc) its > | otherwise -> (reverse acc, its) > (Seek m s, Ope Crl) -> getLines Bracing ([Ope Crl] : acc) its' > (Seek (Lay k j) s, _) > | j < i -> getLines (Edge s i) acc its > | otherwise -> (reverse acc, its) > (Seek NoLay s, _) -> getLines (Edge s i) acc its > (Bracing, Clo Crl) -> (reverse ([Clo Crl] : acc), its') > (Bracing, _) -> case getChunks NoLay [] its of > ([], its) -> (reverse acc, its) > (cs, its) -> getLines Bracing (cs : acc) its > layDKillaz :: [(String, String)] > layDKillaz = [("of", "where"), ("do", "where"), ("let", "in")] > splendid :: [Tok] -> [[Tok]] > splendid [] = [[]] > splendid (NL : ts) = case splendid ts of > (ss : sss) | all isSpcT ss -> [] : (NL : ss) : sss > | otherwise -> (NL : ss) : sss > splendid (t : ts) = case splendid ts of > (us : sss) -> (t : us) : sss ------------------------------------------------------------------------------ Stage 3 : tag regions of interest ------------------------------------------------------------------------------ > data Tag = Ty | Ki | Ex deriving (Show, Eq) > tender :: Tok -> Bool > tender (L _ _ ) = True > tender t = elem t [Semi, Sym "=", Sym ",", Sym "|", KW "in"] > exTyMu :: [Tok] -> Maybe [Tok] > exTyMu (t : ts) > | elem t [Sym "::", KW "class", KW "instance"] > = Just $ t : u : munge exTyMu vs > | elem t [KW "data", KW "newtype"] > = Just $ case vs of > Sym "=" : vs -> t : u : Sym "=" : [T Ty vs] -- dodgy or what? > _ -> t : u : munge exTyMu vs > | elem t [KW "type"] > = Just $ case vs of > Sym "=" : vs -> t : u : Sym "=" : [T Ty (munge tyMu vs)] > _ -> t : u : munge exTyMu vs > where > (u, vs) = first (T Ty . munge tyMu) (span (not . tender) ts) > exTyMu _ = Nothing > tyMu :: [Tok] -> Maybe [Tok] > tyMu (Sym "::" : ts) = Just $ Sym "::" : u : munge tyMu vs > where > (u, vs) = first (T Ki . munge kiMu) (span (not . tender) ts) > tyMu (B Crl us : ts) = Just $ > B Crl [T Ex (munge exTyMu us)] : munge tyMu ts > tyMu _ = Nothing > kiMu :: [Tok] -> Maybe [Tok] > kiMu (B Crl us : ts) = Just $ > B Crl [T Ty (munge tyMu us)] : munge kiMu ts > kiMu (KW "forall" : ts) = case span (/= Sym ".") ts of > (ts, us) -> Just $ KW "forall" : T Ty (munge tyMu ts) : munge kiMu us > kiMu _ = Nothing ------------------------------------------------------------------------------ Parsley for layout ------------------------------------------------------------------------------ > spc :: P Tok () > spc = () <$ many (tok isSpcT) > uid :: P Tok String > uid = grok h next where > h (Uid s) = Just s > h _ = Nothing > lid :: P Tok String > lid = grok h next where > h (Lid s) = Just s > h _ = Nothing > infC :: P Tok String > infC = grok h next where > h (Sym (':' : s)) = Just (':' : s) > h _ = Nothing > pBr :: BK -> P Tok x -> P Tok x > pBr k p = grok pb next where > pb (B j cs) | k == j = parse p cs > pb _ = Nothing > pLay :: String -> P [Tok] x -> P Tok x > pLay k p = grok pb next where > pb (L j tss) | k == j = parse p tss > pb _ = Nothing > pTag :: Tag -> P Tok x -> P Tok x > pTag t p = grok pb next where > pb (T u ts) | t == u = parse p ts > pb _ = Nothing ------------------------------------------------------------------------------ Mungers ------------------------------------------------------------------------------ > munge :: ([Tok] -> Maybe [Tok]) -> [Tok] -> [Tok] > munge m ts = case m ts of > Just us -> us > Nothing -> case ts of > [] -> [] > (B b ss : ts) -> B b (munge m ss) : munge m ts > (L k sss : ts) -> L k (map (munge m) sss) : munge m ts > (T t ss : ts) -> T t (munge m ss) : munge m ts > (t : ts) -> t : munge m ts > mungeLines :: ([[Tok]] -> Maybe [[Tok]]) -> ([Tok] -> Maybe [Tok]) -> > [[Tok]] -> [[Tok]] > mungeLines ms m tss = case ms tss of > Just uss -> uss > Nothing -> case tss of > [] -> [] > (ts : tss) -> munge help ts : mungeLines ms m tss > where > help ts = m ts <|> > case ts of > (L k sss : ts) -> Just (L k (mungeLines ms m sss) : munge help ts) > _ -> Nothing > dashOut :: [Tok] -> [Tok] > dashOut ts = [Com ("-- " ++ easy ts)] where > easy [] = [] > easy (B _ _ : _) = [] > easy (L _ _ : _) = [] > easy (t : ts) = tokOut t ++ easy ts > dental :: [[Tok]] -> [Tok] > dental [] = [NL] > dental (l@(NL : _) : (c : _) : _) | not (isSpcT c) = l > dental (l : ls) = dental ls > redent :: [Tok] -> [[Tok]] -> [[Tok]] > redent nl ((NL : _) : tss) = redent nl tss > redent nl (ts : tss) = nl : ts : redent nl tss > redent nl [] = [] > preamble :: [[Tok]] -> [[Tok]] -> [[Tok]] > preamble ls [] = ls > preamble ls ms@(l@(NL : _) : (c : _) : _) | not (isSpcT c) = > redent l ls ++ ms > preamble ls (m : ms) = m : preamble ls ms ------------------------------------------------------------------------------ Classifiers, odds and ends ------------------------------------------------------------------------------ > isNL :: Char -> Bool > isNL b = elem b "\r\n" > isHSpace :: Char -> Bool > isHSpace c = isSpace c && not (isNL c) > isIddy :: Char -> Bool > isIddy b = isAlphaNum b || elem b "_'" > isInfy :: Char -> Bool > isInfy b = elem b "!#$%&*+-.:/<=>?@\\^|~" > data BK = Rnd | Sqr | Crl deriving (Show, Eq) > ope :: BK -> String > ope Rnd = "(" > ope Sqr = "[" > ope Crl = "{" > clo :: BK -> String > clo Rnd = ")" > clo Sqr = "]" > clo Crl = "}" > keywords :: [String] > keywords = ["module", "import", "type", "data", "newtype", "pattern", "kind", > "let", "in", "case", "of", "do", "forall", "class", "instance", > "family", "where", "if", "then", "else"] > lakeys :: [String] > lakeys = ["let", "of", "do", "where"] > width :: [Tok] -> Int > width ts = case span (/= NL) ts of > (ts, []) -> length (ts >>= tokOut) > (_, NL : ts) -> width ts ------------------------------------------------------------------------ The lexer monad ------------------------------------------------------------------------ > type L = StateT (Int, String) Maybe > instance Alternative L where > empty = StateT $ \ is -> empty > p <|> q = StateT $ \ is -> runStateT p is <|> runStateT q is > instance Applicative L where > pure = return > (<*>) = ap > cha :: L Char > cha = StateT moo where > moo (i, []) = Nothing > moo (i, c : s) | isNL c = Just (c, (0, s)) > | c == '\t' > = if mod i 8 == 7 then Just (' ', (i + 1, s)) > else Just (' ', (i + 1, c : s)) > | otherwise = Just (c, (i + 1, s)) > stol :: L () > stol = do > i <- gets fst > guard (i == 0) > chk :: (t -> Bool) -> L t -> L t > chk p l = do t <- l ; if p t then return t else empty > ch :: Char -> L Char > ch c = chk (== c) cha > spa :: (Char -> Bool) -> L String > spa p = (:) <$> chk p cha <*> spa p <|> pure [] > sym :: L String > sym = StateT $ \ (i, s) -> case h s of > ("", _) -> Nothing > ("--", _) -> Nothing > (s, t) -> Just (s, (i + length s, t)) > where > h (s@('-':'}':_)) = ("", s) > h (c : s) | isInfy c = first (c :) (h s) > h s = ([], s)