>
> module Language.Haskell.Her.HaLay where
> import Control.Arrow (first)
> import Control.Applicative
> import Data.Char
> import Data.List
> import Data.Traversable
> import Control.Monad
> import Control.Monad.State
> import Debug.Trace
> import Language.Haskell.Her.Parsley
------------------------------------------------------------------------------
Glom a file like this
------------------------------------------------------------------------------
> ready :: String -> String -> [[Tok]]
> ready f = map (munge exTyMu) . fst . getLines (Seek NoLay "") [] .
> tokenize . (,) ((f, 0), 0)
------------------------------------------------------------------------------
Stage 1 : lexing
------------------------------------------------------------------------------
> type Position = ((String, Int), Int)
> tokenize :: (Position, String) -> [(Int, Tok)]
> tokenize = unfoldr (runStateT ((,) <$> lCol <*> tokIn))
> data Tok
> = Lit String
> | Ope BK
> | Clo BK
> | Uid String
> | Lid String
> | KW String
> | Sym String
> | Semi
> | Spc String
> | Com String
> | Urk Char
> | NL (String, Int)
> | B BK [Tok]
> | L String [[Tok]]
> | T Tag [Tok]
> deriving (Show, Eq)
> tokOut :: Tok -> String
> tokOut t = case t of
> Lit s -> s
> Ope b -> ope b
> Clo b -> clo b
> Uid s -> s
> Lid s -> s
> KW s -> s
> Sym s -> s
> Semi -> ";"
> Spc s -> s
> Com s -> s
> Urk c -> [c]
> NL (f, l) | isSuffixOf ".hers" f -> "\n"
> | otherwise -> "{-# LINE " ++ show l ++ " " ++ show f ++ " #-}\n"
> B b ts -> ope b ++ toksOut ts ++ clo b
> L s tss -> s ++ tokssOut tss
> T _ ts -> toksOut ts
> toksOut :: [Tok] -> String
> toksOut ts = ts >>= tokOut
> tokssOut :: [[Tok]] -> String
> tokssOut tss = tss >>= toksOut
> isSpcT :: Tok -> Bool
> isSpcT (Spc _) = True
> isSpcT (NL _) = True
> isSpcT (Com _) = True
> isSpcT _ = False
> tokIn :: L Tok
> tokIn = Lit <$> ((:) <$> ch '\"' <*> slit)
> <|> Lit <$> ((:) <$> ch '\'' <*> haha)
> <|> NL <$ traverse ch "{-# LINE " <*> whur <* traverse ch "\" #-}" <*
> some (chk isNL cha)
> <|> Com <$ stol <*> ((++) <$> traverse ch "#" <*> spa (not . isNL))
> <|> Sym <$> sym
> <|> Com <$> ((++) <$> traverse ch "--" <*> spa (not . isNL))
> <|> Com <$> ((++) <$> traverse ch "{-" <*> comment 1)
> <|> Ope Rnd <$ ch '('
> <|> Clo Rnd <$ ch ')'
> <|> Ope Sqr <$ ch '['
> <|> Clo Sqr <$ ch ']'
> <|> Ope Crl <$ ch '{'
> <|> Clo Crl <$ ch '}'
> <|> Semi <$ ch ';'
> <|> Lit <$> ((:) <$> chk isDigit cha <*> spa isDigit)
> <|> Uid <$> ((:) <$> chk isUpper cha <*> spa isIddy)
> <|> klid <$> ((:) <$> chk (\ b -> isLower b || b == '_') cha <*> spa isIddy)
> <|> Spc <$> ((:) <$> chk isHSpace cha <*> spa isHSpace)
> <|> NL <$ some (chk isNL cha) <*> lFLine
> <|> Urk <$> cha
> where
> slit = (:) <$> ch '\\' <*> ((:) <$> cha <*> slit)
> <|> return <$> ch '\"'
> <|> (:) <$> cha <*> slit
> haha = (:) <$> ch '\\' <*> ((:) <$> cha <*> haha)
> <|> return <$> ch '\''
> <|> (:) <$> cha <*> haha
> klid s = if elem s keywords then KW s else Lid s
> comment 0 = pure ""
> comment i = (++) <$> traverse ch "{-" <*> comment (i + 1)
> <|> (++) <$> traverse ch "-}" <*> comment (i 1)
> <|> (++) <$> ((:) <$> ch '\"' <*> slit) <*> comment i
> <|> (:) <$> cha <*> comment i
> whur = flip (,) <$> (read <$> some (chk isDigit cha))
> <* traverse ch " \""
> <*> spa (not . (=='\"'))
------------------------------------------------------------------------
Stage 2 : Group according to brackets and layout rules
------------------------------------------------------------------------
> data ChunkMode
> = Lay String Int
> | Bra BK
> | NoLay
> deriving (Show, Eq)
> getChunks :: ChunkMode -> [Tok] -> [(Int, Tok)] -> ([Tok], [(Int, Tok)])
> getChunks m acc its =
> let (iss, ius) = span gappy its
> gappy (_, s) = isSpcT s
> acss = case iss of
> [] -> acc
> _ -> reverse (map snd iss) ++ acc
> in case ius of
> [] -> (reverse acc, its)
> ((i, t) : its') -> case (m, t) of
> (Lay _ j, _) | not (null acc) && i <= j -> (reverse acc, its)
> (Lay _ _, Semi) -> (reverse acc, its)
> (Lay k _, KW e) | elem (k, e) layDKillaz -> (reverse acc, its)
> (Lay _ _, Clo _) -> (reverse acc, its)
> (Bra b, Clo b') | b == b' -> (reverse acss, its')
> (m, Ope b) -> case getChunks (Bra b) [] its' of
> (cs, its) -> getChunks m (B b cs : acss) its
> (m, KW e) | elem e lakeys -> case getLines (Seek m e) [] its' of
> (css, its) -> getChunks m ((L e css) : acss) its
> _ -> getChunks m (t : acss) its'
getChunks :: ChunkMode -> [Tok] -> [(Int, Tok)] -> ([Tok], [(Int, Tok)])
getChunks _ acc [] = (reverse acc, [])
getChunks m acc its@((i, t) : its') = case (m, t) of
_ | isSpcT t -> getChunks m (t : acc) its'
(Lay _ j, _) | not (null acc) && i <= j -> (reverse acc, its)
(Lay _ _, Semi) -> (reverse (t : acc), its')
(Lay k _, KW e) | elem (k, e) layDKillaz -> (reverse acc, its)
(Lay _ _, Clo _) -> (reverse acc, its)
(Bra b, Clo b') | b == b' -> (reverse acc, its')
(m, Ope b) -> case getChunks (Bra b) [] its' of
(cs, its) -> getChunks m (B b cs : acc) its
(m, KW e) | elem e lakeys -> case getLines (Seek m e) [] its' of
(css, its) -> getChunks m ((L e css) : acc) its
_ -> getChunks m (t : acc) its'
> data LineMode
> = Bracing
> | Seek ChunkMode String
> | Edge String Int
> deriving (Show, Eq)
> getLines :: LineMode -> [[Tok]] -> [(Int, Tok)] -> ([[Tok]], [(Int, Tok)])
> getLines m acc its =
> let (iss, ius) = span gappy its
> gappy (_, Semi) = True
> gappy (_, s) = isSpcT s
> acss = case iss of
> [] -> acc
> _ -> reverse (splendid (map snd iss)) ++ acc
> in case ius of
> [] -> (reverse acc, its)
> ((i, t) : its') -> case (m, t) of
> (Bracing, Clo Crl) -> (reverse ([Clo Crl] : acss), its')
> (_, Clo _) -> (reverse acc, its)
> (Edge k j, _)
> | i >= j -> case getChunks (Lay k i) [] ius of
> ([], _) -> (reverse acc, its)
> (cs, ius) -> getLines (Edge k i) (reverse (splendid cs) ++ acss) ius
> | otherwise -> (reverse acc, its)
> (Seek m s, Ope Crl) | properBrace its'
> -> getLines Bracing ([Ope Crl] : acss) its'
> (Seek (Lay k j) s, _)
> | j < i -> getLines (Edge s i) acss ius
> | otherwise -> (reverse acc, its)
> (Seek (Bra b) s,_) -> getLines (Edge s i) acss ius
> (Seek NoLay s, _) -> getLines (Edge s i) acss ius
> (Bracing, _) -> case getChunks NoLay [] its of
> ([], _) -> (reverse acc, its)
> (cs, ius) -> getLines Bracing (cs : acss) ius
> properBrace :: [(Int, Tok)] -> Bool
> properBrace [] = True
> properBrace ((_, Clo Crl) : _) = False
> properBrace ((_, Semi) : _) = True
> properBrace ((_, Sym s) : _) | elem s ["->", "<-", "="] = True
> properBrace (_ : its) = properBrace its
getLines :: LineMode -> [[Tok]] -> [(Int, Tok)] -> ([[Tok]], [(Int, Tok)])
getLines _ acc [] = (reverse acc, [])
getLines m acc ((_, s) : its) | isSpcT s = eat [s] its where
eat sacc ((_, s) : its) | isSpcT s = eat (s : sacc) its
eat sacc its = getLines m (reverse (splendid (reverse sacc)) ++ acc) its
getLines m acc its@((i, t) : its') = case (m, t) of
(Bracing, Clo Crl) -> (reverse ([Clo Crl] : acc), its')
(_, Clo _) -> (reverse acc, its)
(Edge k j, _)
| i == j -> case getChunks (Lay k i) [] its of
([], its) -> (reverse acc, its)
(cs, its) -> getLines (Edge k i) (reverse (splendid cs) ++ acc) its
| otherwise -> (reverse acc, its)
(Seek m s, Ope Crl) -> getLines Bracing ([Ope Crl] : acc) its'
(Seek (Lay k j) s, _)
| j < i -> getLines (Edge s i) acc its
| otherwise -> (reverse acc, its)
(Seek (Bra b) s,_) -> getLines (Edge s i) acc its
(Seek NoLay s, _) -> getLines (Edge s i) acc its
(Bracing, _) -> case getChunks NoLay [] its of
([], its) -> (reverse acc, its)
(cs, its) -> getLines Bracing (cs : acc) its
> layDKillaz :: [(String, String)]
> layDKillaz = [("of", "where"), ("do", "where"), ("let", "in")]
> splendid :: [Tok] -> [[Tok]]
> splendid [] = [[]]
> splendid (NL fl : ts) = case splendid ts of
> (ss : sss) | all isSpcT ss -> [] : (NL fl : ss) : sss
> | otherwise -> (NL fl : ss) : sss
> splendid (t : ts) = case splendid ts of
> (us : sss) -> (t : us) : sss
------------------------------------------------------------------------------
Stage 3 : tag regions of interest
------------------------------------------------------------------------------
> data Tag = Ty | Ki | Ex deriving (Show, Eq)
> tender :: Tok -> Bool
> tender (L _ _ ) = True
> tender t = elem t [Semi, Sym "=", Sym ",", Sym "|", KW "in", KW "deriving"]
> tender' :: Tok -> Bool
> tender' (L _ _ ) = True
> tender' _ = False
> exTyMu :: [Tok] -> Maybe [Tok]
> exTyMu (t : ts)
> | elem t [KW "class", KW "instance"]
> = Just $ t : u' : munge exTyMu vs'
> | elem t [Sym "::"]
> = Just $ t : u : munge exTyMu vs
> | elem t [KW "data", KW "newtype"]
> = Just $ case vs of
> Sym "=" : vs -> t : u : Sym "=" : oldStyle vs
> _ -> t : u : munge exTyMu vs
> | elem t [KW "type"]
> = Just $ case vs of
> Sym "=" : vs -> t : u : Sym "=" : [T Ty (munge tyMu vs)]
> _ -> t : u : munge exTyMu vs
> where
> (u, vs) = first (T Ty . munge tyMu) (span (not . tender) ts)
> (u', vs') = first (T Ty . munge tyMu) (span (not . tender') ts)
> exTyMu _ = Nothing
> oldStyle :: [Tok] -> [Tok]
> oldStyle ts = case parse (pSep (teq (Sym "|")) (many (tok (/= Sym "|")))) ts of
> Just tss -> intercalate [Sym "|"] (map go tss)
> where
> noInfT (Sym (':':_)) = False
> noInfT _ = True
> go ts = case span noInfT ts of
> (as, (t : bs)) -> [T Ty (munge tyMu as), t, T Ty (munge tyMu bs)]
> _ -> case span isSpcT ts of
> (ss, t : ts) -> ss ++ t : map ho ts
> _ -> ts
> ho s | isSpcT s = s
> ho (Sym s) = Sym s
> ho (B Crl ts) = B Crl (munge exTyMu ts)
> ho t = T Ty (munge tyMu [t])
> tyMu :: [Tok] -> Maybe [Tok]
> tyMu (Sym "::" : ts) = Just $ Sym "::" : u : munge tyMu vs
> where
> (u, vs) = first (T Ki . munge kiMu) (span (not . tender) ts)
> tyMu (B Crl us : ts) = Just $
> B Crl [T Ex (munge exTyMu us)] : munge tyMu ts
> tyMu _ = Nothing
> kiMu :: [Tok] -> Maybe [Tok]
> kiMu (B Crl us : ts) = Just $
> B Crl [T Ty (munge tyMu us)] : munge kiMu ts
> kiMu (KW "forall" : ts) = case span (/= Sym ".") ts of
> (ts, us) -> Just $ KW "forall" : T Ty (munge tyMu ts) : munge kiMu us
> kiMu _ = Nothing
------------------------------------------------------------------------------
Parsley for layout
------------------------------------------------------------------------------
> spc :: P Tok ()
> spc = () <$ many (tok isSpcT)
> pNL :: P Tok ()
> pNL = grok h next where
> h (NL _) = Just ()
> h _ = Nothing
> uid :: P Tok String
> uid = grok h next where
> h (Uid s) = Just s
> h _ = Nothing
> lid :: P Tok String
> lid = grok h next where
> h (Lid s) = Just s
> h _ = Nothing
> infC :: P Tok String
> infC = grok h next where
> h (Sym (':' : s)) = Just (':' : s)
> h _ = Nothing
> pBr :: BK -> P Tok x -> P Tok x
> pBr k p = grok pb next where
> pb (B j cs) | k == j = parse p cs
> pb _ = Nothing
> pLay :: String -> P [Tok] x -> P Tok x
> pLay k p = grok pb next where
> pb (L j tss) | k == j = parse p tss
> pb _ = Nothing
> pTag :: Tag -> P Tok x -> P Tok x
> pTag t p = grok pb next where
> pb (T u ts) | t == u = parse p ts
> pb _ = Nothing
------------------------------------------------------------------------------
Mungers
------------------------------------------------------------------------------
> munge :: ([Tok] -> Maybe [Tok]) -> [Tok] -> [Tok]
> munge m ts = case m ts of
> Just us -> us
> Nothing -> case ts of
> [] -> []
> (B b ss : ts) -> B b (munge m ss) : munge m ts
> (L k sss : ts) -> L k (map (munge m) sss) : munge m ts
> (T t ss : ts) -> T t (munge m ss) : munge m ts
> (t : ts) -> t : munge m ts
> mungeLines :: ([[Tok]] -> Maybe [[Tok]]) -> ([Tok] -> Maybe [Tok]) ->
> [[Tok]] -> [[Tok]]
> mungeLines ms m tss = case ms tss of
> Just uss -> uss
> Nothing -> case tss of
> [] -> []
> (ts : tss) -> munge help ts : mungeLines ms m tss
> where
> help ts = m ts <|>
> case ts of
> (L k sss : ts) -> Just (L k (mungeLines ms m sss) : munge help ts)
> _ -> Nothing
> dashOut :: [Tok] -> [Tok]
> dashOut ts = [Com ("-- " ++ easy ts)] where
> easy [] = []
> easy (B _ _ : _) = []
> easy (L _ _ : _) = []
> easy (t : ts) = tokOut t ++ easy ts
> dental :: [[Tok]] -> [Tok]
> dental [] = [NL ("Dunno.lhs", 0)]
> dental (l@(NL _ : _) : (c : _) : _) | not (isSpcT c) = l
> dental (l : ls) = dental ls
> redent :: [Tok] -> [[Tok]] -> [[Tok]]
> redent (NL _ : r) ((NL p : _) : tss) = redent (NL p : r) tss
> redent nl ((NL _ : _) : tss) = redent nl tss
> redent nl (ts : tss) = nl : ts : redent nl tss
> redent nl [] = []
> preamble :: [[Tok]] -> [[Tok]] -> [[Tok]]
> preamble ls [] = ls
> preamble ls ms@(l@(NL _ : _) : (c : _) : _) | not (isSpcT c) =
> redent l ls ++ ms
> preamble ls (m : ms) = m : preamble ls ms
------------------------------------------------------------------------------
Classifiers, odds and ends
------------------------------------------------------------------------------
> isNL :: Char -> Bool
> isNL b = elem b "\r\n"
> isHSpace :: Char -> Bool
> isHSpace c = isSpace c && not (isNL c)
> isIddy :: Char -> Bool
> isIddy b = isAlphaNum b || elem b "_'"
> isInfy :: Char -> Bool
> isInfy b = elem b "!#$%&*+-,.:/<=>?@\\^|~"
> data BK = Rnd | Sqr | Crl deriving (Show, Eq)
> ope :: BK -> String
> ope Rnd = "("
> ope Sqr = "["
> ope Crl = "{"
> clo :: BK -> String
> clo Rnd = ")"
> clo Sqr = "]"
> clo Crl = "}"
> keywords :: [String]
> keywords = ["module", "import", "type", "data", "newtype", "pattern", "kind",
> "let", "in", "case", "of", "do", "forall", "class", "instance",
> "family", "where", "if", "then", "else", "deriving", "hiding"]
> lakeys :: [String]
> lakeys = ["let", "of", "do", "where"]
> width :: [Tok] -> Int
> width ts = case span noNL ts of
> (ts, []) -> length (ts >>= tokOut)
> (_, NL _ : ts) -> width ts
> where noNL (NL _) = False
> noNL _ = True
------------------------------------------------------------------------
The lexer monad
------------------------------------------------------------------------
> type L = StateT (Position, String) Maybe
> lFLine :: L (String, Int)
> lFLine = gets $ \ ((fl, _), _) -> fl
> lCol :: L Int
> lCol = gets $ \ ((_, c), _) -> c
< instance Alternative L where
< empty = StateT $ \ is -> empty
< p <|> q = StateT $ \ is -> runStateT p is <|> runStateT q is
< instance Applicative L where
< pure = return
< (<*>) = ap
> cha :: L Char
> cha = StateT moo where
> moo (i, []) = Nothing
> moo ((fl@(f,l),i), c : s)
> | isNL c = Just (c, (((f, l + 1), 0), s))
> | c == '\t' = if mod i 8 == 7 then Just (' ', ((fl, i + 1), s))
> else Just (' ', ((fl, i + 1), c : s))
> | otherwise = Just (c, ((fl, i + 1), s))
> stol :: L ()
> stol = do
> i <- lCol
> guard (i == 0)
> chk :: (t -> Bool) -> L t -> L t
> chk p l = do t <- l ; if p t then return t else empty
> ch :: Char -> L Char
> ch c = chk (== c) cha
> spa :: (Char -> Bool) -> L String
> spa p = (:) <$> chk p cha <*> spa p <|> pure []
> sym :: L String
> sym = StateT $ \ ((fl, i), s) -> case h s of
> ("", _) -> Nothing
> ("--", _) -> Nothing
> (s, t) -> Just (s, ((fl, i + length s), t))
> where
> h (s@('-':'}':_)) = ("", s)
> h (c : s) | isInfy c = first (c :) (h s)
> h s = ([], s)