-- Copyright (C) 2010 John Millikin -- -- This program is free software: you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation, either version 3 of the License, or -- any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program. If not, see . -- {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveDataTypeable #-} module Anansi.Parser ( ParseError (..) , parseFile ) where import Prelude hiding (FilePath) import Control.Applicative ((<|>), (<$>)) import Control.Monad.Trans.Class (lift) import qualified Control.Monad.Trans.State as S import qualified Control.Exception as E import Data.List (unfoldr) import Data.Typeable (Typeable) import qualified Text.Parsec as P import qualified Data.ByteString as B import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Lazy as TL import qualified Data.Map as Map import System.FilePath (replaceFileName) import Anansi.Types import Anansi.Util data ParseError = ParseError { parseErrorPosition :: Position , parseErrorMessage :: TL.Text } deriving (Show) -- too lazy to write proper error handling data ParseExc = ParseExc ParseError deriving (Typeable, Show) instance E.Exception ParseExc data Command = CommandInclude TL.Text | CommandFile TL.Text | CommandDefine TL.Text | CommandColon | CommandEndBlock | CommandComment deriving (Show) data Line = LineCommand Position Command | LineText Position TL.Text deriving (Show) untilChar :: Char -> P.Parsec String u TL.Text untilChar c = TL.pack <$> P.manyTill P.anyChar (P.try (P.char c)) getPosition :: Monad m => P.ParsecT s u m Position getPosition = do pos <- P.getPosition return $ Position (TL.pack (P.sourceName pos)) (toInteger (P.sourceLine pos)) parseLines :: P.Parsec String u [Line] parseLines = do lines' <- P.many parseLine P.eof return lines' parseLine :: P.Parsec String u Line parseLine = command <|> text where command = do P.char ':' pos <- getPosition LineCommand pos <$> parseCommand text = do pos <- getPosition line <- untilChar '\n' return . LineText pos $ TL.append line "\n" parseCommand :: P.Parsec String u Command parseCommand = parsed where string = P.try . P.string parsed = P.choice [file, include, define, colon, comment, endBlock] file = do string "file " <|> string "f " CommandFile <$> untilChar '\n' include = do string "include " <|> string "i " CommandInclude <$> untilChar '\n' define = do string "define " <|> string "d " -- TODO: verify no '|' in name CommandDefine <$> untilChar '\n' colon = do P.char ':' return $ CommandColon comment = do P.char '#' untilChar '\n' return $ CommandComment endBlock = do line <- untilChar '\n' if TL.all isSpace line then return $ CommandEndBlock else do pos <- getPosition let msg = TL.pack $ "unknown command: " ++ show (TL.append ":" line) E.throw $ ParseExc $ ParseError pos msg -- TODO: more unicode support isSpace :: Char -> Bool isSpace ' ' = True isSpace '\t' = True isSpace _ = False parseBlocks :: [Line] -> Maybe (Either ParseError Block, [Line]) parseBlocks [] = Nothing parseBlocks (line:xs) = parsed where parsed = case line of LineText _ text -> Just (Right $ BlockText text, xs) LineCommand pos cmd -> case cmd of CommandFile path -> parseContent pos (BlockFile path) xs CommandDefine name -> parseContent pos (BlockDefine name) xs CommandColon -> Just (Right $ BlockText ":", xs) CommandEndBlock -> Just (Right $ BlockText "\n", xs) CommandComment -> Just (Right $ BlockText "", xs) CommandInclude _ -> let msg = "unexpected CommandInclude (internal error)" in Just (Left $ ParseError pos msg, []) parseContent :: Position -> ([Content] -> Block) -> [Line] -> Maybe (Either ParseError Block, [Line]) parseContent start block = parse [] where parse acc [] = Just (Right $ block acc, []) parse acc (line:xs) = case line of LineText pos text -> case parse' pos text of Left err -> Just (Left err, []) Right parsed -> parse (acc ++ [parsed]) xs LineCommand _ CommandEndBlock -> Just (Right $ block acc, xs) LineCommand _ _ -> let msg = "Unterminated content block" in Just (Left $ ParseError start msg, []) parse' pos text = case P.parse (parser pos) "" (TL.unpack text) of Right content -> Right content Left err -> let msg = TL.pack $ "Invalid content line " ++ show text ++ ": " ++ show err in Left $ ParseError pos msg parser pos = do content <- contentMacro pos <|> contentText pos P.optional $ P.char '\n' P.eof return content contentMacro pos = do (indent, c) <- P.try $ do indent <- P.many $ P.satisfy isSpace P.char '|' c <- P.satisfy (not . isSpace) return (indent, c) name <- untilChar '|' return $ ContentMacro pos (TL.pack indent) (TL.strip (TL.cons c name)) contentText pos = do text <- untilChar '\n' return . ContentText pos $ text type FilePath = TL.Text type FileMap = Map.Map FilePath [Line] genLines :: Monad m => (FilePath -> m [Line]) -> FilePath -> S.StateT FileMap m [Line] genLines getLines = genLines' where genLines' path = lift (getLines path) >>= concatMapM (resolveIncludes path) relative x y = TL.pack $ replaceFileName (TL.unpack x) (TL.unpack y) resolveIncludes root line = case line of LineCommand _ (CommandInclude path) -> genLines' $ relative root path _ -> return [line] parseFile :: TL.Text -> IO (Either ParseError [Block]) parseFile root = io where io = E.handle onError $ do lines' <- S.evalStateT (genLines getLines root) Map.empty return . catEithers $ unfoldr parseBlocks lines' onError (ParseExc err) = return $ Left err getLines :: FilePath -> IO [Line] getLines path = do -- TODO: encode 'path' into UTF-8? let path' = TL.unpack path bytes <- B.readFile path' case P.parse parseLines path' (T.unpack $ TE.decodeUtf8 bytes) of Right x -> return x Left err -> let msg = TL.pack $ "getLines parse failed (internal error): " ++ show err in E.throw $ ParseExc $ ParseError (Position path 0) msg