Data/Avc/Parser.hs

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE FlexibleContexts #-}

-- TODO: Hackage, github, haddock, tests
-- TODO: isGzipped to a personal utility package; readAndDecompress
--       like readFile but if Gzipped will also decompress
-- TODO: Only works with parseOnly beause I decided to do special handling of
-- putting this into statements so trailing space at the end of the file will
-- make it return "Partial _"
-- TODO: keep track of line numbers in case of failure

module Data.Avc.Parser (parseAvcFile) where

import Codec.Compression.GZip
import qualified Data.ByteString.Lazy.Char8 as BL

import Control.DeepSeq
import Control.DeepSeq.Generics (genericRnf)
import GHC.Generics hiding (Rep)

import Data.ByteString.Char8 as BS hiding (split)
import Control.Monad
import Control.Applicative hiding (many)

import Data.Attoparsec.ByteString.Char8 as A
import qualified System.IO as IO

import qualified Test.HUnit as H

import qualified Data.List as DL hiding (words, unwords, takeWhile, elem, lines)
import Data.Function (on)

import Data.List.Split (keepDelimsR, whenElt, split)

import Prelude hiding (lines, elem, null, unwords, map, null)
import qualified Prelude as P

import Data.Char as DC (isLetter, isDigit)

import Data.Binary.Get (runGet, getWord16le)

import Data.Avc.Type

-- | Parse a simple AVC which may be gzipped
-- Only handles simple files with one character per state like:
--
-- # Begin AVC
-- FORMAT a b c;
-- R10 10X ; vector comment # other comment
-- R10 01H ;
parseAvcFile :: FilePath -> IO [Statement]
parseAvcFile filename = do
    content <- decompressFile filename
    let strict = BL.toStrict content
    let ls = BS.lines strict
    let ncmnt = nocomment ls
    let stmts = joinStatements ncmnt
    return $ getStatements stmts

-- TODO: put this in my utility module
decompressFile :: FilePath -> IO BL.ByteString
decompressFile filename = do
    bs <- BL.readFile filename
    isgz <- isGzipped bs
    let decompressed = decompress bs
    return $ if isgz then decompressed else bs

isGzipped :: BL.ByteString -> IO Bool
isGzipped bs = do
    let magic = runGet getWord16le bs
    return $ magic == 0x8b1f
-- /utility

-- | Strip # comments from a list of lines
nocomment :: [ByteString] -> [ByteString]
nocomment = P.map dropComment
    where dropComment bs = let idx = elemIndex '#' bs in
                           case idx of
                            Nothing -> bs
                            Just i -> BS.take i bs

-- | Join statements terminated by ';' into one line each also keeping
-- info after ';' which is a comment on the statement.
joinStatements :: [ByteString] -> [ByteString]
joinStatements bss = P.map unwords (grouped bss)
    where grouped = split (keepDelimsR $ whenElt hasSemi)
          hasSemi hs = ';' `elem` hs

-- Assume first statement is Format
getFormat :: ByteString -> Statement
getFormat bs =
    case parseOnly (skipSpace *> parseFormat) bs of
                    Left e -> error ("Error parsing FORMAT: " ++ show e ++ "\nInput:" ++ show bs)
                    Right r -> r

getStatements :: [ByteString] -> [Statement]
getStatements = P.map getStatement

-- getStatement "R1 cyc 123" -- TODO: doesn't return. some kind of inifinite loop without the ';'
getStatement bs =
    case parseOnly (skipSpace *> parseStatement) bs of
                    Left e -> error ("Error parsing FORMAT: " ++ show e ++ "\nInput:" ++ show bs)
                    Right r -> r

parseStatement :: Parser Statement
parseStatement =
    choice [parseRepeat
           ,parseFormat
           ,parseEof]

parseEof :: Parser Statement
parseEof = do
    endOfInput
    return EOF

-- Parser A is like parser b but it skips trailing spaces
-- TODO: returns "Partial _" if space until EOF
lexeme :: Parser a -> Parser a
lexeme pa = pa <* skipSpace

parseRepeat :: Parser Statement
parseRepeat = Repeat <$> (keyword "R" *> rep) <*> devcyc <*> parseVec <*> parseComment
    where
        rep = lexeme decimal
        devcyc = lexeme $ takeWhile1 isVecChar

isVecChar :: Char -> Bool
isVecChar c = A.isAlpha_ascii c || A.isDigit c
{-# INLINE isVecChar #-}

parseVec :: Parser ByteString
parseVec = do
    vec <- manyTill (skipSpace *> A.takeWhile isVecChar) (skipSpace *> semicolon)
    return $ BS.concat vec


parseFail :: String -> ByteString -> [String] -> String -> Statement
parseFail usrmsg trying contexts errmsg = error message
    where message = "Fail parsing: " ++ unpack trying
                 ++ "\nError message: " ++ errmsg
                 ++ "\nContexts: " ++ DL.intercalate "\n+" contexts

parseLeftover :: String -> ByteString -> Statement
parseLeftover trying leftover = error $ "Leftover input trying: " ++ trying ++ " Leftover: '" ++ unpack leftover ++ "'"


parseFormat' :: Parser Statement
parseFormat' = Format <$> (keyword "FORMAT" *> sigs) <*> parseComment <?> "FORMAT"
    where 
        sigs = manyTill parseSig semicolon

parseFormat :: Parser Statement
parseFormat = do
    linespaces
    parseFormat'

parseSig :: Parser Signal
parseSig = Sig <$> lexeme (takeWhile1 isSigChar)

letter = letter_ascii

parseComment :: Parser Comment
parseComment = do
    cmnt <- A.takeWhile (const True) <* endOfInput
    return $ Comment cmnt

linespaces :: Parser ()
linespaces = void $ A.takeWhile (`elem` "\t ")

isSigChar :: Char -> Bool
isSigChar c = isLetter c || DC.isDigit c || c `elem` "[]:_"

semicolon :: Parser ()
semicolon = void $ lexeme $ char ';'

keyword :: ByteString -> Parser ()
keyword kw = void $ lexeme $ string kw