-- | -- Module : Text.CueSheet.Parser -- Copyright : © 2016–2017 Mark Karpov -- License : BSD 3 clause -- -- Maintainer : Mark Karpov -- Stability : experimental -- Portability : portable -- -- The modules contains a CUE sheet parser. You probably want to import -- "Text.CueSheet" instead. {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} module Text.CueSheet.Parser ( parseCueSheet , CueParserFailure (..) , Eec (..) ) where import Control.Applicative import Control.Monad.State.Strict import Data.ByteString (ByteString) import Data.Data (Data) import Data.List.NonEmpty (NonEmpty (..)) import Data.Maybe (isJust, fromMaybe) import Data.Set (Set) import Data.Text (Text) import Data.Typeable (Typeable) import Data.Word (Word8) import GHC.Generics import Numeric.Natural import Text.CueSheet.Types import Text.Megaparsec import Text.Megaparsec.Byte import qualified Data.List.NonEmpty as NE import qualified Data.Set as E import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Text.Megaparsec.Byte.Lexer as L ---------------------------------------------------------------------------- -- Types -- | Extended error component with support for storing number of track -- declaration in which a parsing error has occurred. data Eec = Eec (Maybe Natural) CueParserFailure deriving (Show, Eq, Ord, Data, Typeable, Generic) instance ShowErrorComponent Eec where showErrorComponent (Eec mtrack failure') = showErrorComponent failure' ++ "\n" ++ maybe "" (\n -> "in declaration of the track " ++ show n) mtrack -- | The enumeration of all failures that may happen during running of -- 'parseCueSheet'. data CueParserFailure = CueParserTrivialError (Maybe (ErrorItem Word8)) (Set (ErrorItem Word8)) -- ^ A wrapper for a trivial error | CueParserInvalidCatalog Text -- ^ We ran into an invalid media catalog number | CueParserInvalidCueText Text -- ^ We ran into an invalid text literal | CueParserTrackOutOfOrder -- ^ We spotted a track out of order | CueParserInvalidTrackIsrc Text -- ^ We ran into an invalid ISRC | CueParserInvalidSeconds Natural -- ^ We ran into an invalid number of seconds | CueParserInvalidFrames Natural -- ^ We ran into an invalid number of frames | CueParserTrackIndexOutOfOrder -- ^ We spotted a track index out of order deriving (Show, Eq, Ord, Data, Typeable, Generic) instance ShowErrorComponent CueParserFailure where showErrorComponent = \case CueParserTrivialError us es -> init $ parseErrorTextPretty (TrivialError undefined us es :: ParseError Word8 Eec) CueParserInvalidCatalog txt -> "the value \"" ++ T.unpack txt ++ "\" is not a valid Media Catalog Number" CueParserInvalidCueText txt -> "the value \"" ++ T.unpack txt ++ "\" is not a valid CUE text literal" CueParserTrackOutOfOrder -> "this track appears out of order" CueParserInvalidTrackIsrc txt -> "\"" ++ T.unpack txt ++ "\" is not a valid ISRC" CueParserInvalidSeconds n -> "\"" ++ show n ++ "\" is not a valid number of seconds" CueParserInvalidFrames n -> "\"" ++ show n ++ "\" is not a valid number of frames" CueParserTrackIndexOutOfOrder -> "this index appears out of order" -- | Type of parser we use here, it's not public. type Parser a = StateT Context (Parsec Eec ByteString) a -- | Context of parsing. This is passed around in 'StateT'. We need all of -- this to signal parse errors on duplicate declarations of things that -- should only be declared once according to description of the format, to -- validate track numbers, etc. data Context = Context { contextCueSheet :: !CueSheet -- ^ Current state of CUE sheet we parse. When a part\/parameter of CUE -- sheet is parsed, this thing is updated. , contextFiles :: ![CueFile] -- ^ Temporary storage for parsed files (we can't store it in the -- 'CueSheet' because it does not allow empty list of files). , contextTracks :: ![CueTrack] -- ^ Similar to 'contextFiles', collection of tracks but for current -- file. , contextTrackCount :: !Natural -- ^ Number of tracks we have parsed so far, to avoid traversing lists -- again and again. , contextIndices :: ![CueTime] -- ^ Temporary storage for collection indices for current track. , contextIndexCount :: !Natural -- ^ Similarly for indices. } -- | Parse a CUE sheet from a lazy 'BL.ByteString'. parseCueSheet :: String -- ^ File name to include in error messages -> ByteString -- ^ CUE sheet to parse as a lazy 'BL.ByteString' -> Either (ParseError Word8 Eec) CueSheet -- ^ 'ParseError' or result parseCueSheet = parse (contextCueSheet <$> execStateT pCueSheet initContext) where initContext = Context { contextCueSheet = CueSheet { cueCatalog = Nothing , cueCdTextFile = Nothing , cuePerformer = Nothing , cueTitle = Nothing , cueSongwriter = Nothing , cueFirstTrackNumber = 0 , cueFiles = dummyFile :| [] } , contextFiles = [] , contextTracks = [] , contextTrackCount = 0 , contextIndices = [] , contextIndexCount = 0 } -- | Parse a 'CueSheet'. The result is not returned, but written in the -- 'Context'. pCueSheet :: Parser () pCueSheet = do scn void (many pHeaderItem) void (some pFile) -- NOTE Lens would help here, but let's keep this vanilla. modify $ \x -> x { contextCueSheet = (contextCueSheet x) { cueFiles = (NE.fromList . reverse . contextFiles) x } } eof pHeaderItem :: Parser () pHeaderItem = choice [ pCatalog , pCdTextFile , pPerformer , pTitle , pSongwriter , pRem ] pCatalog :: Parser () pCatalog = do already <- gets (isJust . cueCatalog . contextCueSheet) let f x' = let x = T.decodeUtf8 x' in case mkMcn x of Nothing -> Left (CueParserInvalidCatalog x) Just mcn -> Right mcn mcn <- labelledLit already f "CATALOG" modify $ \x -> x { contextCueSheet = (contextCueSheet x) { cueCatalog = Just mcn } } pCdTextFile :: Parser () pCdTextFile = do already <- gets (isJust . cueCdTextFile . contextCueSheet) cdTextFile <- T.decodeUtf8 <$> labelledLit already Right "CDTEXTFILE" modify $ \x -> x { contextCueSheet = (contextCueSheet x) { cueCdTextFile = Just (T.unpack cdTextFile) } } pPerformer :: Parser () pPerformer = do already <- gets (isJust . cuePerformer . contextCueSheet) let f x' = let x = T.decodeUtf8 x' in case mkCueText x of Nothing -> Left (CueParserInvalidCueText x) Just txt -> Right txt performer <- labelledLit already f "PERFORMER" modify $ \x -> x { contextCueSheet = (contextCueSheet x) { cuePerformer = Just performer } } pTitle :: Parser () pTitle = do already <- gets (isJust . cueTitle . contextCueSheet) let f x' = let x = T.decodeUtf8 x' in case mkCueText x of Nothing -> Left (CueParserInvalidCueText x) Just txt -> Right txt title <- labelledLit already f "TITLE" modify $ \x -> x { contextCueSheet = (contextCueSheet x) { cueTitle = Just title } } pSongwriter :: Parser () pSongwriter = do already <- gets (isJust . cueSongwriter . contextCueSheet) let f x' = let x = T.decodeUtf8 x' in case mkCueText x of Nothing -> Left (CueParserInvalidCueText x) Just txt -> Right txt songwriter <- labelledLit already f "SONGWRITER" modify $ \x -> x { contextCueSheet = (contextCueSheet x) { cueSongwriter = Just songwriter } } pRem :: Parser () pRem = do void (symbol "REM") takeWhileP (Just "character") (/= 10) *> char 10 *> scn pFile :: Parser () pFile = do void (symbol "FILE") filename <- T.decodeUtf8 <$> lexeme stringLit let pFiletype = choice [ Binary <$ symbol "BINARY" , Motorola <$ symbol "MOTOROLA" , Aiff <$ symbol "AIFF" , Wave <$ symbol "WAVE" , MP3 <$ symbol "MP3" ] filetype <- pFiletype <* eol <* scn void (some (pTrack <|> pRem)) tracks <- gets contextTracks let newFile = CueFile { cueFileName = T.unpack filename , cueFileType = filetype , cueFileTracks = NE.fromList (reverse tracks) } modify $ \x -> x { contextFiles = newFile : contextFiles x , contextTracks = [] } pTrack :: Parser () pTrack = do void (symbol "TRACK") trackOffset <- gets (cueFirstTrackNumber . contextCueSheet) trackCount <- gets contextTrackCount let firstTrack = trackCount == 0 f x = if firstTrack || x == trackOffset + trackCount then Right x else Left CueParserTrackOutOfOrder n <- withCheck f (lexeme L.decimal) let pTrackType = choice [ CueTrackAudio <$ symbol "AUDIO" , CueTrackCdg <$ symbol "CDG" , CueTrackMode1_2048 <$ symbol "MODE1/2048" , CueTrackMode1_2352 <$ symbol "MODE1/2352" , CueTrackMode2_2336 <$ symbol "MODE2/2336" , CueTrackMode2_2352 <$ symbol "MODE2/2352" , CueTrackCdi2336 <$ symbol "CDI/2336" , CueTrackCdi2352 <$ symbol "CDI/2352" ] trackType <- pTrackType <* eol <* scn let newTrack = dummyTrack { cueTrackType = trackType } modify $ \x -> x { contextTracks = newTrack : contextTracks x , contextTrackCount = trackCount + 1 , contextCueSheet = let old = contextCueSheet x in if firstTrack then old { cueFirstTrackNumber = n } else old } inTrack n $ do void (many pTrackHeaderItem) index0 <- (optional . try . pIndex) 0 modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackPregapIndex = index0 } } modify $ \x -> x { contextIndices = [] , contextIndexCount = 0 } let grabIndex = do next <- (+ 1) <$> gets contextIndexCount nextIndex <- pIndex next modify $ \x -> x { contextIndices = nextIndex : contextIndices x , contextIndexCount = next } void (some (grabIndex <|> pRem)) modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackIndices = (NE.fromList . reverse . contextIndices) x } } void (optional pPostgap) pTrackHeaderItem :: Parser () pTrackHeaderItem = choice [ pFlags , pIsrc , pTrackPerformer , pTrackTitle , pTrackSongwriter , pRem , pPregap ] pFlags :: Parser () pFlags = do already <- gets (seenFlags . head . contextTracks) failAtIf already "FLAGS" void (some pFlag) <* eol <* scn -- | A helper data type for track flags. data CueTrackFlag = DCP | FourCH | PRE | SCMS pFlag :: Parser () pFlag = do flag <- choice [ DCP <$ symbol "DCP" , FourCH <$ symbol "4CH" , PRE <$ symbol "PRE" , SCMS <$ symbol "SCMS" ] modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> case flag of DCP -> t { cueTrackDigitalCopyPermitted = True } FourCH -> t { cueTrackFourChannelAudio = True } PRE -> t { cueTrackPreemphasisEnabled = True } SCMS -> t { cueTrackSerialCopyManagement = True } } pIsrc :: Parser () pIsrc = do already <- gets (isJust . cueTrackIsrc . head . contextTracks) let f x' = let x = T.decodeUtf8 x' in case mkIsrc x of Nothing -> Left (CueParserInvalidTrackIsrc x) Just isrc -> Right isrc isrc <- labelledLit already f "ISRC" modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackIsrc = Just isrc } } pTrackPerformer :: Parser () pTrackPerformer = do already <- gets (isJust . cueTrackPerformer . head . contextTracks) let f x' = let x = T.decodeUtf8 x' in case mkCueText x of Nothing -> Left (CueParserInvalidCueText x) Just txt -> Right txt performer <- labelledLit already f "PERFORMER" modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackPerformer = Just performer } } pTrackTitle :: Parser () pTrackTitle = do already <- gets (isJust . cueTrackTitle . head . contextTracks) let f x' = let x = T.decodeUtf8 x' in case mkCueText x of Nothing -> Left (CueParserInvalidCueText x) Just txt -> Right txt title <- labelledLit already f "TITLE" modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackTitle = Just title } } pTrackSongwriter :: Parser () pTrackSongwriter = do already <- gets (isJust . cueTrackSongwriter . head . contextTracks) let f x' = let x = T.decodeUtf8 x' in case mkCueText x of Nothing -> Left (CueParserInvalidCueText x) Just txt -> Right txt songwriter <- labelledLit already f "SONGWRITER" modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackSongwriter = Just songwriter } } pPregap :: Parser () pPregap = do already <- gets (isJust . cueTrackPregap . head . contextTracks) failAtIf already "PREGAP" time <- lexeme cueTime <* eol <* scn modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackPregap = Just time } } pPostgap :: Parser () pPostgap = do already <- gets (isJust . cueTrackPostgap . head . contextTracks) failAtIf already "POSTGAP" time <- lexeme cueTime <* eol <* scn modify $ \x -> x { contextTracks = changingFirstOf (contextTracks x) $ \t -> t { cueTrackPostgap = Just time } } pIndex :: Natural -> Parser CueTime pIndex n = do void (symbol "INDEX") let f x = if x == n then Right () else Left CueParserTrackIndexOutOfOrder withCheck f (lexeme naturalLit) lexeme cueTime <* eol <* scn cueTime :: Parser CueTime cueTime = do minutes <- naturalLit void (char 58) let checkSeconds n = if n < 60 then Right n else Left (CueParserInvalidSeconds n) checkFrames n = if n < 75 then Right n else Left (CueParserInvalidFrames n) seconds <- withCheck checkSeconds naturalLit void (char 58) frames <- withCheck checkFrames naturalLit case fromMmSsFf minutes seconds frames of Nothing -> empty -- NOTE must be always valid, we checked already Just x -> return x ---------------------------------------------------------------------------- -- Helpers -- | Parse a thing and then check if it's OK conceptually. If it's not OK, -- the error will be reported with position at the start of offending -- lexeme, otherwise the lexeme is parsed as usual. Of course if the lexeme -- has incorrect format, that is just reported and no additional check -- happens. withCheck :: (a -> Either CueParserFailure b) -> Parser a -> Parser b withCheck check p = do cpos <- getPosition npos <- fromMaybe cpos <$> getNextTokenPosition r <- p case check r of Left custom -> do setPosition npos (fancyFailure . E.singleton . ErrorCustom) (Eec Nothing custom) Right x -> return x -- | If the first argument is 'True' and we can parse the given command, -- fail pointing at the beginning of the command and report it as something -- unexpected. failAtIf :: Bool -> ByteString -> Parser () failAtIf shouldFail command = do let p = void (symbol command) lookAhead p if shouldFail then empty else p -- | Indicate that the inner parser belongs to declaration of a track with -- the given index. The index of the track will be added to 'ParseError's to -- help the user find where the error happened. inTrack :: Natural -> Parser a -> Parser a inTrack n = region f where f (TrivialError pos us es) = FancyError pos . E.singleton $ ErrorCustom (Eec (Just n) (CueParserTrivialError us es)) f (FancyError pos xs) = FancyError pos (E.map g xs) g (ErrorCustom (Eec mn x)) = ErrorCustom (Eec (mn <|> Just n) x) g e = e -- | A labelled literal (a helper for common case). labelledLit :: Bool -- ^ Should we instantly fail when command is parsed? -> (ByteString -> Either CueParserFailure a) -- ^ How to judge the result -> ByteString -- ^ Name of the command to grab -> Parser a labelledLit shouldFail check command = do failAtIf shouldFail command withCheck check (lexeme stringLit) <* eol <* scn -- | String literal with support for quotation. stringLit :: Parser ByteString stringLit = (quoted "quoted string literal") <|> (unquoted "unquoted string literal") where quoted = char 34 *> takeWhileP Nothing f <* char 34 unquoted = takeWhileP Nothing g f x = x /= 10 && x /= 34 g x = x /= 10 && x /= 9 && x /= 13 && x /= 32 -- | Parse a 'Natural'. naturalLit :: Parser Natural naturalLit = L.decimal -- | Case-insensitive symbol parser. symbol :: ByteString -> Parser ByteString symbol s = string' s <* notFollowedBy alphaNumChar <* sc -- | A wrapper for lexemes. lexeme :: Parser a -> Parser a lexeme = L.lexeme sc -- | Space consumer (eats newlines). scn :: Parser () scn = L.space space1 empty empty -- | Space consumer (does not eat newlines). sc :: Parser () sc = L.space (void $ takeWhile1P Nothing f) empty empty where f x = x == 32 || x == 9 -- | Determine by 'CueTrack' if we have already parsed FLAGS command. seenFlags :: CueTrack -> Bool seenFlags CueTrack {..} = or [ cueTrackDigitalCopyPermitted , cueTrackFourChannelAudio , cueTrackPreemphasisEnabled , cueTrackSerialCopyManagement ] -- | Apply given function to the first element of the list. changingFirstOf :: [a] -> (a -> a) -> [a] changingFirstOf [] _ = [] changingFirstOf (x:xs) f = f x : xs ---------------------------------------------------------------------------- -- Dummies -- | A dummy file. It's only here because 'CueSheet' can't have an empty -- list of files and it cannot be a bottom either. dummyFile :: CueFile dummyFile = CueFile { cueFileName = "" , cueFileType = Wave , cueFileTracks = dummyTrack :| [] } -- | A dummy track, see 'dummyFile'. dummyTrack :: CueTrack dummyTrack = CueTrack { cueTrackDigitalCopyPermitted = False , cueTrackFourChannelAudio = False , cueTrackPreemphasisEnabled = False , cueTrackSerialCopyManagement = False , cueTrackType = CueTrackAudio , cueTrackIsrc = Nothing , cueTrackTitle = Nothing , cueTrackPerformer = Nothing , cueTrackSongwriter = Nothing , cueTrackPregap = Nothing , cueTrackPregapIndex = Nothing , cueTrackIndices = CueTime 0 :| [] , cueTrackPostgap = Nothing }