-- |
-- Module      : Text.Subtitles.SRT
-- Copyright   : Ruben Astudillo 2012
-- License     : BSD3
--
-- Maintainer  : ruben.astud@gmail.com
-- Portability : unknown
--
-- A basic parser for .srt files (subtitles) based on 'Attoparsec' and 'Text'

module Text.Subtitles.SRT 
  (
  -- * Terminology of the module
  -- $example
  
  -- * Re-exported Datatypes
  module Text.Subtitles.SRT.Datatypes,

  -- * Main parsers
  parseSRT,
  parseSingleLine,

  -- * Deprecated
  parseOnly'
  ) where

import Prelude hiding (takeWhile)
import Control.Applicative
import Data.Attoparsec.Text hiding (parseOnly)
import qualified Data.Attoparsec.Text as DAT
import qualified Data.Text as T
-- in project modules
import Text.Subtitles.SRT.Datatypes

-- $example
--
-- All the sections of a Line have their corresponding ADT in
-- "Text.Subtitles.SRT.Datatypes"
-- 
-- >2
-- >00:00:50,050 --> 00:00:52,217 X1:1 X2:2 Y1:1 Y2:2
-- >Drama here
--
-- The whole Line is represented in the 'Line' ADT which constructors
-- represented by different ADTs
--
-- * The first line is called index, which is the first constructor of
--   'Line'.
--
-- * The second one is called 'Range', which correspond to two separated 'Time'.
--
-- * After the range is an optional field called Rectangle which says what
--   geometry should the text obey.
--
-- * The last one is the 'subs'. Which is just Text and correspond to the third
--   constructor of 'Line'.

-- |Main Parser, gives you a list of all the Lines of the subtitle. It fails if
--  the subtitle doesn't have any Lines.
parseSRT :: Parser Subtitles
parseSRT = many1 parseSingleLine

-- |The individual Line parser. Given the upper example return the
-- corresponding Line representation
parseSingleLine :: Parser Line
parseSingleLine = 
  Line <$> parseIndex <*> parseRange <*> optionalGeometry <*> parseDialog []

parseIndex :: Parser Int
parseIndex = decimal <* eol

eol :: Parser ()
eol = endOfLine 

{-# DEPRECATED parseOnly' "AttoParsec's parseOnly was broken before v0.10.2.1" #-}
parseOnly' :: Parser a -> Text -> Either String a
parseOnly' = DAT.parseOnly

{- Is clear that this just aplies parseTime breaking down the "-->" string -}
parseRange :: Parser Range
parseRange = Range <$> parseTime <* arrowString <*> parseTime <* skipSpace
  where
    arrowString :: Parser Text
    arrowString = string (T.pack " --> ") 

{- the order X1 X2 Y1 Y2 seems to be enforced, so we can check only for order
 - instead of keywords -}
optionalGeometry :: Parser (Maybe Rectangle)
optionalGeometry = option Nothing (Just <$> rectangle <* eol)
  where rectangle = R <$> valueSpace <*> valueSpace <*> valueSpace <*> value
        value   = letter *> digit *> char ':' *> decimal 
        valueSpace = value <* space

parseTime :: Parser Time
parseTime = Time <$> numDot <*> numDot <*> decimal <* char ',' <*> decimal
  where
    numDot :: Parser Int
    numDot = decimal <* char ':'

{- return the dialog checking for newlines that could be in there. That why is
 - written in a monad instead of applicative. More efficient version welcome -}
parseDialog :: [Text] -> Parser Text
parseDialog stateLines = do 
  line <- takeWhile1 (not . isEndOfLine)
  endOfLine
  let stateLines' = line : stateLines
      dialog_complete = return $ T.concat $ reverse stateLines' 
  -- the end of the file
  (endOfInput *> dialog_complete) 
    -- End of this Line, new Line coming.
    <|> (endOfLine *> dialog_complete)
    -- In between lines, the next one belong to this Line,explicit eol required
    <|> parseDialog (T.singleton '\n' : stateLines')