-- |
-- Module      : Text.Subtitles.SRT
-- Copyright   : Ruben Astudillo 2012
-- License     : BSD3
--
-- Maintainer  : ruben.astud@gmail.com
-- Portability : unknown
--
-- A basic parser for .srt files (subtitles) based on 'Attoparsec' and 'Text'

module Text.Subtitles.SRT 
  (
  -- * Don't use parseOnly!
  -- $noParseOnly 
  
  -- * Terminology of the module
  -- $example
  
  -- * Re-exported Datatypes
  module Text.Subtitles.SRT.Datatypes,

  -- * Main parsers
  parseSRT,
  parseSingleLine,

  -- * Temporal solutions
  parseOnly'
  ) where

import Prelude hiding (takeWhile)
import Control.Applicative
import Data.Attoparsec.Text hiding (parseOnly)
import qualified Data.Text as T
-- in project modules
import Text.Subtitles.SRT.Datatypes

-- $noParseOnly
--
-- This module uses now peekChar in parseDialog, which replaces the ad-hoc
-- method used before. As a consequence it doesn't play well with
-- Data.Attoparsec.Text.parseOnly on some conditions.
--
-- You should use just 'parse' or the parseOnly' function I provide to avoid
-- problems until further notice. /Hopefully/ in the next version of attoparsec
-- this will be solved , so keep an eye for removal!.

-- $example
--
-- All the sections of a Line have their corresponding ADT in
-- "Text.Subtitles.SRT.Datatypes"
-- 
-- >2
-- >00:00:50,050 --> 00:00:52,217 X1:1 X2:2 Y1:1 Y2:2
-- >Drama here
--
-- The whole Line is represented in the 'Line' ADT which constructors
-- represented by different ADTs
--
-- * The first line is called index, which is the first constructor of
--   'Line'.
--
-- * The second one is called 'Range', which correspond to two separated 'Time'.
--
-- * After the range is an optional field called Rectangle which says what
--   geometry should the text obey.
--
-- * The last one is the 'subs'. Which is just Text and correspond to the third
--   constructor of 'Line'.

-- |Main Parser, gives you a list of all the Lines of the subtitle. It fails if
--  the subtitle doesn't have any Lines.
parseSRT :: Parser Subtitles
parseSRT = many1 parseSingleLine

-- |The individual Line parser. Given the upper example return the
-- corresponding Line representation
parseSingleLine :: Parser Line
parseSingleLine = 
  Line <$> parseIndex <*> parseRange <*> optionalGeometry <*> parseDialog T.empty

parseIndex :: Parser Int
parseIndex = decimal <* eol

eol :: Parser ()
eol = endOfLine 

-- |This version avoid the problems associated with peekChar and thus is safe to
-- use in this module. Subject to removal once parseOnly is fixed.
parseOnly' :: Parser a -> Text -> Either String a
parseOnly' p t = eitherResult $ feed (parse p t) T.empty

{- Is clear that this just aplies parseTime breaking down the "-->" string -}
parseRange :: Parser Range
parseRange = Range <$> parseTime <* arrowString <*> parseTime <* skipSpace
  where
    arrowString :: Parser Text
    arrowString = string (T.pack " --> ") 

{- the order X1 X2 Y1 Y2 seems to be enforced, so we can check only for order
 - instead of keywords -}
optionalGeometry :: Parser (Maybe Rectangle)
optionalGeometry = option Nothing (Just <$> rectangle <* eol)
  where rectangle = R <$> valueSpace <*> valueSpace <*> valueSpace <*> value
        value   = letter *> digit *> char ':' *> decimal 
        valueSpace = value <* space

parseTime :: Parser Time
parseTime = Time <$> numDot <*> numDot <*> decimal <* char ',' <*> decimal
  where
    numDot :: Parser Int
    numDot = decimal <* char ':'

{- return the dialog checking for newlines that could be in there. That why is
 - written in a monad instead of applicative. More efficient version welcome -}
parseDialog :: Text -> Parser Text
parseDialog stateLine = do 
  line <- takeWhile1 (not . isEndOfLine)
  endOfLine
  let stateCurrent = T.append stateLine line
      lineState = T.snoc stateCurrent '\n' --takeWhile1 didn't consume \n
  next <- peekChar
  case next of
    Nothing     -> return stateCurrent  -- the end of the file
    (Just '\n') -> eol >> return stateCurrent -- End of this Line, new Line coming.
    (Just _)    -> parseDialog lineState {- in between lines, the next one belong to this Line
                                         explicit eol required -}