{-# LANGUAGE OverloadedStrings #-} module Text.Hastily.SubtitleFileTypes.Srt.Srt ( getSrtFilesInDir, parseFile, srtParser ) where import Control.Applicative ((<*)) import qualified Data.ByteString as BS import Data.Char import Data.String.Conversions import qualified Data.Text as DT import Data.Text.Encoding import Data.Text.Encoding.Error import Text.Hastily.Types import System.Directory.Tree import System.FilePath import System.IO import Text.Parsec ((<|>)) import qualified Text.Parsec as Parsec getSrtFilesInDir :: FilePath -> IO [FilePath] getSrtFilesInDir destination = do anchored_dir_tree <- build destination -- Filter out everything that is not an .srt file let file_dt = filter isfile $ flattenDir $ filterDir (\x -> case x of (File name a) -> (takeExtension name) == ".srt";Dir _ _ -> True;_->False) (dirTree anchored_dir_tree) -- Extract FilePath objects out the the result tree return $ fmap (\(File name file_path) -> file_path) file_dt where isfile x = case x of File _ _ -> True _ -> False parseFile :: MovieInfo -> FilePath -> IO Subtitle parseFile movie_info file_path = do putStrLn $ "Parsing subtitles from " ++ file_path handle <- openBinaryFile file_path ReadMode doParse handle where doParse handle = do b_string <- BS.hGetContents handle -- Drop bytes util the charecter '1' is found -- This is a hack to account for the BOM marker in some -- utf-8 encoded files let string = DT.dropWhile (/= '1') $ decodeUtf8With ignore b_string case Parsec.parse srtParser "(mainparser)" string of Right subtitles -> case subtitles of [] -> do putStrLn $ "WARNING : No subtitles could be read from file " ++ file_path return $ Subtitle movie_info file_path [] all@(x:xs) -> do putStrLn $ "Read " ++ (show $ length all) ++ " dialogues." return $ Subtitle movie_info file_path subtitles Left err -> do print err return $ Subtitle movie_info file_path [] srtParser :: Parsec.Parsec DT.Text () [SubtitleDialog] srtParser = Parsec.many $ Parsec.try chunk_parser where makeDigest dialog = DT.filter isAlphaNum $ DT.toLower dialog newLine = (Parsec.string "\n") <|> (Parsec.string "\r\n") separator = Parsec.count 2 newLine separatorOrEof = do separator chunk_parser = do let time_parser = Parsec.many (Parsec.digit <|> (Parsec.char ':') <|> (Parsec.char ',')) index <- (Parsec.many Parsec.digit) newLine start_time <- time_parser Parsec.many1 $ Parsec.char ' ' Parsec.string "-->" Parsec.many1 $ Parsec.char ' ' end_time <- time_parser newLine dialog <- Parsec.manyTill Parsec.anyChar (Parsec.try separatorOrEof) return $ SubtitleDialog (cs start_time) (cs end_time) (cs dialog) (makeDigest $ cs dialog)