{-# LANGUAGE OverloadedStrings #-} module Text.Hastily.Report ( generate, findCandidateDialog ) where import Data.List (intersect) import Data.List import qualified Data.Map as Map import Data.String.Conversions import qualified Data.Text as DT import System.Directory import System.IO import Text.Printf import Text.Hastily.Types generate :: [DT.Text] -> [Subtitle] -> IO () generate skip_words subs = do let dialog_frequency_pair = findCandidateDialog skip_words subs printReport (filter isEmpty subs) $ fst dialog_frequency_pair where isEmpty :: Subtitle -> Bool isEmpty s = (length $ subtitle_dialogs s) > 0 printReport :: [Subtitle] -> SubtitleDialog -> IO () printReport subs candidate_dialog = if (length subs==0) then putStrLn "No subtitles found!" else do putStrLn $ printf "Computing candidate dialog..." maybe_handle <- let report_filename = "hastily-report.txt" in doesFileExist report_filename >>= (\x -> if not x then fmap Just $ openFile report_filename WriteMode else do putStrLn "WARNING: Existing report file found. Will not overwrite!" return Nothing) printHeaders candidate_dialog maybe_handle mapM_ (findAndPrintReport candidate_dialog maybe_handle) subs case maybe_handle of Just handle -> hClose handle _ -> return () where outPut maybe_handle str = do putStrLn str case maybe_handle of Just handle -> hPutStrLn handle str _ -> return () printHeaders candidate_dialog maybe_handle = do outPut maybe_handle $ printf "\nDialogue\n--------\n%s\n--------" (cs $ dialog candidate_dialog::String) outPut maybe_handle $ printf "%-29s | %s" ("Time"::String) ("File"::String) outPut maybe_handle $ printf "%-29s | %s" ("----"::String) ("----"::String) findAndPrintReport :: SubtitleDialog -> Maybe Handle -> Subtitle -> IO () findAndPrintReport c_dialog maybe_handle sub = do outPut maybe_handle $ printf "%-29s | %s" (findTimeFor c_dialog (subtitle_dialogs sub)) (subtitle_file sub) where findTimeFor :: SubtitleDialog -> [SubtitleDialog] -> String findTimeFor c_dialog [] = "Dialog not found!" findTimeFor c_dialog (d:ds) = if d == c_dialog then printf "%s --> %s" (cs $ start_time d::String) (cs $ end_time d::String) else findTimeFor c_dialog ds -- Find a dialog whose time is used to compare -- subtitles. The following process is used to find this dialog. -- 1. For each subtitle file, prepare a list of dialogues that appear only once in the movie. -- 2. Among those list, select dialogues that appear in the most of them. -- 3. Among those list of dialogues, pick one that appears closest to the start. findCandidateDialog :: [DT.Text] -> [Subtitle] -> (SubtitleDialog, Int) findCandidateDialog skip_words subs = head $ sortBy compareFunction $ getCombinedHistogram non_repeating_dialog_sets where compareFunction (SubtitleDialog st1 _ _ _, a) (SubtitleDialog st2 _ _ _, b) = -- Sort dialogs by decreasing order of their frequency. -- if two dialogs appear in same number of files, then sort them -- by the time they first appear, so that earlier ones are prefered. let ordering = compare b a in if ordering == EQ then compare st1 st2 else ordering non_repeating_dialog_sets = fmap (getNonRepeatingDialogList skip_words) subs -- given a subtitle, return a list of subtitle dialogs that does not -- appear more than once or contain the list of skip words getNonRepeatingDialogList :: [DT.Text] -> Subtitle -> [SubtitleDialog] getNonRepeatingDialogList skip_words (Subtitle _ _ dialogs) = fmap fst $ filter (\(dialog, count) -> (count == 1) && (dialog `doesNotContain` skip_words) ) $ getHistogram dialogs where doesNotContain sub_dialog skip_words = all (\word -> not $ word `DT.isInfixOf` (digest sub_dialog)) $ fmap DT.toLower skip_words -- Make a list of tuples. Each tuple in this list will -- hold a subtitle dialog and the number of times it appears in -- all the avaliable subs getCombinedHistogram :: [[SubtitleDialog]] -> [(SubtitleDialog, Int)] getCombinedHistogram dlgxs = getHistogram $ flatenList dlgxs where flatenList dlgs = foldl (++) [] dlgxs getHistogram :: [SubtitleDialog] -> [(SubtitleDialog, Int)] getHistogram subdx = Map.toList $ foldl addDialogToMap (makeMap subdx) subdx where addDialogToMap :: Map.Map SubtitleDialog Int -> SubtitleDialog -> Map.Map SubtitleDialog Int addDialogToMap map dg = Map.update (\f -> Just (f+1)) dg map makeMap :: [SubtitleDialog] -> Map.Map SubtitleDialog Int makeMap subdx = Map.fromList $ zip (nub subdx) $ repeat 0