{-# LANGUAGE FlexibleContexts    #-}
{-# LANGUAGE ScopedTypeVariables #-}

--------------------------------------------------------------------------------
-- |
-- Module      :  HarmTrace.IO.Recognise
-- Copyright   :  (c) 2010-2012 Universiteit Utrecht, 2012 University of Oxford
-- License     :  GPL3
--
-- Maintainer  :  bash@cs.uu.nl, jpm@cs.ox.ac.uk
-- Stability   :  experimental
-- Portability :  non-portable
--
-- Summary: Handels all the IO of the 'recognise' mode of HarmTrace, which
-- deals with audio chord transcription.
--------------------------------------------------------------------------------

module HarmTrace.IO.Recognise (

    parseAnnotation, parseAnnotationVerb, parseAnnotationDir,
    evaluateLabeling, printLabeling, batchLabeling

  ) where


-- Common IO functions
import HarmTrace.IO.Common
import HarmTrace.IO.BasePaths (BasePaths, getOutDir, getFeatDir)
import Constants ( keyStr, chromaStr, beatStr )

-- Parser stuff
import Text.ParserCombinators.UU

-- Music stuff
import HarmTrace.HarmTrace
import HarmTrace.Base.MusicRep
import HarmTrace.Models.Jazz.Instances ()
import HarmTrace.HAnTree.ToHAnTree
import HarmTrace.IO.Errors
import HarmTrace.Base.Parsing

-- Audio stuff
import HarmTrace.Audio.DataParser ( parseChordinoData, parseChromaData
                                  , parseBeatBarData)
import HarmTrace.Audio.AnnotationParser
import HarmTrace.Audio.Annotate (putSegStats)
import HarmTrace.Audio.Evaluation
import HarmTrace.Audio.ChordTypes 
import HarmTrace.IO.FeatExtract
import Data.List (genericLength, isSuffixOf, stripPrefix)

-- Library modules
import Data.List (sort, intersperse)
import System.FilePath
import System.Directory
import System.IO
import System.CPUTime
import Data.Maybe (isJust, fromJust, isNothing)
-- import Control.Parallel.Strategies (parList, rdeepseq, using)

--------------------------------------------------------------------------------
-- Audio Ground-truth annotations IO
-------------------------------------------------------------------------------- 

-- @pedro: I think the three "parse" functions should go to HarmTrace.IO.Parse
parseAnnotation, parseAnnotationVerb :: (GTree g) => Grammar g -> [PPOption] 
   -> String -> String -> IO (ParseResult g)
parseAnnotation g opts k ann =
  do let pr@(ParseResult _ tks _ _ n te pe _) = postProc opts $ gt2Piece g k ann  
     putStrLn ("key: "  ++ k)
     putStrLn ("parsed " ++ show (length tks)++ " audio chord annotations into " 
                         ++ show n           ++ " ambiguous trees")
     if not $ null te then showErrors "tokenizer: " te
                     else putStr ""
     if not $ null pe then showErrors "parser: " pe
                     else putStr ""
     return pr

parseAnnotationVerb g opts k ann =
  do let pr@(ParseResult _ tks _ _ n te pe _) = postProc opts $ gt2Piece g k ann  
     putStrLn ("key: "  ++ k)
     if not $ null te then do putStrLn "tokenizer errors:" ; mapM_ print te
                      else putStr ""
     if not $ null pe then do putStrLn "parser errors:" ; mapM_ print pe
                      else putStr ""
     putStrLn ("parsed " ++ show (length tks)++ " audio chord annotations into " 
                         ++ show n           ++ " ambiguous trees")
     return pr   

-- parses a directory of annotation files and key description files
-- and prints condenced parsing information to std out
parseAnnotationDir :: GTree a => Grammar a -> [PPOption] -> FilePath -> FilePath
                   -> IO ()     
parseAnnotationDir g opts kdir andir =
  do ks  <- getDirectoryContents kdir
     ans <- getDirectoryContents andir
         -- prints parse results in one line
     let prntParse :: (FilePath,FilePath) -> IO ()
         prntParse (kfp,anfp) = 
           do k <- readFile kfp
              a <- readFile anfp
              printLn . concat $ intersperse "\t" (takeFileName kfp 
                : (showParseResult . postProc opts $ gt2Piece g k a))
         -- filters .lab files and adds the path
         fileFilter :: FilePath -> [FilePath] -> [FilePath]
         fileFilter pf = map (combine pf) . filter ((== ".lab") . takeExtension)       

     case matchKeyAnn (fileFilter kdir  $ reverse ks) 
                      (fileFilter andir $ reverse ans) of
       Just x  -> do printVersion
                     printLn ("Filename\tkey\tnrOfTrees\tInsertions\tDeletions"
                           ++ "\tDelsAtEnd\tTotalErr\tnrOfChords\tTokenizerErr")
                     mapM_ prntParse x
       Nothing -> putStrLn ("the filenames in " ++ kdir 
                         ++ " do not exactly match the ones in " ++ andir)

-- Checks if the key and the annotation files all match, if this is the
-- case it will return a paired list of these files
matchKeyAnn :: [FilePath] -> [FilePath] -> Maybe [(FilePath,FilePath)]     
matchKeyAnn ks ans = 
  let match = and $ zipWith eqFileName ks ans
      eqFileName :: FilePath -> FilePath -> Bool
      eqFileName a b = takeFileName a == takeFileName b
  in if match then Just $ zip ks ans else Nothing

-- shows some elements of a ParseResult 
showParseResult :: ParseResult a -> [String]
showParseResult (ParseResult k tk _p _han n te pe _pp) =
  let pErr   = countErrors pe 
      insert = ins pErr
      delete = del pErr
      endDel = delEnd pErr
      total  = insert + delete + endDel
  -- key nrOfTrees Insertions Deletions DelsAtEnd TotalErr tokenizerErr
  in show k : (map show (n : insert : delete : endDel : total 
                           : length tk : length te :[]))


--------------------------------------------------------------------------------
-- Audio Data IO
-------------------------------------------------------------------------------- 

-- | Returns True when the argument is an audio feature file based on 
-- the filename
isAudioFeatureFile :: FilePath -> Bool
isAudioFeatureFile fp
  | isSuffixOf chromaStr fp = True
  | isSuffixOf keyStr    fp = True
  | isSuffixOf beatStr   fp = True
  | otherwise               = False

-- | Retuns the audio feature identifier, which equals the filename without
-- the extension
getAudioFeatureId :: FilePath -> Maybe String
getAudioFeatureId fp
  | isJust key  = key
  | isJust chm  = chm
  | isJust bt   = bt  
  | otherwise   = Nothing
  where key  = stripSuffix keyStr fp
        chm  = stripSuffix chromaStr fp
        bt   = stripSuffix beatStr fp

        -- drops a given suffix from a string. It returns nothing if the suffix 
        -- is not a suffix of the string
        stripSuffix :: String -> String -> Maybe String
        stripSuffix suf txt 
          | isJust stp = Just . reverse $ fromJust stp
          | otherwise  = Nothing
          where stp = stripPrefix (reverse suf) (reverse txt) 

-- maps readAudioFeat over a directory
readAudioFeatureDir :: FilePath -> IO [AudioFeat]
readAudioFeatureDir fp = 
  do fs <- getDirectoryContents fp
     mapM readAudioFeatures (group . sort $ filter isAudioFeatureFile fs) 
       where
       group :: [FilePath] -> [(FilePath, FilePath, FilePath)]
       group (c:k:b:fs) = (fp </> c, fp </> b, fp </> k) : group fs
       group [] =[]
       group _  = error ("HarmTrace.IO.Recognise.readAudioFeatureDir: the " 
                   ++  "number of files in the filepath cannot be divided by 3")

-- Given triplet of three filenames describing 
-- a chroma, beat and key feature file, 'readAudioFeat' parses all data and returns an 'AudioFeat'.
readAudioFeatures :: (FilePath,FilePath,FilePath) -> IO (AudioFeat)
readAudioFeatures (cfp,bfp,kfp) =
  do dChroma <- readFile cfp
     dBeat   <- readFile bfp
     dKey    <- readFile kfp
    
     let chrm  = parseData parseChordinoData    dChroma
         beats = parseData parseBeatBarData     dBeat
         keys  = parseData parseChromaData      dKey
     return (AudioFeat chrm beats keys)

-- Given one VAMP feature CSV file, HarmTrace will look for the other two 
-- feature files needed for chord transcription. If the first argument it True
-- 'findAudioFeatures' outputs the filepaths of the feature files it found. 
findAudioFeatures :: Bool -> FilePath -> IO (Maybe (FilePath,FilePath,FilePath))
findAudioFeatures verbose fp =
  case getAudioFeatureId fp of
    Nothing     -> return Nothing
    (Just afid) -> do let cfp = afid ++ chromaStr
                          bfp = afid ++ beatStr
                          kfp = afid ++ keyStr
                     
                      cfpExists <- doesFileExist cfp
                      bfpExists <- doesFileExist bfp
                      kfpExists <- doesFileExist kfp
                      
                      when (verbose && cfpExists) (putStrLn ("found: " ++ cfp))
                      when (verbose && bfpExists) (putStrLn ("found: " ++ bfp))
                      when (verbose && kfpExists) (putStrLn ("found: " ++ kfp))
                      
                      case (cfpExists && bfpExists && kfpExists) of
                         True  -> return (Just (cfp,bfp,kfp))
                         False -> return Nothing
           
-- | Evaluluates a single labeling of a piece with a ground truth annotation 
-- visually.
evaluateLabeling :: (Maybe [TimedData Key] -> AudioFeat -> ChordBeatAnnotation) 
                 -> Bool -> FilePath -> FilePath -> Maybe FilePath -> IO Double
evaluateLabeling annotator prnt gtfp featfp keyfp = do
  
  maf <- findAudioFeatures True featfp
  af  <- readAudioFeatures (fromJust maf)
  gt  <- readAnnotation gtfp
  
  case (isJust maf, keyfp, prnt) of
    (True, Nothing,True)  -> 
        do printLn ("using key finding")
           putSegStats Nothing af
           printRelCorrectOverlap (annotator Nothing) af gt
    (True, Nothing,False) -> 
        do return (relCorrectOverlap gt (dumpBeats $ annotator Nothing af))
    (True, Just k ,True)  -> 
        do key <- readAndParseKeyAnn k
           printLn ("using groundTruth key annotation: " ++ show key)
           putSegStats (Just key) af
           printRelCorrectOverlap (annotator (Just key)) af gt
    (True, Just k ,False) -> 
        do key <- readAndParseKeyAnn k
           return (relCorrectOverlap gt (dumpBeats $ annotator (Just key) af))
    (False, _, _)         -> return (-1)
  
-- | Given a ground truth directory and an data directory (containing exactly
-- 3 times as much files as the gt directory) all files will be labeled and
-- the relative correct overlap wil be corrected an presented to the user
batchLabeling :: (Maybe [TimedData Key] -> AudioFeat -> ChordBeatAnnotation) 
              -> FilePath -> FilePath   -> Maybe FilePath -> IO ()
batchLabeling annotator gtfp audiofp keyfp = do
  gt <- getDirectoryContents gtfp
  af <- readAudioFeatureDir  audiofp
  
  -- parse the key annotations [Maybe [TimedData Key]]
  mKeys <- case keyfp of
             Nothing   -> 
                do printLn "using key finding"
                   return $ repeat Nothing 
             (Just fp) -> 
                do printLn "using key ground-truth annotations"
                   kfs <- getDirectoryContents fp
                   -- better to move also the reading etc. to evalR
                   ks  <- mapM (\x ->readAndParseKeyAnn (fp </> x))
                           (sort $ filter ((== ".lab") . takeExtension ) kfs)  
                   return $ map Just ks

  -- this is really hacky, but there is no zipWith3M
  let files = zip (sort $ filter ((== ".lab") . takeExtension ) gt) mKeys
  
  printVersion
  printLn "file\trun time (seconds)\trelative correct overlap"  
  res   <- zipWithM evalR files af 
  -- let res'  = res `using` parList rdeepseq
  printLn ("average: " ++ show (sum (fmap fst res) / genericLength res))
  where
    evalR :: (FilePath, Maybe [TimedData Key]) -> AudioFeat -> IO(Double, Float)
    -- evalR _  Nothing  = error "evalR: Nothing"
    evalR (fp, maybeKey) af = do
      gt <- readAnnotation (gtfp </> fp)
      let result = relCorrectOverlap gt (dumpBeats $ annotator maybeKey af)
          exec   = seq result (return ())
      t1 <- getCPUTime
      exec
      t2 <- getCPUTime
      let runtime = fromIntegral (t2 - t1) / (1000000000000 :: Float)
      printRes fp (result, runtime)
      return (result, runtime)

    printRes :: FilePath -> (Double, Float) -> IO ()
    printRes fp (r,t) = printLn (fp ++ ":\t" ++ showFloat t ++ '\t' : show r)
                        >> hFlush stdout

-- takes a set of features or an audio file and writes the chords to a file
-- the process is also logged in a logfile to keep track of the process
-- (see HarmTrace.IO.FeatExtract)
printLabeling :: BasePaths
              -> (Maybe [TimedData Key] -> AudioFeat -> ChordBeatAnnotation) 
              -> FilePath -> IO ()
printLabeling dirs annotator fp = do
  let -- we use the same logfile for adding harmtrace status
      logf = getOutDir dirs </> takeFileName fp <.> "extract.log" 
      out  = getOutDir dirs </> takeFileName fp <.> "chords.txt" 
      ffp  = getFeatDir dirs </> dropExtension (takeFileName fp) ++ chromaStr

  -- check (silently) if features have been extracted earlier
  hasFt <- findAudioFeatures True ffp

  case (isAudioFile fp, hasFt) of
    -- We found an audio file and no previously extracted features: extract them
    (True, Nothing) -> do -- extract features
                          maf <- extractFeatures dirs fp
                          if isNothing maf
                             then appendFile logf (fp ++";ERROR;0;99\n")
                             else do appendFile logf (fp ++ ";harmtrace;1;80\n")
                                     -- print chords
                                     readAndPrint out maf annotator
                                     appendFile logf (fp++";harmtrace;100;99\n")
                                     appendFile logf (fp++";DONE;100;100\n")
                                  
    -- We found an audio file, but also found matching features
    (True,Just maf) -> do -- appendFile logf (fp ++ ";Initialize;1;1\n")
                          writeFile logf (fp ++ ";harmtrace;1;80\n")
                          readAndPrint out (Just maf) annotator 
                          appendFile logf (fp ++ ";harmtrace;100;99\n")
                          appendFile logf (fp ++ ";DONE;100;100\n")
    
    -- No audio, but one feature file: look for all feature files
    (False,_      ) -> do maf <- findAudioFeatures True fp
                          readAndPrint out maf annotator 
      
-- reads the features and prints the chords
readAndPrint :: FilePath -> Maybe (FilePath, FilePath, FilePath) 
             -> (Maybe [TimedData Key] -> AudioFeat -> ChordBeatAnnotation) 
             -> IO ()
readAndPrint _   Nothing   _         = return ()
readAndPrint out (Just af) annotator = 
  do feat <- readAudioFeatures af
     writeAnnotation out . segPerBeat (getBeats feat) $ annotator Nothing feat
                
-- returns True if fp is an audio file
isAudioFile :: FilePath -> Bool
isAudioFile fp = elem (tail $ takeExtension fp) -- all sox file formats :-)
  [ "8svx", "aif", "aifc", "aiff", "aiffc", "al", "amb", "amr-nb", "amr-wb" 
  , "anb", "au", "avi", "avr", "awb", "cdda", "cdr", "cvs", "cvsd", "cvu", "dat"
  , "dvms", "f32", "f4", "f64", "f8", "ffmpeg", "flac", "fssd", "gsm", "gsrt"
  , "hcom", "htk", "ima", "ircam", "la", "lpc", "lpc10", "lu", "m4a", "m4b"
  , "maud", "mp2", "mp3", "mp4", "mpg", "nist", "ogg", "prc", "raw", "s1"
  , "s16", "s2", "s24", "s3", "s32", "s4", "s8", "sb", "sds", "sf", "sl", "smp"
  , "snd", "sndfile", "sndr", "sndt", "sou", "sox", "sph", "sw", "txw", "u1"
  , "u16", "u2", "u24", "u3", "u32", "u4", "u8", "ub", "ul", "uw", "vms", "voc"
  , "vorbis", "vox", "wav", "wavpcm", "wmv", "wv", "wve", "xa"
  ]


-- reads an annotation
readAnnotation :: FilePath -> IO ChordAnnotation
readAnnotation fp = do f <- readFile fp
                       return (parseData parseAnnotationData f)
  
readAndParseKeyAnn :: FilePath -> IO [TimedData Key]
readAndParseKeyAnn keyfp = do key <- readFile keyfp 
                              return $ parseData parseKeyAnnotationData key

-- writes an annotation to a specific file
writeAnnotation :: FilePath -> ChordBeatAnnotation -> IO ()
writeAnnotation f ca = do hdl <- openFile f ReadWriteMode
                          mapM_ (hPutStr hdl . show) ca
                          hClose hdl

-- segment a chord annotation per beat                       
-- ChordAnnotation = [TimedData ChordLabel] and BeatTrackerData = [NumData] 
segPerBeat :: BeatBarTrackData -> ChordBeatAnnotation -> ChordBeatAnnotation 
segPerBeat  bts cds = segment (start : bts) cds where
  start     = BeatBar (0,prevBeat . snd . beatBar $ head bts)
  
  -- takes a list of beats and a list of chords (where chords can span multiple 
  -- beats) and returns a list of beat synchronised list chords (every chord
  -- annoation has a one beat duration)
  segment :: Show a => [BeatBar] -> [BeatTimedData a] -> [BeatTimedData a]
  segment _   [] = []
  segment [b] [c] 
    | on < offset c = [setOnset c on]
    | otherwise     = [] -- N.B. includes on == offset b!
         where on      = fst $ beatBar b 
  segment (a : b : bs) (c:cs)
    | onset c <= on && offset c >= off 
                = BeatTimedData (getData c) bt on off : segment (b:bs) (c:cs)
    | otherwise = segment (a : b : bs) cs 
         where (on, bt) = beatBar a
               off     = fst $ beatBar b
  -- should not happen
  segment []  c = error ("we ran out of beats! Chords left: " ++ show c )
  segment [b] c = error ("Asynchroneous beat (" ++ show b ++ 
                         ") and chords: "       ++ show c )