{-# OPTIONS_GHC -Wall #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} -- Testing module HarmTrace.IO.Main where -- Parser stuff import Text.ParserCombinators.UU -- Music stuff import HarmTrace.HarmTrace import HarmTrace.Base.MusicRep import HarmTrace.Models.Jazz.Instances () import HarmTrace.HAnTree.Tree (Tree) import HarmTrace.HAnTree.ToHAnTree import HarmTrace.IO.Errors -- import HarmTrace.Matching.GuptaNishimuraEditMatch import HarmTrace.Matching.Standard -- import HarmTrace.Matching.Matching (getMatch) -- import HarmTrace.Matching.AlignmentFaster (getAlignDist) import HarmTrace.Matching.Alignment (getAlignDist, getHAnDist) #ifdef AUDIO -- Audio stuff import HarmTrace.Base.Parsing import HarmTrace.Audio.Parser import HarmTrace.Audio.BeatChroma import HarmTrace.Audio.Annotations import HarmTrace.Audio.Harmonize import HarmTrace.Audio.Evaluation import HarmTrace.Audio.ChordTypes (ChordAnnotation) import Data.List (genericLength) #endif -- Library modules import System.Console.ParseArgs hiding (args) -- cabal install parseargs import Data.List (sort, groupBy, intersperse) import Control.Arrow ((***)) import System.FilePath import System.Directory import System.IO import Text.Regex.TDFA hiding (match) import Text.Printf (printf) import System.CPUTime import Data.Maybe (isJust, fromJust) import Data.Binary -- Parallelism import Control.Parallel.Strategies -------------------------------------------------------------------------------- -- Data set Info -------------------------------------------------------------------------------- biabPat :: String biabPat = "^(.*)_id_([0-9]{5})_(allanah|wdick|community|midicons|realbook).(M|S|m|s)(G|g)[0-9A-Za-z]{1}.txt$" getInfo :: String -> Maybe [String] getInfo fileName = do let (_,_,_,groups) <- fileName =~~ biabPat :: Maybe (String,String,String,[String]) return groups getTitle, getId, getDb :: String -> String getTitle fn = getInfo' 0 fn getId fn = getInfo' 1 fn getDb fn = getInfo' 2 fn getInfo' :: Int -> String -> String getInfo' i fn = maybe "no_info" (!!i) (getInfo fn) createGroundTruth :: [String] -> [(String, String)] createGroundTruth files = [ (getTitle x, getId x) | x <- files ] getClassSizes :: [String] -> [(String,[String])] getClassSizes = map ((head *** id) . unzip) . groupBy gf . createGroundTruth where gf (name1, _key1) (name2, _key2) = name1 == name2 writeGroundTruth :: FilePath -> FilePath -> IO () writeGroundTruth infp outfp = do files <- readDataDir infp writeFile outfp . Prelude.tail $ concatMap merge (createGroundTruth files) where merge :: (String, String) -> String merge (x,y) = '\n' : y ++ "\t" ++ x -------------------------------------------------------------------------------- -- Symbolic Parsing IO -------------------------------------------------------------------------------- -- parses a string of chords and returns a parse tree with the harmony structure parseTree, parseTreeVerb :: (GTree g) => Grammar g -> [PPOption] -> String -> IO (ParseResult g) parseTree g opts s = do let pr@(ParseResult _ tks _ _ n te pe _) = postProc opts $ string2Piece g s putStrLn ("parsed " ++ show (length tks) ++ " chords into " ++ show n ++ " ambiguous trees") if not $ null te then showErrors "tokenizer: " te else putStr "" if not $ null pe then showErrors "parser: " pe else putStr "" return pr parseTreeVerb g opts s = do let pr@(ParseResult _ tks _ _ n te pe _) = postProc opts $ string2Piece g s putStrLn ("parsed " ++ show (length tks) ++ " chords into " ++ show n ++ " ambiguous trees") if not $ null te then mapM_ print te else putStr "" if not $ null pe then mapM_ print pe else putStr "" return pr -- Batch analyzing a directory with chord sequence files with reduced output. parseDir :: (GTree g) => Grammar g -> [PPOption] -> FilePath -> Maybe FilePath -> IO () parseDir g opts filepath bOut = getDirectoryContents filepath >>= parseDir' g opts bOut filepath . sort parseDir' :: (GTree g) => Grammar g -> [PPOption] -> Maybe FilePath -> String -> [String] -> IO () parseDir' g opts bOut fp fs = do putStr "Filename\tNumber of trees\t" putStr "Insertions\tDeletions\tReplacements\tDeletions at the end\t" putStr "Tot_Correction\tNr_of_chords\t" putStrLn "Error ratio\tTime taken" let process :: FilePath -> FilePath -> IO ([ChordLabel],Tree HAn) process path x = do content <- readFile (path x) let (ParseResult _ tks ps ts nr e1 e2 _) = postProc opts $ string2Piece g content -- @Pedro: I think that the (length ts) only is here to -- evaluate all trees right? Since the tree selection is now -- incorporated in the postprocessing I replaced it with -- length ps -- t = seq (length ts) (return ()) t = seq (length ps) (return ()) ErrorNrs i d e r = countErrors e2 errRat = errorRatio e2 tks nrOfChords = length tks -- (mergeDups toks) t1 <- getCPUTime t t2 <- getCPUTime let diff = fromIntegral (t2 - t1) / (1000000000 :: Float) when (not $ null e1) $ putErrLn (show x ++ ": " ++ show e1) printLn . concat $ intersperse "\t" [ x, show nr , show i, show d, show r, show e , show (i+d+e+r) , show nrOfChords, showFloat errRat , showFloat diff] return (tks, ts) res <- mapM (process fp) (filter ((== ".txt") . takeExtension) fs) case bOut of Nothing -> return () Just bf -> encodeFile bf (unzip res :: ([[ChordLabel]],[Tree HAn])) -------------------------------------------------------------------------------- -- Symbolic Matching IO -------------------------------------------------------------------------------- data MatchMode = STDiff | LCES | HAnAlign | Align deriving (Eq, Ord, Show) -- should return True if sim a b == sim b a and False otherwise isSymmetrical :: MatchMode -> Bool -- @pedro: I guess it is symmetrical, but I'm not 100% sure isSymmetrical STDiff = False isSymmetrical LCES = True isSymmetrical HAnAlign = True isSymmetrical Align = True -- matches a directory of chord description files dirMatch :: (GTree g) => Grammar g -> [PPOption] -> Maybe FilePath -> MatchMode -> Maybe Float -> FilePath -> IO () dirMatch g o bIn m me fp = do fs <- readDataDir fp let process s = let (ParseResult _ tks _ ts _nrts _ ePar _) = postProc o $ string2Piece g s in (tks, ts, errorRatio ePar tks) filterError = if isJust me then filter (\(_,_,e) -> e <= fromJust me) else id pss <- mapM (\f -> readFile' (fp f)) fs (tks, ps) <- case bIn of Just bp -> decodeFile bp :: IO ([[ChordLabel]],[Tree HAn]) Nothing -> let (toks, ps', _) = unzip3 (filterError (map process pss)) in return (toks, ps' `using` parList rdeepseq) let fsQLab = labelQuery fs -- print the ireval format ... putStr "true\n" if (m == LCES || m == HAnAlign || m == Align) then putStr "false\n" else putStr "true\n" mapM_ (putStr . (++ "\t"). getId) (fst . unzip $ filter snd fsQLab) putChar '\n' mapM_ (putStr . (++ "\t"). getId) fs putChar '\n' -- do the actual matching ... let match :: (a -> a -> Float) -> [a] -> [([Float],Bool)] match sim l = [ ([ calcSim sim x y i j | (j,y) <- zip [0..] l], xIsQ) -- :: ([Float],Bool) | (i,x,xIsQ) <- zip3 [0..] l (snd . unzip $ fsQLab)] -- calculate the similarity sim a b, or, if calculated, look up sim b a calcSim :: (a -> a -> Float) -> a -> a -> Int -> Int -> Float calcSim sim x y i j = if isSymmetrical m && j < i then (fst (simMat !! j)) !! i else sim x y simMat, querySimMat :: [([Float],Bool)] simMat = (case m of -- full n x n similarity matrix STDiff -> match diffChordsLen tks LCES -> error "disabled: fix me" HAnAlign -> match getHAnDist ps Align -> match (getAlignDist tempKeyC tempKeyC) tks ) where tempKeyC = (Key (Note Nothing C) MajMode) -- filter all non-queries, lazy evaluation should ensure the -- non-queries will not be evaluated querySimMat = (filter snd simMat) `using` parList rdeepseq sequence_ [ printLine x | (x,_) <- querySimMat] printLine :: [Float] -> IO () printLine l = printLn (foldr (\a b -> showFloat a ++ "\t" ++ b) "" l) -- labels (True/False) the songs that have multiple versions and are queries labelQuery :: [FilePath] -> [(FilePath, Bool)] labelQuery l = let cs = getClassSizes l in map (\x -> (x,(>1) . length . fromJust $ lookup (getTitle x) cs)) l #ifdef AUDIO -------------------------------------------------------------------------------- -- Audio Data IO -------------------------------------------------------------------------------- -- the strings that build up a data file vampStr, keyStr, chromaStr, beatStr :: String chromaStr = "nnls-chroma_nnls-chroma_bothchroma" keyStr = "qm-vamp-plugins_qm-keydetector_keystrength" beatStr = "qm-vamp-plugins_qm-tempotracker_beats" vampStr ="(^.+)_vamp_("++ chromaStr ++ '|' : keyStr ++ '|' : beatStr ++ ").csv$" parseAnnotation :: GTree g => Grammar g -> FilePath -> FilePath -> IO (ParseResult g) parseAnnotation g fpkey fpann = do key <- readFile fpkey ann <- readFile fpann return $ gt2Piece g key ann -- reads an annotation readAnnotation :: FilePath -> IO ChordAnnotation readAnnotation fp = do f <- readFile fp return (parseData parseAnnotationData f) -- maps readAudioFeat over a directory readAudioFeatureDir :: FilePath -> IO [Maybe AudioFeat] readAudioFeatureDir fp = do fs <- getDirectoryContents fp mapM (readAudioFeat fp) (group . sort $ filter (\x -> takeExtension x == ".csv") fs) where group :: [FilePath] -> [(FilePath, FilePath, FilePath)] group (c:k:b:fs) = (c,b,k) : group fs group [] =[] group _ = error ("the number of files in the filepath " ++ "cannot be divided by 3") -- given a base file path and a triple of three filenames describing -- a chroma, beat and key file, parses all data and returns an audioFeat -- if everything went well. readAudioFeat :: FilePath -> (FilePath, FilePath, FilePath) -> IO (Maybe AudioFeat) readAudioFeat baseURI (chroma, beat, key) = -- get the part of the filenames before _vamp_ and use it as ID let (idStr:ids) = map ((maybe "" head) . regexMatchGroups vampStr) [chroma,beat,key] in if all (idStr ==) ids then do -- if the IDs are the same then proceed dChroma <- readFile (baseURI chroma) dBeat <- readFile (baseURI beat) dKey <- readFile (baseURI key) return . Just $ AudioFeat idStr (parseData parseChordinoData dChroma) (parseData parseBeatData dBeat) (parseData parseKeyStrengthData dKey) else do putStrLn ("found non-matching set of audiofeatures with ids " ++ show ids) return Nothing {- | evaluluates a single labeling of a piece with a ground truth annotation visually: time match GT MPTREE 0.0 True NNone NNone 0.2 True EMaj EMaj ... etc. The argurments need some explanation: the first argurment should be the filepath to one of the data files (there must be three, a chroma, a beat and a key file, to create an AudioFeat), but without all text after "_vamp_" e.g. for reading file1_vamp_nnls-chroma_nnls-chroma_bothchroma.csv only file1 should be presented. The function below will now read all three data files in by adding chromaStr, beatStr and keyStr, respectively the second file path should just point at the ground truth annotation -} evaluateLabeling :: FilePath -> FilePath -> IO Double evaluateLabeling gtfp audiofp = do let (path, file) = splitFileName audiofp files = ((file ++ chromaStr <.> "csv"),(file ++ beatStr <.> "csv"), (file ++ keyStr <.> "csv")) gt <- readAnnotation gtfp (Just af) <- readAudioFeat path files -- mapM print gt -- let test = (processAudioFeat simpleAnnotator af) -- mapM print test putStrLn "time\tmatch\tGT\tMPTREE" printRelCorrectOverlap simpleAnnotator af gt -- given a ground truth directory and an data directory (containing exactly -- 3 times as much files as the gt directory) all files will be labeled and -- the relative correct overlap wil be corrected an presented to the user batchLabeling :: FilePath -> FilePath -> IO () -- [Double] batchLabeling gtfp audiofp = do gt <- getDirectoryContents gtfp af <- readAudioFeatureDir audiofp rco <- zipWithM printEval (sort $ filter ((== ".lab") . takeExtension ) gt) af putStrLn ("average: " ++ show (sum rco / genericLength rco)) where printEval :: FilePath -> Maybe AudioFeat -> IO Double printEval _ Nothing = error "printEval: Nothing" printEval fp (Just af) = do gt <- readAnnotation (gtfp fp) -- let test = processAudioFeat (harmonyAnnotator (getKey af)) af let test = processAudioFeat simpleAnnotator af result = relCorrectOverlap gt test putStrLn (fp ++ ':' : show af ++ ' ' : show result) return result #endif -------------------------------------------------------------------------------- -- Utils -------------------------------------------------------------------------------- putErrLn :: String -> IO() putErrLn = hPutStrLn stderr printLn :: String ->IO () printLn s = putStrLn s >> hFlush stdout regexMatchGroups :: String -> String -> Maybe [String] regexMatchGroups pat str = do (_,_,_,groups) <- str =~~ pat :: Maybe (String,String,String,[String]) return groups -- Stricter readFile hGetContents' :: Handle -> IO [Char] hGetContents' hdl = do e <- hIsEOF hdl if e then return [] else do c <- hGetChar hdl cs <- hGetContents' hdl return (c:cs) readFile' :: FilePath -> IO [Char] readFile' fn = do hdl <- openFile fn ReadMode xs <- hGetContents' hdl hClose hdl return xs readDataDir :: FilePath -> IO [FilePath] readDataDir fp = do fs <- getDirectoryContents fp return . sort $ filter (\str -> str =~ biabPat) fs -- | Shows a Float with 5 decimal places showFloat :: Float -> String showFloat = printf "%.6f"