module Parser ( parseClassicExperts, parseClassicAlgo , parseClassicAlgoVM1, parseClassicAlgoVM2, parseClassicAlgoMP, parseClassicAlgoSIACF1, parseClassicAlgoSIACP, parseClassicAlgoSIACR , parseFolkAlgoVM1, parseFolkAlgoVM2, parseFolkAlgoMP, parseFolkAlgoSIACF1, parseFolkAlgoSIACP, parseFolkAlgoSIACR, parseFolkAlgoCOSIA, parseFolkAlgoSIACFP, parseHEMANAnnotations, parseHEMANAlgoSIACRD, parseHEMANAlgoSIACPD, parseHEMANAlgoSIACR, parseHEMANAlgoSIACP, parseHEMANAlgoSIACF1, parseHEMANAlgoSIACF1D , parseEuroAlgoSIACF1, parseEuroAlgoSIACF1D, parseEuroAlgoSIACP, parseEuroAlgoSIACPD, parseEuroAlgoSIACR, parseEuroAlgoSIACRD , parsejazzAlgoSIACF1, parsejazzAlgoSIACF1D, parsejazzAlgoSIACP, parsejazzAlgoSIACPD, parsejazzAlgoSIACR, parsejazzAlgoSIACRD , parseFolkExperts, parseFolkAlgo, parseRandom , parseMusic , cd, listDirs, listFiles, emptyDirectory ) where import Control.Monad (forM, mapM_, filterM, void) import Data.List (sort, isInfixOf, sortOn, groupBy) import System.Directory import Text.Parsec import Text.Parsec.Language import Text.Parsec.String import qualified Text.Parsec.Token as Tokens import Types import MIDI (readFromMidi) -------------------- -- Parsers. -- | Parse a music piece from the MIREX dataset. -- the song can be one of [bach, beethoven, chopin, gibbons, mozart] parseMusic :: Song -> IO MusicPiece parseMusic song = cd ("data/pieces/" ++ sanitize song ++ "/monophonic/csv") $ do [f_music] <- listFiles parseMany mirexP f_music where -- | Parse one entry from a MIREX piece of music. mirexP :: Parser Note mirexP = Note <$> (floatP <* sepP) <*> (intP <* sepP) <* (intP <* sepP) <* (floatP <* sepP) <* intP <* newline parseHemanMusic :: Song -> IO MusicPiece parseHemanMusic song = cd ("data/HEMAN/piece/csv" ++ sanitize song) $ do [f_music] <- listFiles parseMany mirexP f_music where -- | Parse one entry from a MIREX piece of music. mirexP :: Parser Note mirexP = Note <$> (floatP <* sepP) <*> (intP <* sepP) <* (intP <* sepP) <* (floatP <* sepP) <* intP <* newline -- ======== parsejazzAlgoSIACRD :: IO [PatternGroup] parsejazzAlgoSIACRD = cd "data/jazz/patterns/alg/tlrd/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIARD")) return algPgs parsejazzAlgoSIACPD :: IO [PatternGroup] parsejazzAlgoSIACPD = cd "data/jazz/patterns/alg/tlpd/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAPD")) return algPgs parsejazzAlgoSIACF1D :: IO [PatternGroup] parsejazzAlgoSIACF1D = cd "data/jazz/patterns/alg/tlf1d/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAF1D")) return algPgs parsejazzAlgoSIACR :: IO [PatternGroup] parsejazzAlgoSIACR = cd "data/jazz/patterns/alg/tlr/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAR")) return algPgs parsejazzAlgoSIACP :: IO [PatternGroup] parsejazzAlgoSIACP = cd "data/jazz/patterns/alg/tlp/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAP")) return algPgs parsejazzAlgoSIACF1 :: IO [PatternGroup] parsejazzAlgoSIACF1 = cd "data/jazz/patterns/alg/tlf1/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAF1")) return algPgs -- ======= parseEuroAlgoSIACRD :: IO [PatternGroup] parseEuroAlgoSIACRD = cd "data/eurovision/patterns/alg/tlrd/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIARD")) return algPgs parseEuroAlgoSIACPD :: IO [PatternGroup] parseEuroAlgoSIACPD = cd "data/eurovision/patterns/alg/tlpd/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAPD")) return algPgs parseEuroAlgoSIACF1D :: IO [PatternGroup] parseEuroAlgoSIACF1D = cd "data/eurovision/patterns/alg/tlf1d/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAF1D")) return algPgs parseEuroAlgoSIACR :: IO [PatternGroup] parseEuroAlgoSIACR = cd "data/eurovision/patterns/alg/tlr/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAR")) return algPgs parseEuroAlgoSIACP :: IO [PatternGroup] parseEuroAlgoSIACP = cd "data/eurovision/patterns/alg/tlp/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAP")) return algPgs parseEuroAlgoSIACF1 :: IO [PatternGroup] parseEuroAlgoSIACF1 = cd "data/eurovision/patterns/alg/tlf1/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAF1")) return algPgs -- ======== parseHEMANAlgoSIACRD :: IO [PatternGroup] parseHEMANAlgoSIACRD = cd "data/HEMAN/patterns/alg/tlrd/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIARD")) return algPgs parseHEMANAlgoSIACPD :: IO [PatternGroup] parseHEMANAlgoSIACPD = cd "data/HEMAN/patterns/alg/tlpd/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAPD")) return algPgs parseHEMANAlgoSIACF1D :: IO [PatternGroup] parseHEMANAlgoSIACF1D = cd "data/HEMAN/patterns/alg/tlf1d/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAF1D")) return algPgs parseHEMANAlgoSIACR :: IO [PatternGroup] parseHEMANAlgoSIACR = cd "data/HEMAN/patterns/alg/tlr/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAR")) return algPgs parseHEMANAlgoSIACP :: IO [PatternGroup] parseHEMANAlgoSIACP = cd "data/HEMAN/patterns/alg/tlp/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAP")) return algPgs parseHEMANAlgoSIACF1 :: IO [PatternGroup] parseHEMANAlgoSIACF1 = cd "data/HEMAN/patterns/alg/tlf1/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIAF1")) return algPgs parseHEMANAnnotations :: IO [PatternGroup] parseHEMANAnnotations = cd "data/HEMAN/patterns/annotations/" $ do algPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "Human")) return algPgs -- | Parse all (expert) pattern groups from the classical dataset. parseClassicExperts :: IO [PatternGroup] parseClassicExperts = cd "data/pieces" $ do f_roots <- listDirs res <- forM f_roots $ \f_root -> cd (f_root ++ "/monophonic/repeatedPatterns") $ do f_patExs <- listDirs allPats <- forM f_patExs $ \f_patEx -> cd f_patEx $ do f_patTys <- listDirs forM f_patTys $ \f_patTy -> do basePat:pats <- cd (f_patTy ++ "/occurrences/csv") $ do f_pats <- listFiles pforM f_pats (parseMany noteP) return $ PatternGroup { piece_name = f_root , expert_name = f_patEx , pattern_name = f_patTy , basePattern = basePat , patterns = pats } return $ concat allPats return $ concat res -- | Parse all (algorithmic) pattern groups from the classical dataset. parseClassicAlgo :: IO [PatternGroup] parseClassicAlgo = cd "data/algOutput" $ do f_algs <- listDirs allPgs <- forM f_algs $ \f_alg -> cd f_alg $ do f_versions <- listDirs if null f_versions then do listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece f_alg)) else concat <$> forM f_versions (\f_v -> cd f_v $ listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece $ f_alg ++ ":" ++ f_v))) return (concat allPgs) -- | Parse all (algorithmic, VM1) pattern groups from the classical dataset. parseClassicAlgoVM1 :: IO [PatternGroup] parseClassicAlgoVM1 = cd "data/algOutput/2016GV/VM1/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "VM1")) return allPgs parseClassicAlgoVM2 :: IO [PatternGroup] parseClassicAlgoVM2 = cd "data/algOutput/2016GV/VM2/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "VM2")) return allPgs parseClassicAlgoMP :: IO [PatternGroup] parseClassicAlgoMP = cd "data/algOutput/2016MP/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "MP")) return allPgs parseClassicAlgoSIACF1 :: IO [PatternGroup] parseClassicAlgoSIACF1 = cd "data/algOutput/2016DM/SIATECCompressF1/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACF1")) return allPgs parseClassicAlgoSIACP :: IO [PatternGroup] parseClassicAlgoSIACP = cd "data/algOutput/2016DM/SIATECCompressP/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACP")) return allPgs parseClassicAlgoSIACR :: IO [PatternGroup] parseClassicAlgoSIACR = cd "data/algOutput/2016DM/SIATECCompressR/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACR")) return allPgs -- | Parse all (algorithmic, VM1) pattern groups from the folk dataset. parseFolkAlgoVM1 :: IO [PatternGroup] parseFolkAlgoVM1 = cd "data/MTC/patterns/alg/VM1/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "VM1")) return allPgs parseFolkAlgoVM2 :: IO [PatternGroup] parseFolkAlgoVM2 = cd "data/MTC/patterns/alg/VM2/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "VM2")) return allPgs parseFolkAlgoMP :: IO [PatternGroup] parseFolkAlgoMP = cd "data/MTC/patterns/alg/MP/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "MP")) return allPgs parseFolkAlgoSIACF1 :: IO [PatternGroup] parseFolkAlgoSIACF1 = cd "data/MTC/patterns/alg/SIAF1/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACF1")) return allPgs parseFolkAlgoSIACP :: IO [PatternGroup] parseFolkAlgoSIACP = cd "data/MTC/patterns/alg/SIAP/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACP")) return allPgs parseFolkAlgoSIACR :: IO [PatternGroup] parseFolkAlgoSIACR = cd "data/MTC/patterns/alg/SIAR/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACR")) return allPgs parseFolkAlgoCOSIA :: IO [PatternGroup] parseFolkAlgoCOSIA = cd "data/MTC/patterns/alg/DM/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "COSIA")) return allPgs parseFolkAlgoSIACFP :: IO [PatternGroup] parseFolkAlgoSIACFP = cd "data/MTC/patterns/alg/SIARCT-CFP/" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "SIACFP")) return allPgs -- | Parse all (expert) pattern groups from the dutch folk dataset. parseFolkExperts :: IO [PatternGroup] parseFolkExperts = cd "data/MTC/patterns/expert" $ do allPgs <- listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece "exp")) return (groupPatterns allPgs) where groupPatterns :: [PatternGroup] -> [PatternGroup] groupPatterns = (foldl1 combinePatterns <$>) . groupBy samePattern . sortOn show samePattern :: PatternGroup -> PatternGroup -> Bool samePattern (PatternGroup p e pa _ _) (PatternGroup p' e' pa' _ _) = p == p' && e == e' && pa == pa' combinePatterns :: PatternGroup -> PatternGroup -> PatternGroup combinePatterns p1@(PatternGroup p e pa b os) p2@(PatternGroup _ _ _ b' os') | samePattern p1 p2 = PatternGroup p e pa b $ (b' : os) ++ os' | otherwise = error "Cannot combine occurences of different patterns" -- | Parse all (algorithmic) pattern groups from the dutch folk dataset. parseFolkAlgo :: IO [PatternGroup] parseFolkAlgo = cd "data/MTC/patterns/alg" $ do f_algs <- listDirs allPgs <- forM f_algs $ \f_alg -> cd f_alg $ listFiles >>= ((concat <$>) . pmapM (parseAlgoPiece f_alg)) return (concat allPgs) -- | Parse all patterns from the random dutch folk dataset and form random groups. parseRandom :: IO [PatternGroup] parseRandom = cd "data/MTC/ranexcerpts" $ do f_groups <- listDirs allPgs <- forM f_groups $ \f_group -> cd f_group $ do fs <- listFiles let families = groupBy (\x y -> sanitize x == sanitize y) $ sortOn sanitize fs forM families $ \family -> do (base:pats) <- pmapM readFromMidi family -- convert MIDI to Pattern return PatternGroup { piece_name = sanitize (head family) , expert_name = "RAND" , pattern_name = f_group , basePattern = base , patterns = pats } return (concat allPgs) parseAlgoPiece :: String -> FilePath -> IO [PatternGroup] parseAlgoPiece algo_n fname = parseMany (patternGroupP (sanitize fname) algo_n) fname -- | Normalize names of musical pieces to a static representation. sanitize :: String -> String sanitize s -- Classical pieces | (("bach" `isInfixOf` s) || ("wtc" `isInfixOf` s)) && not ("1" `isInfixOf` s) && not ("2" `isInfixOf` s) = "bachBWV889Fg" | ("beethoven" `isInfixOf` s) || ("sonata01" `isInfixOf` s) = "beethovenOp2No1Mvt3" | ("chopin" `isInfixOf` s) || ("mazurka" `isInfixOf` s) = "chopinOp24No4" | ("gibbons" `isInfixOf` s) || ("silver" `isInfixOf` s) = "gibbonsSilverSwan1612" | ("mozart" `isInfixOf` s) || ("sonata04" `isInfixOf` s) = "mozartK282Mvt2" -- HEMAN pieces | ("bach1" `isInfixOf` s) = "bach1" | ("bach2" `isInfixOf` s) = "bach2" | ("bee1" `isInfixOf` s) = "bee1" | ("mo155" `isInfixOf` s) = "mo155" | ("mo458" `isInfixOf` s) = "mo458" -- Folk pieces | ("Daar_g" `isInfixOf` s) = "DaarGingEenHeer" | ("Daar_r" `isInfixOf` s) = "DaarReedEenJonkheer" | ("Daar_w" `isInfixOf` s) = "DaarWasLaatstmaalEenRuiter" | ("Daar_z" `isInfixOf` s) = "DaarZouErEenMaagdjeVroegOpstaan" | ("Een_l" `isInfixOf` s) = "EenLindeboomStondInHetDal" | ("Een_S" `isInfixOf` s) = "EenSoudaanHadEenDochtertje" | ("En" `isInfixOf` s) = "EnErWarenEensTweeZoeteliefjes" | ("Er_r" `isInfixOf` s) = "ErReedErEensEenRuiter" | ("Er_was_een_h" `isInfixOf` s) = "ErWasEenHerderinnetje" | ("Er_was_een_k" `isInfixOf` s) = "ErWasEenKoopmanRijkEnMachtig" | ("Er_was_een_m" `isInfixOf` s) = "ErWasEenMeisjeVanZestienJaren" | ("Er_woonde" `isInfixOf` s) = "ErWoondeEenVrouwtjeAlOverHetBos" | ("Femmes" `isInfixOf` s) = "FemmesVoulezVousEprouver" | ("Heer_Halewijn" `isInfixOf` s) = "HeerHalewijn" | ("Het_v" `isInfixOf` s) = "HetVrouwtjeVanStavoren" | ("Het_was_l" `isInfixOf` s) = "HetWasLaatstOpEenZomerdag" | ("Het_was_o" `isInfixOf` s) = "HetWasOpEenDriekoningenavond" | ("Ik" `isInfixOf` s) = "IkKwamLaatstEensInDeStad" | ("Kom" `isInfixOf` s) = "KomLaatOnsNuZoStilNietZijn" | ("Lieve" `isInfixOf` s) = "LieveSchipperVaarMeOver" | ("O_God" `isInfixOf` s) = "OGodIkLeefInNood" | ("Soldaat" `isInfixOf` s) = "SoldaatKwamUitDeOorlog" | ("Vaarwel" `isInfixOf` s) = "VaarwelBruidjeSchoon" | ("Wat" `isInfixOf` s) = "WatZagIkDaarVanVerre" | ("Zolang" `isInfixOf` s) = "ZolangDeBoomZalBloeien" | otherwise = s patternGroupP :: String -> String -> Parser PatternGroup patternGroupP piece_n algo_n = PatternGroup piece_n algo_n <$> nameP 'p' <*> patternP <*> many patternP where patternP :: Parser Pattern patternP = nameP 'o' *> many noteP nameP :: Char -> Parser String nameP c = char c *> ((:) <$> return c <*> many1 alphaNum) <* lineP -------------------- -- Parser utilities. parseMany :: Parser a -> FilePath -> IO [a] parseMany p f = do input <- readFile f case runParser ((many p <* many lineP) <* eof) () f input of Left err -> error $ show err Right x -> return x noteP :: Parser Note noteP = Note <$> (floatP <* sepP) <*> intP <* lineP sepP :: Parser String sepP = string ", " intP :: Parser Integer intP = Tokens.integer haskell <* optional (string "." <* many (string "0") <* optional (string " ")) floatP :: Parser Double floatP = negP <|> Tokens.float haskell where negP = (\i -> -i) <$> (string "-" *> Tokens.float haskell) lineP :: Parser () lineP = void (newline <|> crlf) <|> void (many1 space) ------------------------- -- File-system utilities. cd :: FilePath -> IO a -> IO a cd fpath c = createDirectoryIfMissing True fpath >> withCurrentDirectory fpath c listDirs :: IO [FilePath] listDirs = sort <$> (getCurrentDirectory >>= listDirectory >>= filterM doesDirectoryExist) listFiles :: IO [FilePath] listFiles = sort <$> (getCurrentDirectory >>= listDirectory >>= filterM ((not <$>) . doesDirectoryExist)) emptyDirectory :: FilePath -> IO () emptyDirectory f_root = cd f_root $ mapM_ removeFile =<< listFiles