{-# LANGUAGE DataKinds #-} {-# LANGUAGE OverloadedStrings #-} module MusicScroll.Providers.MusiXMatch (musiXMatchInstance) where import Control.Category hiding ((.)) import Data.Maybe (catMaybes) import qualified Data.Set as Set import Data.Text (Text, replace, toTitle) import Data.Traversable (mapAccumL) -- import Data.Text.IO as T (readFile) import MusicScroll.Providers.Utils import MusicScroll.TrackInfo (TrackInfo (..)) import Network.HTTP.Req import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (anyAttr, tagOpenLit) musiXMatchInstance :: Provider musiXMatchInstance = Provider { toUrl = toUrl', extractLyricsFromPage = pipeline } toUrl' :: TrackInfo -> Url 'Https toUrl' track = let base :: Url 'Https base = https "www.musixmatch.com" quotedArtist = normalize (tArtist track) quotedSong = normalize (tTitle track) in base /: "lyrics" /: quotedArtist /: quotedSong normalize :: Text -> Text normalize = let noSpaces = replace " " "-" in noSpaces . toTitle -- exampleTrack = TrackInfo "hey jude" "the beatles" "/home" -- testOnFile fp = -- do contents <- T.readFile fp -- return (pipeline contents) pipeline :: Text -> Lyrics pipeline = parseTags >>> mapAccumL discriminate False >>> snd >>> catMaybes >>> innerText >>> Lyrics -- discriminate :: Bool -> Tag Text -> (Bool, Maybe (Tag Text)) discriminate onSpan@True tag | isTagText tag = (onSpan, pure tag) discriminate onSpan tag | tagOpenLit "span" goodClasses tag = (True, Nothing) | isTagCloseName "span" tag = (False, Nothing) | otherwise = (onSpan, Nothing) where goodClasses = anyAttr (\attr -> Set.member attr spanDiscr) spanDiscr = Set.fromList [ ("class", "lyrics__content__ok"), ("class", "lyrics__content__warning") ]