-- | Parse WaveSurfer files.
--
-- WaveSurfer is an application for analysing and annotating audio files.
-- <http://www.speech.kth.se/wavesurfer/>
--
-- A WaveSurfer file consists of lines containing a label and corresponding
-- onsets and offsets in seconds:
--
-- > onset offset label
--
-- This library supports an extended file format, where the first label can be
-- followed by additional key-value pairs, separated by colons:
--
-- > onset offset label key1[:value1] key2[:value2] ...
--
module Sound.WaveSurfer (
    Time, DTime,
    Label, Attribute,
    Record(..), duration,
    Result,
    encode, decode,
    interact
) where

import Data.Binary.Put                      (Put, runPut, putByteString)
import Data.Maybe                           (mapMaybe)

import Data.ByteString.Lazy                 (ByteString)
import qualified Data.ByteString            as BS
import qualified Data.ByteString.Char8      as BSC
import qualified Data.ByteString.Lazy       as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import Data.ByteString.Lex.Double           (readDouble)

import Text.Delimited                       (Result)
import qualified Text.Delimited             as DT
import Prelude                              hiding (interact)
import Text.Show.ByteString                 (putAscii, showp, unlinesP, unwordsP)
import qualified Text.Show.ByteString       as ShowP

-- | Time type.
type Time = Double

-- | Type for time differences.
type DTime = Time

-- | Record label.
type Label = BS.ByteString

-- | Key-value pair.
type Attribute = (Label, Maybe Label)

-- | Record representing a single line in the WaveSurfer file.
data Record = Record {
    onset      :: Time,
    offset     :: Time,
    label      :: Label,
    attributes :: [Attribute]
} deriving (Eq, Show)

-- | A file's content is a list of records.
type Content = [Record]

-- | Return the duration of a 'Record' in seconds.
duration :: Record -> DTime
duration e = offset e - onset e

whiteSpace :: [Char]
whiteSpace = [' ', '\t']

-- | Comment character.
comment :: Char
comment = '#'

putAttribute :: Attribute -> Put
putAttribute (key, value) = do
    putByteString key
    case value of
        Just v  -> putAscii ':' >> putByteString v
        Nothing -> return ()

putRecord :: Record -> Put
putRecord r = unwordsP ([t0, t1, putByteString (label r)] ++ attrs)
    where
        t0    = showp (onset r)
        t1    = showp (offset r)
        attrs = map putAttribute (attributes r)

putContent :: Content -> Put
putContent = unlinesP . map putRecord

-- | Encode 'Content' to a lazy 'ByteString'.
encode :: Content -> ByteString
encode = runPut . putContent

-- | Monad instance for Either.
instance Monad (Either a) where
    return = Right
    x >>= f = case x of
                Left  a -> Left a
                Right r -> f r

-- | Decode attributes.
decodeAttributes :: DT.Record -> Result [Attribute]
decodeAttributes = return . map f
    where f s = let (k, v) = BSC.break (==':') s
                    v'     = BS.tail v
                in (k, if BS.null v' then Nothing else Just v')

-- | Decode a 'Double'.
decodeDouble :: DT.Field -> Result Double
decodeDouble s = case readDouble s of
                    Just (d, _) -> Right d
                    Nothing     -> Left ("Couldn't parse Double: " ++ show s)

-- | Decode a 'Record'.
decodeRecord :: DT.Record -> Result Record
decodeRecord (t0:t1:lbl:attrs) = do
    t0'    <- decodeDouble t0
    t1'    <- decodeDouble t1
    attrs' <- decodeAttributes attrs
    return (Record t0' t1' lbl attrs')
decodeRecord r = Left ("Couldn't parse record: " ++ show r)

-- | Decode 'Content' from a lazy 'ByteString'.
decode :: ByteString -> Result Content
decode s = do
    xs <- DT.decode whiteSpace s
    mapM decodeRecord (filter f xs)
    where
        f []     = False
        f (r:rs) = not (BSC.null r) && (BSC.head r /= comment)

interact :: (Record -> Record) -> ByteString -> Result ByteString
interact f s = do
    c <- decode s
    let c' = map f c
    return (encode c')