src/ID3/Parser/General.hs

-- | This module contain different general parsers.
module ID3.Parser.General where

----  IMPORTS
import Text.ParserCombinators.Poly.State (Parser, runParser, stGet, stUpdate, onFail, next,
                                          satisfy, adjustErr, exactly)
import ID3.Parser.UnSync
import ID3.Type.Header (TagVersion, unsynch)
import ID3.Type.Flags (Flags(..))
--import qualified Data.ByteString.Lazy as BS
--import qualified Data.ByteString.Lazy.Char8 as C
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as C
import Data.Word (Word8)
import Bits
import Data.Accessor
import qualified Data.Text as Text
import Data.Text.Encoding (decodeASCII, decodeUtf16LE, decodeUtf16BE, decodeUtf8)
import Data.ByteString.Lazy.UTF8 (toString)
import Codec.Binary.UTF8.String as Codec
import Control.Monad (when)
----

---,--------------------------------
-- | Just a synonim for one item of input stream
type Token = Word8

-- | Parsers state
data St = State { id3TagVersion  :: TagVersion
                , headerFlags    :: Flags    -- ^ flags from tag's header
                , tagPos         :: Integer  -- ^ current position in tag
--              , frFlags        :: [Bool]}  -- ^ current frame flags
                , curSize        :: Integer  -- ^ current frame size
                }

instance Show St where
    show st = show (tagPos st, curSize st)
initState = State (4, 0) (Flags []) 0 10

type CharEncoding = Integer

type TagParser = Parser St Token

run :: TagParser a -> [Word8] -> (Either String a, [Token])
run p cont = (result, rest) where (result, _, rest) = runParser p initState cont


---,--------------------------------
tagVersionGet :: TagParser TagVersion
tagVersionGet = stGet >>= return . id3TagVersion

tagVersionSet :: TagVersion -> TagParser ()
tagVersionSet v = stUpdate (\st -> st {id3TagVersion = v})

---,--------------------------------
flagsGet :: TagParser Flags
flagsGet = stGet >>= return . headerFlags

flagsSet :: Flags -> TagParser ()
flagsSet fs = stUpdate (\st -> st {headerFlags = fs})

---,--------------------------------

-- pos = accessor (stGet >>= return . tagPos) (stUpdate $ \p st -> st {tagPos = p} )

-- | Returns 'tagPos' from 'St'.
posGet      :: TagParser Integer
posGet       = stGet >>= return . tagPos
-- | Updates 'tagPos' with given function.
posUpdate   :: (Integer -> Integer) -> TagParser ()
posUpdate f  = stUpdate ( \st -> st {tagPos = f (tagPos st)} )
-- | Sets 'tagPos' with given value.
posSet      :: Integer -> TagParser ()
posSet p     = posUpdate (\_ -> p)
-- | Decrements 'tagPos'.
posDec      :: TagParser ()
posDec       = posUpdate (\p -> p-1)
-- | Incremets 'tagPos'.
posInc      :: TagParser ()
posInc       = posUpdate (\p -> p+1)

---,--------------------------------
-- | Returns 'curSize' from 'St'.
sizeGet     :: TagParser Integer
sizeGet      = stGet >>= return . curSize
-- | Updates 'curSize' with given function.
sizeUpdate  :: (Integer -> Integer) -> TagParser ()
sizeUpdate f = stUpdate ( \st -> st {curSize = f (curSize st)} )
-- | Sets 'curSize' with given value.
sizeSet     :: Integer -> TagParser ()
sizeSet s    = sizeUpdate (\_ -> s)
-- | Decrements 'curSize'.
sizeDec     :: TagParser ()
sizeDec      = sizeUpdate (\x -> x-1)
-- | Incremets 'curSize'.
sizeInc     :: TagParser ()
sizeInc      = sizeUpdate (\x -> x+1)

---,--------------------------------
-- | Wrapper for /reiterative/ parsers.
--   Mnemonic: @if 'curSize' > 0 then@ continue @else@ stop
ifSize :: TagParser [a] -> TagParser [a]
ifSize p = do
    s <- sizeGet
    if s > 0
       then p
       else return []

-- | Wrapper for atomic parsers.
--   Increases 'tagPos' and decreases 'curSize'.
withSize p = do
    x <- p
    sizeDec
    posInc
    return x

--  Wrapper for any parser, that increaser tagPos and decreases curSize according to result of parsing
--   It is useful only for parsers, that return as much tokens, as they've read from the stream.
--   If result is list, then state updates according to it's length, else (!) result takes as a single-sized value.
{-withState :: TagParser a -> TagParser a
withState parser = do
        x <- parser
        case x of
            []    -> return x           -- empty list
            (_:_) -> do                 -- non-empty list
                    let n = toInteger $ length x
                    posUpdate (+n)
                    sizeUpdate (subtract n)
                    return x
            _     -> do                 -- singleton
                    posInc
                    sizeDec
                    return x
-}

---,--------------------------------
-- | @'many'' p@ parses a list of elements with individual parser @p@.
--   Cannot fail, since an empty list is a valid return value.
--   Unlike default 'many', stops if 'curSize' became 0.
many' :: TagParser a -> TagParser [a]
many' p = many1' p `onFail` return []

-- | Parse a non-empty list of items.
many1' :: TagParser a -> TagParser [a]
many1' p = ifSize $ do
    x <- p
    xs <- many' p
    return (x:xs)


---,--------------------------------
-- | @'manyTill'' p end@ parses a possibly-empty sequence of @p@'s, terminated by a @end@.
manyTill' :: TagParser a -> TagParser z -> TagParser [a]
manyTill' p end = manyTill1' p end `onFail` return []

-- | 'manyTill1\' p end' parses a non-empty sequence of p's, terminated by a end.
manyTill1' :: TagParser a -> TagParser z -> TagParser [a]
manyTill1' p end = ifSize $ (end >> return []) `onFail`
    (ifSize $ do
        x <- p
        xs <- manyTill' p end
        return (x:xs))

skipTill p end = end `onFail` do {p; skipTill p end}

---,--------------------------------
-- | Parse a list of items separated by discarded junk.
sepBy' :: TagParser a -> TagParser sep -> TagParser [a]
sepBy' p sep = sepBy1' p sep `onFail` return []

-- | Parse a non-empty list of items separated by discarded junk.
sepBy1' :: TagParser a -> TagParser sep -> TagParser [a]
sepBy1' p sep= ifSize $ do
    x <- p
    xs <- many' (sep >> p)
    return (x:xs)

---,--------------------------------
-- | 'count n p' parses a precise number of items, n, using the parser p, in sequence.
count :: (Num n) => n -> TagParser a -> TagParser [a]
count 0 _ = return []
count n p = do
    x <- p
    xs <- count (n-1) p
    return (x:xs)

-- | 'count' n p' parses a precise number of items, n, using the parser p, in sequence.
count' :: (Num n) => n -> TagParser a -> TagParser [a]
count' 0 _ = return []
count' n p = ifSize $ do
    x <- p
    xs <- count' (n-1) p
    return (x:xs)

---,--------------------------------
-- | Hybrid of 'count' and 'sepBy\''
countSepBy' :: (Num n) => n -> TagParser a -> TagParser sep -> TagParser [a]
countSepBy' 0 _ _ = return []
countSepBy' n p sep = ifSize $ do
    x <- p
    xs <- count' (n-1) (sep >> p)
    return (x:xs)

encPack :: CharEncoding -> [Token] -> String
encPack 0x00            s  = Text.unpack $ decodeASCII   $ BS.pack s
encPack 0x01 (0xFF:0xFE:s) = Text.unpack $ decodeUtf16LE $ BS.pack s
encPack 0x01 (0xFE:0xFF:s) = Text.unpack $ decodeUtf16BE $ BS.pack s
encPack 0x02            s  = Text.unpack $ decodeUtf16BE $ BS.pack s
encPack _               s  = Text.unpack $ decodeUtf8    $ BS.pack s

-- | Parses one value (as [Token]) till termination symbol
parseUntilWord8Null :: TagParser [Token]
parseUntilWord8Null = nonNull `manyTill'` (word8 0x00)

parseUntilWord16Null :: TagParser [Token]
parseUntilWord16Null = do
  s <- sizeGet
  when (s == 1) $ fail "Non-even number of bytes for UTF-16 string"
  if s > 1
    then do
      byte1 <- anyWord8
      byte2 <- anyWord8
      if byte1 == 0x00 && byte2 == 0x00
        then return []
        else do
          rest <- parseUntilWord16Null
          return $ [byte1, byte2] ++ rest
    else return []

-- any byte except null
nonNull = withSize $ satisfy (/=0x00) `adjustErr` (++"\nWTF: nonNull")

-- | Parses a character-encoding "code", a one-byte value that should be 0, 1, 2, or 3
parseEncoding :: TagParser CharEncoding
parseEncoding = anyWord8 >>= (return . toInteger)

-- | Parses one value and returns it as a 'String'
parseString :: CharEncoding -> TagParser String
parseString enc = do
    v <- case enc of
      0x01 -> parseUntilWord16Null -- UTF-16
      0x02 -> parseUntilWord16Null -- UTF-16 BOM
      _    -> parseUntilWord8Null  -- ISO-8859-1 or UTF-8
    return $ encPack enc v

-- | Parses one value and returns it as a 'Integer'
parseNumber :: TagParser Integer
parseNumber = parseUntilWord8Null >>= return . sum . (zipWith (*) (iterate (*10) 1)) .
  reverse . map toInteger

-- | Parses 3 bytes of language value (as a String) and returns a pair ("Language", value)
parseLanguage :: TagParser String
parseLanguage = do
    lang <- count' 3 anyWord8
    return $ encPack 0x03 lang

--(<|>) = onFail

---,--------------------------------
-- | Takes a list of 'Parser's and applies them by turns.
parsers :: [TagParser a] -> TagParser [a]
parsers [] = return []
parsers (p:ps) = do
    x <- p
    xs <- parsers ps
    return (x:xs)

---,--------------------------------
-- | Parses given 'Token'.
word8 :: Token -> TagParser Token
word8 w = (withSize $ satisfy (==w)) `err` (" \nWTF: word8 "++(show w))

-- | Parses given list of 'Token's.
word8s :: [Token] -> TagParser [Token]
word8s ws = parsers $ map word8 ws

-- | Parses given 'ByteString'.
byteString :: BS.ByteString -> TagParser BS.ByteString
byteString bs = (word8s $ BS.unpack bs) >> return bs

-- | Same as 'byteString' but argument is simple 'String'.
string :: String -> TagParser BS.ByteString
string = byteString . C.pack

-- | Parses upper-case letters (as 'Token')
upper :: TagParser Token
upper = satisfy (\x -> (0x41<=x)&&(x<=0x5a)) `err` ("\nWTF: upper")

-- | Parses digit-symbol (as 'Token')
digit :: TagParser Token
digit = satisfy (\x -> (0x30<=x)&&(x<=0x39)) `err` ("\nWTF: digit")

---,--------------------------------
-- | Parses any 'Token'.
anyWord8 :: TagParser Token
anyWord8 = withSize next `err` "anyWord8"

err p s = do
    pos <- posGet
    p `adjustErr` (++"\n"++"at "++(show pos)++": "++s)

---,--------------------------------
type Size = Integer
-- | 'parseSize n unsynchDecode' parses n bytes, doing decoding of "unsynchronized" data when unsynchDecode is True, and returns the represented 'Integer' value.
parseSize :: Integer -> Bool -> TagParser Size
parseSize n unsynchDecode = do
    s <- count n next
    posUpdate (+n)
    let size = if unsynchDecode then unSynchronise s else wordsToInteger s
    sizeSet size
    return size

-- | Takes template of flags-byte and returns it as ['Bool'] value.
--   For example, for %abcd0000 template you should use 'parseFlags_ [1..4]'.
parseFlags_ :: [Int] -> TagParser [Bool]
parseFlags_ nums =  do
    flag <- anyWord8
    sizeInc
--    posInc
    return $ map (\i -> testBit flag (i-1)) nums