{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
{-# OPTIONS_GHC -Wno-unused-imports #-}

{-|
Description:    

Copyright:      (c) 2020-2021 Sam May
License:        GPL-3.0-or-later
Maintainer:     ag@eitilt.life

Stability:      stable
Portability:    non-portable (requires libcdio)
-}
module Foreign.Libcdio.CdText.Binary
    ( Info ( .. )
    , emptyCdTextRaw
    , packCdTextBlock
    , joinBlockInfo
    , checksum
    ) where


import qualified Data.Bits as B
import qualified Data.Bifunctor as F.B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.C
import qualified Data.Char as C
import qualified Data.List as L
import qualified Data.Maybe as Y
import qualified Data.Word as W

import qualified Data.Text as T
import qualified Data.Text.Encoding.Error as T
import qualified Data.Text.Encoding.MsJIS as T

import Data.Bits ( (.&.), (.|.) )

import Foreign.Libcdio.Track
import Foreign.Libcdio.Types.Enums

import Sound.Libcdio.Common


type TrackId = W.Word8
type PackType = W.Word8
type BlockId = Word
type BlockSize = W.Word8


-- | Textual data used for describing tracks on a disc (as well as the disc
-- itself).
data Info = Info
    { title      :: Maybe String
    , performer  :: Maybe String
    , songwriter :: Maybe String
    , composer   :: Maybe String
    , arranger   :: Maybe String
    , message    :: Maybe String
    , code       :: Maybe String
    }
  deriving ( Eq, Show, Read )


emptyCdTextRaw :: [Maybe Language] -> BS.ByteString
emptyCdTextRaw [] = emptyCdTextRaw [Nothing]
emptyCdTextRaw ls =
    BS.concat . concat . flip map (zip [0..] ls) $ \(i, _) -> map checksum .
    joinBlockInfo ls [3 | _ <- ls] .
    generateBlockInfo i Iso8859_1 0 0 $
    replicate 15 0 ++ [3]


data Encoding
    = Iso8859_1
    | ASCII
    | MS_JIS
  deriving ( Eq, Ord, Bounded, Show, Read )
instance Enum Encoding where
    toEnum 0x00 = Iso8859_1
    toEnum 0x01 = ASCII
    toEnum 0x80 = MS_JIS
    toEnum _ = ASCII
    fromEnum Iso8859_1 = 0x00
    fromEnum ASCII = 0x01
    fromEnum MS_JIS = 0x80

data Pack
    = TitlePack
    | PerformerPack
    | SongwriterPack
    | ComposerPack
    | ArrangerPack
    | MessagePack
    | DiscIdPack
    | GenrePack
    | TocPack
    | Toc2Pack
    | ClosedPack
    | CodePack
    | InfoPack
  deriving ( Eq, Ord, Bounded, Enum, Show, Read )


-- | Generate the binary data representing a CDTEXT language block.
packCdTextBlock
    :: BlockId
    -> Maybe String
    -> Track
    -> Maybe Genre
    -> Maybe String
    -> Info
    -> [Info]
    -> [BS.ByteString]
packCdTextBlock blockIndex catalogue startTrack@(Track _) genreCode genreName disc tracks =
    allPacks <> generateBlockInfo blockIndex encoding startTrackId endTrackId allLengths
  where (titles, tt) = text title TitlePack (0 :: Int)
        (performers, tp) = text performer PerformerPack ip
        (songwriters, ts) = text songwriter SongwriterPack is
        (composers, tc) = text composer ComposerPack ic
        (arrangers, ta) = text arranger ArrangerPack ia
        (messages, tm) = text message MessagePack im
        (catalogueB, _) = pack' DiscIdPack False il (prepareSingleText ASCII catalogue, 0 :: Int)
        (genreB, _) = pack' GenrePack False ig (prepareGenre genreCode genreName, 0 :: Int)
     -- (tocB, _) = pack' TocPack False it _
     -- (toc2B, ) = pack' Toc2Pack False i2 _
     -- (closedB, ) = pack' ClosedPack False ix $ prepareSingleText Iso8859_1 _
        (codes, to) = pack' CodePack False io $ prepareText' ASCII code
        lt = length titles
        (ip, lp) = (     lt, length performers)
        (is, ls) = (ip + lp, length songwriters)
        (ic, lc) = (is + ls, length composers)
        (ia, la) = (ic + lc, length arrangers)
        (im, lm) = (ia + la, length messages)
        (il, ll) = (im + lm, length catalogueB)
        (ig, lg) = (il + ll, length genreB)
        (io, lo) = (ig + lg, length codes)
        allLengths = reverse . (3 :) . snd $ L.foldl'
            (\(acc, as) a -> if acc + a > 0xFC then (0xFC, (0xFC - acc) : as) else (acc + a, a : as))
            (0, [])
            [lt, lp, ls, lc, la, lm, ll, lg, 0, 0, 0, 0, 0, 0, lo]
        allPacks = take 0xFC
             $ titles
            <> performers
            <> songwriters
            <> composers
            <> arrangers
            <> messages
            <> catalogueB
            <> genreB
            <> codes
        encoding = guessEncoding . Y.catMaybes . concat $ infoList disc : map infoList tracks
        wideChars = encoding == MS_JIS
        infoList i = [f i | f <- [title, performer, songwriter, composer, arranger, message, code]]
        startTrackId = fromIntegral $ fromEnum startTrack
        endTrackId = foldr max 0 [tt, tp, ts, tc, ta, tm, to]
        text f p i = F.B.first (pack p wideChars blockIndex $ fromIntegral i) $
            prepareText' encoding f
        prepareSingleText _ Nothing = []
        prepareSingleText e (Just t) = (\t' -> [(0, t')]) $ encodeText e t
        prepareText' e f = (t', foldr (max . fst) 0 t')
          where t' = prepareText e startTrackId (f disc) (map f tracks)
        pack' p w i = F.B.first . pack p w blockIndex $ fromIntegral i
packCdTextBlock blockIndex catalogue DiscPregap genreCode genreName _ tracks =
    packCdTextBlock blockIndex catalogue 1 genreCode genreName disc tracks'
  where (disc, tracks') = case tracks of
            [] -> (Info Nothing Nothing Nothing Nothing Nothing Nothing Nothing, [])
            (t:ts) -> (t, ts)
packCdTextBlock blockIndex catalogue DiscLeadout genreCode genreName disc _ =
    packCdTextBlock blockIndex catalogue 1 genreCode genreName disc []

-- | Generate the local block info.  Note that this does /not/ result in full
-- payloads for the last two packs; @packInfo@ adds them automatically.
generateBlockInfo :: BlockId -> Encoding -> TrackId -> TrackId -> [Int] -> [BS.ByteString]
generateBlockInfo blockIndex encoding startTrack endTrack lengths =
    pack InfoPack False blockIndex (fromIntegral $ sum lengths - 3) . packTuple $
        [fromIntegral $ fromEnum encoding, startTrack, endTrack, 0x00] ++
        take 16 (map fromIntegral lengths ++ repeat 0)
  where packTuple bs = [(0, BS.pack bs)]


-- | Use the minimal encoding based on the characters used by the strings.  If
-- any character outside ISO 8859-1 ("Latin-1") is found, 'MS_JIS' is used as a
-- fallback; this isn't strictly comprehensive, but it is a lot harder to test
-- for (i.e. I'd have to write a function for it).
guessEncoding :: [String] -> Encoding
guessEncoding [] = Iso8859_1
guessEncoding ts
    | all (all C.isAscii) ts = ASCII
    | all (all C.isLatin1) ts = Iso8859_1
    | otherwise = MS_JIS


-- | Collapse the track texts to their minimal size if allowed (if present for
-- disc-level info, all tracks must be represented).
prepareText :: Encoding -> TrackId -> Maybe String -> [Maybe String] -> [(TrackId, BS.ByteString)]
prepareText e _ (Just d) [] = [(0, encodeText e d)]
prepareText e i d'@(Just d) ta@(t:_)
    | d' == t = case prepareTrackTexts e i' $ d' : trim ta of
        [] -> []
        ((_, bs):bss) -> (0, bs) : bss
    | otherwise = (0, encodeText e d) : uncurry (prepareTrackTexts e) (trimTexts i ta)
  where trim = L.dropWhileEnd Y.isNothing . dropWhile Y.isNothing
        i' = i - 1 + fromIntegral (length $ takeWhile Y.isNothing ta)
{- This block forces track info if it's present for the disc, as required by
 - the libcdio docs.  It doesn't look like that's actually required in these
 - references, but I don't trust them enough to take it out entirely.
prepareText e i d'@(Just d) ta@(t:_)
    | d' == t = case prepareTrackTexts e (i - 1) $ d' : ta of
        [] -> []
        ((_, bs):bss) -> (0, bs) : bss
    | otherwise = (0, encodeText e d) : prepareTrackTexts e i ta
-}
prepareText e i Nothing ts = uncurry (prepareTrackTexts e) $ trimTexts i ts

trimTexts :: TrackId -> [Maybe String] -> (TrackId, [Maybe String])
trimTexts i' [] = (i', [])
trimTexts i' (Nothing:ts') = trimTexts (i' + 1) ts'
trimTexts i' ts' = (i', L.dropWhileEnd Y.isNothing ts')

-- | Collapse the texts further by making use of the tab-replacement shortcut
-- for repeated values.
prepareTrackTexts :: Encoding -> TrackId -> [Maybe String] -> [(TrackId, BS.ByteString)]
prepareTrackTexts _ _ [] = []
prepareTrackTexts e i ts = encodeTrackTexts e i . reverse $ L.foldl' deduplicate [] ts
  where deduplicate [] (Just s) = [s]
        deduplicate ts' Nothing = "" : ts'
        deduplicate ts' (Just s) =
            let ss = takeWhile (== s) ts'
            in  s : map (const "\t") ss ++ deduplicate' (drop (length ss) ts')
        deduplicate' [] = []
        deduplicate' (t':ts') = deduplicate ts' $ Just t'

-- | Combine the genre code and description into a single bytestring.
prepareGenre :: Maybe Genre -> Maybe String -> [(TrackId, BS.ByteString)]
prepareGenre Nothing Nothing = []
prepareGenre gc gt = [(0, encodeGenre gc <> encodeText ASCII (Y.fromMaybe "" gt))]


-- | Conduct text to the proper encoder and append the proper terminator.
encodeText :: Encoding -> String -> BS.ByteString
encodeText MS_JIS "\t" = BS.pack [0x09,0x09,0x00,0x00]
encodeText ASCII t = BS.map (.&. 0x7F) $ encodeText Iso8859_1 t
encodeText Iso8859_1 t = BS.C.pack $ t ++ ['\NUL']
encodeText MS_JIS t = T.encodeMsJISWith T.ignore (T.pack t) <> BS.C.pack ['\NUL', '\NUL']

-- | Encode all members of a given textual category, pairing them with their
-- track index or 0 if one applies to the disc as a whole.
encodeTrackTexts :: Encoding -> TrackId -> [String] -> [(TrackId, BS.ByteString)]
encodeTrackTexts e i ts = zip [i ..] $ map (encodeText e) ts

-- | Store the genre code in a two-byte string.
encodeGenre :: Maybe Genre -> BS.ByteString
encodeGenre g = BS.pack [gh, gc]
  where gc = maybe 0x00 (fromIntegral . fromEnum) g
        -- Futureproofing decades-old technology
        gh = if gc <= 0xFF then 0x00 else B.shiftR gc 8


-- | Prevent index overflow for any single pack type.
pack :: Pack -> Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString]
pack p w b i bss = take 0xFF $ pack_ p w b i bss

-- | Conduct binary data and starting track to the proper packers.
pack_ :: Pack -> Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString]
pack_ TitlePack = packText 0x80
pack_ PerformerPack = packText 0x81
pack_ SongwriterPack = packText 0x82
pack_ ComposerPack = packText 0x83
pack_ ArrangerPack = packText 0x84
pack_ MessagePack = packText 0x85
pack_ DiscIdPack = packText 0x86
pack_ GenrePack = packGenre
pack_ ClosedPack = packText 0x8D
pack_ TocPack = const4 []
pack_ Toc2Pack = const4 []
pack_ CodePack = packText 0x8E
pack_ InfoPack = packBlockInfo

const4 :: a -> b -> c -> d -> e -> a
const4 a _ _ _ _ = a

packText
    :: PackType
    -> Bool
    -> BlockId
    -> BlockSize
    -> [(TrackId, BS.ByteString)] -> [BS.ByteString]
packText p w b i bss = zipWith (curry $ addHeader p w b) [i ..] .
    reverse $ L.foldl' splitText [] bss

packGenre :: Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString]
packGenre _ _ _ [] = []
packGenre w b i ((_, bs):_) = zipWith (curry $ addHeader 0x87 w b) [i ..] $ splitGenre bs

-- | Create the skeleton block info packs, leaving out counts and languages.
packBlockInfo :: Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString]
packBlockInfo w b i [] =
    [ addHeader 0x8F w b (i + p, (p, 0, BS.empty))
    | p <- [0 .. 2]
    ]
packBlockInfo w b i ((_, bs):_) =
    [ addHeader 0x8F w b (i, (0, 0, p1))
    , addHeader 0x8F w b (i + 1, (1, 0, p2))
    , addHeader 0x8F w b (i + 2, (2, 0, BS.empty))
    ]
  where (p1, p2) = BS.splitAt 12 bs

-- | Generate the four-byte identifying and metadata-carrying header.
-- 
-- NOTE: While the libcdio documentation on the CDTEXT format says that byte 3
-- (@l'@) is 15 if the text starts at any point before the previous pack,
-- that's not the case in any of my (admittedly limited and unverified)
-- reference blobs: it's capped at 15, yes, but if one pack has two characters,
-- the next has the full twelve, and the text still continues into a third,
-- that third will have a third byte of 14.
addHeader
    :: PackType
        -- ^ Pack type
    -> Bool
        -- ^ Two-byte characters?
    -> BlockId
        -- ^ Language index
    -> (BlockSize, (TrackId, Word, BS.ByteString))
                        -- ^ Starting pack index, track number, characters in previous block, and payload
    -> BS.ByteString
addHeader p w b (i, (t, l, bs)) = BS.pack [p, t, i, f] <> bs
  where l' = if l > 15 then 0x0F else 0x0F .&. fromIntegral l
        b' = 0x70 .&. B.shiftL (fromIntegral b) 4
        w' = if w then 0x80 else 0x00
        f = w' .|. b' .|. l'

-- | Takes tuples of (track index, track data) and adds them (initial track,
-- initial length in previous packs, packed data), where the packed data is
-- /no more than/ 12 bytes long.  Note that the final list will have to be
-- 'reverse'd.
splitText :: [(TrackId, Word, BS.ByteString)] -> (TrackId, BS.ByteString) -> [(TrackId, Word, BS.ByteString)]
splitText [] bst@(t, _) = splitText [(t, 0, BS.empty)] bst
splitText csa@((u, l, cs):css) (t, bs)
    | cl == 12 =
        [ (t, l', bs')
        | (l', bs') <- reverse . zip [0, 12 ..] $ splitAtEvery 12 bs
        ] ++ csa
    -- Will have incorrect t, l' if @BS.length cs > 12@, but as that shouldn't
    -- happen anyway, it's not an issue.
    | otherwise =
        let (cs':bss) = splitAtEvery 12 $ cs <> bs
        in  [ (t, l', bs')
            | (l', bs') <- reverse $ zip [(12 - cl), (24 - cl) ..] bss
            ] ++ (u, l, cs') : css
  where cl = fromIntegral $ BS.length cs

-- | Takes the genre string prefixed by a single, two-byte genre code
-- identifier, splits the string, and prepends the code to every pack.
splitGenre :: BS.ByteString -> [(TrackId, Word, BS.ByteString)]
splitGenre bs =
    [ (0, l', gc <> bs')
    | (l', bs') <- zip [0, 10 ..] . splitAtEvery 10 $ BS.drop 2 bs
    ]
  where gc = BS.take 2 bs

-- | Extend the builtin 'BS.splitAt' to return an entire list of substrings.
splitAtEvery :: Int -> BS.ByteString -> [BS.ByteString]
splitAtEvery i bs
    | i <= 0 = []
    | i >= BS.length bs = [bs]
    | otherwise = recurse $ BS.splitAt i bs
  where recurse (h, t) = h : splitAtEvery i t


-- | Now that we have the languages and sizes of all blocks, update the info
-- packs to include them.  Note that this requires that the info packs are
-- located at the end of the block.
joinBlockInfo :: [Maybe Language] -> [BlockSize] -> [BS.ByteString] -> [BS.ByteString]
joinBlockInfo _ _ [] = []
joinBlockInfo _ _ [b] = [b]
joinBlockInfo ls ss bss = h ++ [p2, p3]
  where (h, t) = splitAt (length bss - 2) bss
        ls' = take 8 $ map (maybe 0 $ fromIntegral . fromEnum) ls ++ repeat 0
        ss' = take 8 $ ss ++ repeat 0
        p2 = head t <> BS.pack (take 4 ss')
        p3 = head (tail t) <> BS.pack (drop 4 ss' ++ ls')


-- | Generate a checksum over the first 16 (including header) bytes of a data
-- pack, zero-padding /or truncating/ as necessary.
checksum :: BS.ByteString -> BS.ByteString
checksum bs = checksum' $ case compare 16 $ BS.length bs of
    GT -> BS.take 16 $ bs <> BS.replicate 16 0
    EQ -> bs
    LT -> BS.take 16 bs

-- | Actually run the checksum function on all the bytes of the input.  Note
-- that this doesn't verify that the length is actually the proper 16.
checksum' :: BS.ByteString -> BS.ByteString
checksum' bs = bs <> (BS.pack . split . finalize . BS.foldl' checksumBytes h' $ t <> BS.singleton 0)
  where split i = map fromIntegral [B.shiftR i 8, i]
        finalize i = B.xor i 0xFFFF
        (h, t) = Y.fromMaybe (0, BS.empty) $ BS.uncons bs
        h' = B.shiftL (fromIntegral h) 8

checksumBytes :: W.Word16 -> W.Word8 -> W.Word16
checksumBytes acc a = checksumBits 8 . B.xor acc $ fromIntegral a

checksumBits :: Word -> W.Word16 -> W.Word16
checksumBits 0 i = i
checksumBits c i = checksumBits (c - 1) i'
  where i'  | B.testBit i 15 = B.xor 0x1021 $ B.shiftL i 1
            | otherwise = B.shiftL i 1