{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} {-# OPTIONS_GHC -Wno-unused-imports #-} {-| Description: Copyright: (c) 2020-2021 Sam May License: GPL-3.0-or-later Maintainer: ag@eitilt.life Stability: stable Portability: non-portable (requires libcdio) -} module Foreign.Libcdio.CdText.Binary ( Info ( .. ) , emptyCdTextRaw , packCdTextBlock , joinBlockInfo , checksum ) where import qualified Data.Bits as B import qualified Data.Bifunctor as F.B import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS.C import qualified Data.Char as C import qualified Data.List as L import qualified Data.Maybe as Y import qualified Data.Word as W import qualified Data.Text as T import qualified Data.Text.Encoding.Error as T import qualified Data.Text.Encoding.MsJIS as T import Data.Bits ( (.&.), (.|.) ) import Foreign.Libcdio.Track import Foreign.Libcdio.Types.Enums import Sound.Libcdio.Common type TrackId = W.Word8 type PackType = W.Word8 type BlockId = Word type BlockSize = W.Word8 -- | Textual data used for describing tracks on a disc (as well as the disc -- itself). data Info = Info { title :: Maybe String , performer :: Maybe String , songwriter :: Maybe String , composer :: Maybe String , arranger :: Maybe String , message :: Maybe String , code :: Maybe String } deriving ( Eq, Show, Read ) emptyCdTextRaw :: [Maybe Language] -> BS.ByteString emptyCdTextRaw [] = emptyCdTextRaw [Nothing] emptyCdTextRaw ls = BS.concat . concat . flip map (zip [0..] ls) $ \(i, _) -> map checksum . joinBlockInfo ls [3 | _ <- ls] . generateBlockInfo i Iso8859_1 0 0 $ replicate 15 0 ++ [3] data Encoding = Iso8859_1 | ASCII | MS_JIS deriving ( Eq, Ord, Bounded, Show, Read ) instance Enum Encoding where toEnum 0x00 = Iso8859_1 toEnum 0x01 = ASCII toEnum 0x80 = MS_JIS toEnum _ = ASCII fromEnum Iso8859_1 = 0x00 fromEnum ASCII = 0x01 fromEnum MS_JIS = 0x80 data Pack = TitlePack | PerformerPack | SongwriterPack | ComposerPack | ArrangerPack | MessagePack | DiscIdPack | GenrePack | TocPack | Toc2Pack | ClosedPack | CodePack | InfoPack deriving ( Eq, Ord, Bounded, Enum, Show, Read ) -- | Generate the binary data representing a CDTEXT language block. packCdTextBlock :: BlockId -> Maybe String -> Track -> Maybe Genre -> Maybe String -> Info -> [Info] -> [BS.ByteString] packCdTextBlock blockIndex catalogue startTrack@(Track _) genreCode genreName disc tracks = allPacks <> generateBlockInfo blockIndex encoding startTrackId endTrackId allLengths where (titles, tt) = text title TitlePack (0 :: Int) (performers, tp) = text performer PerformerPack ip (songwriters, ts) = text songwriter SongwriterPack is (composers, tc) = text composer ComposerPack ic (arrangers, ta) = text arranger ArrangerPack ia (messages, tm) = text message MessagePack im (catalogueB, _) = pack' DiscIdPack False il (prepareSingleText ASCII catalogue, 0 :: Int) (genreB, _) = pack' GenrePack False ig (prepareGenre genreCode genreName, 0 :: Int) -- (tocB, _) = pack' TocPack False it _ -- (toc2B, ) = pack' Toc2Pack False i2 _ -- (closedB, ) = pack' ClosedPack False ix $ prepareSingleText Iso8859_1 _ (codes, to) = pack' CodePack False io $ prepareText' ASCII code lt = length titles (ip, lp) = ( lt, length performers) (is, ls) = (ip + lp, length songwriters) (ic, lc) = (is + ls, length composers) (ia, la) = (ic + lc, length arrangers) (im, lm) = (ia + la, length messages) (il, ll) = (im + lm, length catalogueB) (ig, lg) = (il + ll, length genreB) (io, lo) = (ig + lg, length codes) allLengths = reverse . (3 :) . snd $ L.foldl' (\(acc, as) a -> if acc + a > 0xFC then (0xFC, (0xFC - acc) : as) else (acc + a, a : as)) (0, []) [lt, lp, ls, lc, la, lm, ll, lg, 0, 0, 0, 0, 0, 0, lo] allPacks = take 0xFC $ titles <> performers <> songwriters <> composers <> arrangers <> messages <> catalogueB <> genreB <> codes encoding = guessEncoding . Y.catMaybes . concat $ infoList disc : map infoList tracks wideChars = encoding == MS_JIS infoList i = [f i | f <- [title, performer, songwriter, composer, arranger, message, code]] startTrackId = fromIntegral $ fromEnum startTrack endTrackId = foldr max 0 [tt, tp, ts, tc, ta, tm, to] text f p i = F.B.first (pack p wideChars blockIndex $ fromIntegral i) $ prepareText' encoding f prepareSingleText _ Nothing = [] prepareSingleText e (Just t) = (\t' -> [(0, t')]) $ encodeText e t prepareText' e f = (t', foldr (max . fst) 0 t') where t' = prepareText e startTrackId (f disc) (map f tracks) pack' p w i = F.B.first . pack p w blockIndex $ fromIntegral i packCdTextBlock blockIndex catalogue DiscPregap genreCode genreName _ tracks = packCdTextBlock blockIndex catalogue 1 genreCode genreName disc tracks' where (disc, tracks') = case tracks of [] -> (Info Nothing Nothing Nothing Nothing Nothing Nothing Nothing, []) (t:ts) -> (t, ts) packCdTextBlock blockIndex catalogue DiscLeadout genreCode genreName disc _ = packCdTextBlock blockIndex catalogue 1 genreCode genreName disc [] -- | Generate the local block info. Note that this does /not/ result in full -- payloads for the last two packs; @packInfo@ adds them automatically. generateBlockInfo :: BlockId -> Encoding -> TrackId -> TrackId -> [Int] -> [BS.ByteString] generateBlockInfo blockIndex encoding startTrack endTrack lengths = pack InfoPack False blockIndex (fromIntegral $ sum lengths - 3) . packTuple $ [fromIntegral $ fromEnum encoding, startTrack, endTrack, 0x00] ++ take 16 (map fromIntegral lengths ++ repeat 0) where packTuple bs = [(0, BS.pack bs)] -- | Use the minimal encoding based on the characters used by the strings. If -- any character outside ISO 8859-1 ("Latin-1") is found, 'MS_JIS' is used as a -- fallback; this isn't strictly comprehensive, but it is a lot harder to test -- for (i.e. I'd have to write a function for it). guessEncoding :: [String] -> Encoding guessEncoding [] = Iso8859_1 guessEncoding ts | all (all C.isAscii) ts = ASCII | all (all C.isLatin1) ts = Iso8859_1 | otherwise = MS_JIS -- | Collapse the track texts to their minimal size if allowed (if present for -- disc-level info, all tracks must be represented). prepareText :: Encoding -> TrackId -> Maybe String -> [Maybe String] -> [(TrackId, BS.ByteString)] prepareText e _ (Just d) [] = [(0, encodeText e d)] prepareText e i d'@(Just d) ta@(t:_) | d' == t = case prepareTrackTexts e i' $ d' : trim ta of [] -> [] ((_, bs):bss) -> (0, bs) : bss | otherwise = (0, encodeText e d) : uncurry (prepareTrackTexts e) (trimTexts i ta) where trim = L.dropWhileEnd Y.isNothing . dropWhile Y.isNothing i' = i - 1 + fromIntegral (length $ takeWhile Y.isNothing ta) {- This block forces track info if it's present for the disc, as required by - the libcdio docs. It doesn't look like that's actually required in these - references, but I don't trust them enough to take it out entirely. prepareText e i d'@(Just d) ta@(t:_) | d' == t = case prepareTrackTexts e (i - 1) $ d' : ta of [] -> [] ((_, bs):bss) -> (0, bs) : bss | otherwise = (0, encodeText e d) : prepareTrackTexts e i ta -} prepareText e i Nothing ts = uncurry (prepareTrackTexts e) $ trimTexts i ts trimTexts :: TrackId -> [Maybe String] -> (TrackId, [Maybe String]) trimTexts i' [] = (i', []) trimTexts i' (Nothing:ts') = trimTexts (i' + 1) ts' trimTexts i' ts' = (i', L.dropWhileEnd Y.isNothing ts') -- | Collapse the texts further by making use of the tab-replacement shortcut -- for repeated values. prepareTrackTexts :: Encoding -> TrackId -> [Maybe String] -> [(TrackId, BS.ByteString)] prepareTrackTexts _ _ [] = [] prepareTrackTexts e i ts = encodeTrackTexts e i . reverse $ L.foldl' deduplicate [] ts where deduplicate [] (Just s) = [s] deduplicate ts' Nothing = "" : ts' deduplicate ts' (Just s) = let ss = takeWhile (== s) ts' in s : map (const "\t") ss ++ deduplicate' (drop (length ss) ts') deduplicate' [] = [] deduplicate' (t':ts') = deduplicate ts' $ Just t' -- | Combine the genre code and description into a single bytestring. prepareGenre :: Maybe Genre -> Maybe String -> [(TrackId, BS.ByteString)] prepareGenre Nothing Nothing = [] prepareGenre gc gt = [(0, encodeGenre gc <> encodeText ASCII (Y.fromMaybe "" gt))] -- | Conduct text to the proper encoder and append the proper terminator. encodeText :: Encoding -> String -> BS.ByteString encodeText MS_JIS "\t" = BS.pack [0x09,0x09,0x00,0x00] encodeText ASCII t = BS.map (.&. 0x7F) $ encodeText Iso8859_1 t encodeText Iso8859_1 t = BS.C.pack $ t ++ ['\NUL'] encodeText MS_JIS t = T.encodeMsJISWith T.ignore (T.pack t) <> BS.C.pack ['\NUL', '\NUL'] -- | Encode all members of a given textual category, pairing them with their -- track index or 0 if one applies to the disc as a whole. encodeTrackTexts :: Encoding -> TrackId -> [String] -> [(TrackId, BS.ByteString)] encodeTrackTexts e i ts = zip [i ..] $ map (encodeText e) ts -- | Store the genre code in a two-byte string. encodeGenre :: Maybe Genre -> BS.ByteString encodeGenre g = BS.pack [gh, gc] where gc = maybe 0x00 (fromIntegral . fromEnum) g -- Futureproofing decades-old technology gh = if gc <= 0xFF then 0x00 else B.shiftR gc 8 -- | Prevent index overflow for any single pack type. pack :: Pack -> Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString] pack p w b i bss = take 0xFF $ pack_ p w b i bss -- | Conduct binary data and starting track to the proper packers. pack_ :: Pack -> Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString] pack_ TitlePack = packText 0x80 pack_ PerformerPack = packText 0x81 pack_ SongwriterPack = packText 0x82 pack_ ComposerPack = packText 0x83 pack_ ArrangerPack = packText 0x84 pack_ MessagePack = packText 0x85 pack_ DiscIdPack = packText 0x86 pack_ GenrePack = packGenre pack_ ClosedPack = packText 0x8D pack_ TocPack = const4 [] pack_ Toc2Pack = const4 [] pack_ CodePack = packText 0x8E pack_ InfoPack = packBlockInfo const4 :: a -> b -> c -> d -> e -> a const4 a _ _ _ _ = a packText :: PackType -> Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString] packText p w b i bss = zipWith (curry $ addHeader p w b) [i ..] . reverse $ L.foldl' splitText [] bss packGenre :: Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString] packGenre _ _ _ [] = [] packGenre w b i ((_, bs):_) = zipWith (curry $ addHeader 0x87 w b) [i ..] $ splitGenre bs -- | Create the skeleton block info packs, leaving out counts and languages. packBlockInfo :: Bool -> BlockId -> BlockSize -> [(TrackId, BS.ByteString)] -> [BS.ByteString] packBlockInfo w b i [] = [ addHeader 0x8F w b (i + p, (p, 0, BS.empty)) | p <- [0 .. 2] ] packBlockInfo w b i ((_, bs):_) = [ addHeader 0x8F w b (i, (0, 0, p1)) , addHeader 0x8F w b (i + 1, (1, 0, p2)) , addHeader 0x8F w b (i + 2, (2, 0, BS.empty)) ] where (p1, p2) = BS.splitAt 12 bs -- | Generate the four-byte identifying and metadata-carrying header. -- -- NOTE: While the libcdio documentation on the CDTEXT format says that byte 3 -- (@l'@) is 15 if the text starts at any point before the previous pack, -- that's not the case in any of my (admittedly limited and unverified) -- reference blobs: it's capped at 15, yes, but if one pack has two characters, -- the next has the full twelve, and the text still continues into a third, -- that third will have a third byte of 14. addHeader :: PackType -- ^ Pack type -> Bool -- ^ Two-byte characters? -> BlockId -- ^ Language index -> (BlockSize, (TrackId, Word, BS.ByteString)) -- ^ Starting pack index, track number, characters in previous block, and payload -> BS.ByteString addHeader p w b (i, (t, l, bs)) = BS.pack [p, t, i, f] <> bs where l' = if l > 15 then 0x0F else 0x0F .&. fromIntegral l b' = 0x70 .&. B.shiftL (fromIntegral b) 4 w' = if w then 0x80 else 0x00 f = w' .|. b' .|. l' -- | Takes tuples of (track index, track data) and adds them (initial track, -- initial length in previous packs, packed data), where the packed data is -- /no more than/ 12 bytes long. Note that the final list will have to be -- 'reverse'd. splitText :: [(TrackId, Word, BS.ByteString)] -> (TrackId, BS.ByteString) -> [(TrackId, Word, BS.ByteString)] splitText [] bst@(t, _) = splitText [(t, 0, BS.empty)] bst splitText csa@((u, l, cs):css) (t, bs) | cl == 12 = [ (t, l', bs') | (l', bs') <- reverse . zip [0, 12 ..] $ splitAtEvery 12 bs ] ++ csa -- Will have incorrect t, l' if @BS.length cs > 12@, but as that shouldn't -- happen anyway, it's not an issue. | otherwise = let (cs':bss) = splitAtEvery 12 $ cs <> bs in [ (t, l', bs') | (l', bs') <- reverse $ zip [(12 - cl), (24 - cl) ..] bss ] ++ (u, l, cs') : css where cl = fromIntegral $ BS.length cs -- | Takes the genre string prefixed by a single, two-byte genre code -- identifier, splits the string, and prepends the code to every pack. splitGenre :: BS.ByteString -> [(TrackId, Word, BS.ByteString)] splitGenre bs = [ (0, l', gc <> bs') | (l', bs') <- zip [0, 10 ..] . splitAtEvery 10 $ BS.drop 2 bs ] where gc = BS.take 2 bs -- | Extend the builtin 'BS.splitAt' to return an entire list of substrings. splitAtEvery :: Int -> BS.ByteString -> [BS.ByteString] splitAtEvery i bs | i <= 0 = [] | i >= BS.length bs = [bs] | otherwise = recurse $ BS.splitAt i bs where recurse (h, t) = h : splitAtEvery i t -- | Now that we have the languages and sizes of all blocks, update the info -- packs to include them. Note that this requires that the info packs are -- located at the end of the block. joinBlockInfo :: [Maybe Language] -> [BlockSize] -> [BS.ByteString] -> [BS.ByteString] joinBlockInfo _ _ [] = [] joinBlockInfo _ _ [b] = [b] joinBlockInfo ls ss bss = h ++ [p2, p3] where (h, t) = splitAt (length bss - 2) bss ls' = take 8 $ map (maybe 0 $ fromIntegral . fromEnum) ls ++ repeat 0 ss' = take 8 $ ss ++ repeat 0 p2 = head t <> BS.pack (take 4 ss') p3 = head (tail t) <> BS.pack (drop 4 ss' ++ ls') -- | Generate a checksum over the first 16 (including header) bytes of a data -- pack, zero-padding /or truncating/ as necessary. checksum :: BS.ByteString -> BS.ByteString checksum bs = checksum' $ case compare 16 $ BS.length bs of GT -> BS.take 16 $ bs <> BS.replicate 16 0 EQ -> bs LT -> BS.take 16 bs -- | Actually run the checksum function on all the bytes of the input. Note -- that this doesn't verify that the length is actually the proper 16. checksum' :: BS.ByteString -> BS.ByteString checksum' bs = bs <> (BS.pack . split . finalize . BS.foldl' checksumBytes h' $ t <> BS.singleton 0) where split i = map fromIntegral [B.shiftR i 8, i] finalize i = B.xor i 0xFFFF (h, t) = Y.fromMaybe (0, BS.empty) $ BS.uncons bs h' = B.shiftL (fromIntegral h) 8 checksumBytes :: W.Word16 -> W.Word8 -> W.Word16 checksumBytes acc a = checksumBits 8 . B.xor acc $ fromIntegral a checksumBits :: Word -> W.Word16 -> W.Word16 checksumBits 0 i = i checksumBits c i = checksumBits (c - 1) i' where i' | B.testBit i 15 = B.xor 0x1021 $ B.shiftL i 1 | otherwise = B.shiftL i 1