module ZMachine.Strings where import ZMachine.Base import Numeric (showHex) import Control.Monad.State (get, put, liftIO) import Control.Monad (liftM, zipWithM_) import Data.Bits import Data.Char (isSpace, toLower) import Data.List (elemIndex) import Data.Maybe (fromJust) getZChars :: ZM [Int] getZChars = do w <- liftM fromIntegral getNextWord (liftM ((w `shiftR` 10 .&. 31 :) . (w `shiftR` 5 .&. 31 :) . (w .&. 31 :)) (if w `testBit` 15 then return [] else getZChars)) data Alphabet = A0 | A1 | A2 deriving Enum a0, a1, a2 :: String a0 = "abcdefghijklmnopqrstuvwxyz" a1 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" a2 = " \n0123456789.,!?_#'\"/\\-:()" getAlphabet :: Alphabet -> ZM [Char] getAlphabet abet = do atable <- getWord 0x34 if atable == 0 then return (case abet of A0 -> a0; A1 -> a1; A2 -> a2) else mapM (\_ -> do byte <- getByte (fromIntegral atable + 26 * fromIntegral (fromEnum abet)) return (toEnum (fromIntegral byte)) ) ([0..25]::[Int]) decodeZChars :: [Int] -> ZM String decodeZChars zchars = do alphabets <- mapM getAlphabet [A0 ..] let alphabet abet = alphabets !! fromEnum abet decode :: Alphabet -> [Int] -> ZM String decode _ [] = return [] decode _ (0:xs) = liftM (' ':) $ decode A0 xs decode _ (4:xs) = decode A1 xs decode _ (5:xs) = decode A2 xs -- FIXME: ZSCII != Unicode decode A2 (6:a:b:xs) = liftM ((toEnum . fromEnum) (a `shiftL` 5 + b):) $ decode A0 xs decode a (x:xs@(~(x':xs'))) | x >= 1 && x <= 3 = do let number = 32*(x-1) + x' abbrevTable <- liftM fromIntegral $ getWord 0x18 -- liftIO $ putStr ("[abbrev table at $" -- ++ showHex abbrevTable "]") let abbrevEntryAddr = fromIntegral $ abbrevTable + 2*number abbrev <- getWord abbrevEntryAddr abbrevAddr <- decodeStringAddr abbrev liftIO $ putStr ("[finding abbrev " ++ show number -- ++ " with entry at $" -- ++ showHex abbrevEntryAddr "" -- ++ " and paddr $" ++ showHex abbrev "" ++ " at $" ++ showHex abbrevAddr "]") -- liftM2 (++) (getString abbrevAddr) $ decode A0 xs' liftM (("[abbrev " ++ show number ++ "]")++) $ decode A0 xs' | x >= 6 = liftM (alphabet a !! (x-6):) $ decode A0 xs | otherwise = error ("unimplemented Z-char " ++ show x) decode A0 zchars getNextString :: ZM String getNextString = decodeZChars =<< getZChars getString :: Addr -> ZM String getString addr = do st <- get put (st {ptr = addr}) ret <- getNextString put st return ret -- Tokenization -- XXX totally b0rk joinZChars :: [Int] -> [Val] joinZChars [c1] = joinZChars [c1, 5, 5] joinZChars [c1, c2] = joinZChars [c1, c2, 5] joinZChars [c1, c2, c3] = [0x8000 .|. head (joinZChars [c1, c2, c3, 0])] joinZChars (c1:c2:c3:cs) = fromIntegral (c1 `shiftL` 10 + c2 `shiftL` 5 + c3) : joinZChars cs encodeZChars :: String -> ZM [Int] encodeZChars str = do a0' <- getAlphabet A0 a1' <- getAlphabet A1 a2' <- getAlphabet A2 let go [] = [] go (' ':cs) = 0:go cs go (c:cs) = case () of _ | c `elem` a0' -> 6 + fromJust (elemIndex c a0') : go cs | c `elem` a1' -> 4 : 6 + fromJust (elemIndex c a1') : go cs | c `elem` a2' -> 5 : 6 + fromJust (elemIndex c a2') : go cs return $ go str lookupDictWord :: String -> ZM Val lookupDictWord word = do zchars <- encodeZChars (map toLower word) let [w1, w2, w3] = joinZChars $ take 9 (zchars ++ repeat 5) dictHeader <- liftM fromIntegral $ getWord 8 wordsepsLen <- liftM fromIntegral $ getByte dictHeader entryLength <- liftM fromIntegral $ getByte (dictHeader+1+wordsepsLen) let firstEntry = (dictHeader+1+wordsepsLen+3) entries <- liftM fromIntegral $ getWord (dictHeader+1+wordsepsLen+1) let seek low high | high >= low = do let entry = (low + high) `div` 2 entryAddr = firstEntry + entry*entryLength x1 <- getWord (entryAddr) x2 <- getWord (entryAddr + 2) x3 <- getWord (entryAddr + 4) case compare [w1, w2, w3] [x1, x2, x3] of LT -> seek low (entry-1) GT -> seek (entry+1) high EQ -> return (fromIntegral entryAddr) | otherwise = return 0 seek 0 (entries-1) zTokenize :: Val -> Addr -> ZM () zTokenize parse text = do nchars <- getByte (text+1) input <- mapM (liftM (toEnum . fromIntegral) . getByte) $ take (fromIntegral nchars) [text+2..] let wordlist = myWords input parseAddr = fromIntegral parse parseLen <- liftM fromIntegral $ getByte parseAddr let wordlist' = take parseLen wordlist each :: (Val, String) -> Addr -> ZM () each (off, s) n = do dictword <- lookupDictWord s let entAddr = parseAddr + 2 + 4*n putWord (entAddr) dictword putByte (entAddr+2) (fromIntegral $ length s) putByte (entAddr+3) off liftIO $ print wordlist' putByte (parseAddr+1) (fromIntegral $ length wordlist') zipWithM_ each wordlist' [0..] where myWords :: String -> [(Val, String)] myWords s = [(n, map fst w) | w@((_, n):_) <- myWords' (zip s [0..])] myWords' :: [(Char, Val)] -> [[(Char, Val)]] myWords' s = case dropWhile {-partain:Char.-}(isSpace . fst) s of [] -> [] s' -> w : myWords' s'' where (w, s'') = break {-partain:Char.-}(isSpace . fst) s'