{-# LANGUAGE FlexibleInstances, FlexibleContexts, GeneralizedNewtypeDeriving #-}
module Data.SGF.Parse.Encodings (guessEncoding, decodeWordStringExplicit) where

import Control.Applicative (Applicative(..))
import Control.Exception.Extensible
import Control.Monad.State
import Control.Throws
import Data.Encoding
import Data.Word

-- The concrete type used to annotate the result of running a decoder with
-- 'runStateT' over a list of bytes: either a 'DecodingException', or the
-- decoded 'String' paired with the bytes still left in the state.
type MyIHateGHC = MyEither DecodingException (String, [Word8])
-- A newtype wrapper around 'Either'.  Its 'Throws' instance is obtained by
-- newtype deriving; its 'Monad' instance is written by hand in this module.
-- NOTE(review): presumably the wrapper exists so that this module can supply
-- its own 'Monad' instance instead of relying on one for plain 'Either' --
-- confirm against the supported base/mtl versions.
newtype MyEither a b = MyEither (Either a b) deriving (Throws a)

-- 'MyEither' is a monad in the same way 'Either' is: a 'Left' short-circuits
-- the rest of the computation, while a 'Right' hands its value on to the
-- continuation.
--
-- 'Functor' and 'Applicative' are superclasses of 'Monad' from GHC 7.10
-- (base 4.8) onwards, so both instances are spelled out here; without them
-- the 'Monad' instance is rejected by those compilers.
instance Functor (MyEither a) where
    -- Map over the 'Right' payload, leaving a 'Left' untouched.
    fmap f (MyEither e) = MyEither (either Left (Right . f) e)

instance Applicative (MyEither a) where
    pure = MyEither . Right
    -- The first 'Left' encountered (function side first) wins.
    (MyEither (Left  e)) <*> _ = MyEither (Left e)
    (MyEither (Right f)) <*> v = fmap f v

instance Monad (MyEither a) where
    return = pure
    (MyEither (Right x)) >>= f = f x
    -- The continuation is never run once an error has been recorded.
    (MyEither (Left  e)) >>= _ = MyEither (Left e)

-- A byte source whose input is the list of bytes held in the 'StateT' state;
-- failures are reported through the underlying 'MyEither'.
instance ByteSource (StateT [Word8] (MyEither DecodingException)) where
    -- The source is exhausted exactly when the remaining byte list is empty.
    sourceEmpty = get >>= \remaining -> return (null remaining)

    -- Pop the next byte off the state, or throw 'UnexpectedEnd' if none is left.
    fetchWord8 = get >>= \remaining -> case remaining of
        w : rest -> put rest >> return w
        []       -> throwException UnexpectedEnd

    -- Run the given action, then put back the input as it was before the
    -- action ran, so that whatever it consumed is still available.
    fetchAhead action = do
        snapshot <- get
        outcome  <- action
        put snapshot >> return outcome

-- The candidate encodings tried when guessing: ones that are known to
-- satisfy our invariant (see SGF.Parse.Raw).
encodings :: [DynEncoding]
encodings = [encodingFromString name | name <- ["latin1", "utf-8", "ascii"]]
-- Decode the bytes with the given encoding and check whether the decoded
-- text, read as an encoding name, denotes that very same encoding.  Any
-- decoding error, or any bytes left over after decoding, counts as a miss.
guess :: [Word8] -> DynEncoding -> Bool
guess ws encoding = namesItself (runStateT (decode encoding) ws :: MyIHateGHC)
  where
    namesItself (MyEither (Right (name, []))) =
        encodingFromStringExplicit name == Just encoding
    namesItself _ = False

-- |
-- Decode the given word string with every known-good encoding in turn and
-- keep those encodings for which the decoded text is the name of the encoding
-- that was used to decode it.  The result is expected never to contain more
-- than one guess.
guessEncoding :: [Word8] -> [DynEncoding]
guessEncoding ws = [candidate | candidate <- encodings, guess ws candidate]

-- |
-- A thin wrapper around the encoding package's 'decode' function: decode a
-- list of bytes with the given encoding, returning either the
-- 'DecodingException' that was raised or the decoded 'String'.  Any input
-- left over after decoding is discarded.
decodeWordStringExplicit :: Encoding e => e -> [Word8] -> Either DecodingException String
decodeWordStringExplicit e ws = unwrap (runStateT (decode e) ws :: MyIHateGHC)
  where
    unwrap (MyEither outcome) = either Left (\(text, _) -> Right text) outcome