{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}

-- | An alphabet, where each character is a short bytestring.
--
-- Due to the overhead this incurs, we use 'ShortByteString's internally. We
-- also provide an 'Interned' instance to further reduce overhead using
-- hash-consing.
--
-- TODO we'd like to use the @stringable@ library but it depends on
-- @system-filepath@ which is not yet compatible with @text>=1@.

module NLP.Alphabet.MultiChar where

import           Data.Function (on)
import           Data.Hashable
import           Data.Interned
import           Data.String
import qualified Data.ByteString.Short as S
import qualified Data.ByteString.Short.Internal as S



-- | Interns a 'MultiChar' character.

internMultiChar :: MultiChar -> MultiChar
internMultiChar = uninternMultiChar . intern

-- | Wrap a short bytestring. Read and Show instances behave like for normal
-- strings.

newtype MultiChar = MultiChar { unMultiChar :: S.ShortByteString }
  deriving (Eq,Ord)

instance Show MultiChar where
  showsPrec p (MultiChar ps) r = showsPrec p ps r

instance Read MultiChar where
  readsPrec p str = [ (MultiChar x, y) | (x,y) <- readsPrec p str ]

instance Hashable MultiChar where
  hashWithSalt salt (MultiChar s@(S.SBS sbs)) = hashByteArrayWithSalt sbs 0 (S.length s) salt

instance IsString MultiChar where
  fromString = MultiChar . fromString



-- * Interned

data InternedMultiChar = InternedMultiChar
  { internedMultiCharId :: {-# UNPACK #-} !Id
  , uninternMultiChar   :: {-# UNPACK #-} !MultiChar
  }

instance IsString InternedMultiChar where
  fromString = intern . fromString

instance Eq InternedMultiChar where
  (==) = (==) `on` internedMultiCharId

instance Ord InternedMultiChar where
  compare = compare `on` internedMultiCharId

instance Show InternedMultiChar where
  showsPrec d (InternedMultiChar _ mc) = showsPrec d mc

instance Interned InternedMultiChar where
  type Uninterned InternedMultiChar = MultiChar
  newtype Description InternedMultiChar = DMC MultiChar deriving (Eq,Hashable)
  describe = DMC
  identify = InternedMultiChar
  cache = imcCache

imcCache :: Cache InternedMultiChar
imcCache = mkCache
{-# NOINLINE imcCache #-}