----------------------------------------------------------------------
-- |
-- Module      : Ident
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/15 11:43:33 $ 
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.8 $
--
-- (Description of the module)
-----------------------------------------------------------------------------

module GF.Infra.Ident (-- ** Identifiers
              ModuleName(..), moduleNameS,
	      Ident, ident2utf8, showIdent, prefixIdent,
              -- *** Normal identifiers (returned by the parser)
	      identS, identC, identW,
              -- *** Special identifiers for internal use
              identV, identA, identAV,
	      argIdent, isArgIdent, getArgIndex,
              varStr, varX, isWildIdent, varIndex,
              -- *** Raw identifiers
              RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent,
              isPrefixOf, showRawIdent
	     ) where

import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BS(append,isPrefixOf)
                 -- Limit use of BS functions to the ones that work correctly on
                 -- UTF-8-encoded bytestrings!
import Data.Char(isDigit)
import PGF.Internal(Binary(..))
import GF.Text.Pretty


-- | Module names
newtype ModuleName = MN Ident deriving (Eq,Ord)

moduleNameS = MN . identS

instance Show ModuleName where showsPrec d (MN m) = showsPrec d m
instance Pretty ModuleName where pp (MN m) = pp m


-- | the constructors labelled /INTERNAL/ are
-- internal representation never returned by the parser
data Ident = 
   IC  {-# UNPACK #-} !RawIdent                                           -- ^ raw identifier after parsing, resolved in Rename
 | IW                                                                     -- ^ wildcard
--
-- below this constructor: internal representation never returned by the parser
 | IV  {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int                       -- ^ /INTERNAL/ variable
 | IA  {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int                       -- ^ /INTERNAL/ argument of cat at position
 | IAV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int {-# UNPACK #-} !Int   -- ^ /INTERNAL/ argument of cat with bindings at position
-- 
  deriving (Eq, Ord, Show, Read)

-- | Identifiers are stored as UTF-8-encoded bytestrings.
-- (It is also possible to use regular Haskell 'String's, with somewhat
-- reduced performance and increased memory use.)
newtype RawIdent = Id { rawId2utf8 :: UTF8.ByteString }
  deriving (Eq, Ord, Show, Read)

pack = UTF8.fromString
unpack = UTF8.toString

rawIdentS = Id . pack
rawIdentC = Id
showRawIdent = unpack . rawId2utf8

prefixRawIdent (Id x) (Id y) = Id (BS.append x y) 
isPrefixOf (Id x) (Id y) = BS.isPrefixOf x y

instance Binary RawIdent where
  put = put . rawId2utf8
  get = fmap rawIdentC get


-- | This function should be used with care, since the returned ByteString is
-- UTF-8-encoded.
ident2utf8 :: Ident -> UTF8.ByteString
ident2utf8 i = case i of
  IC (Id s) -> s
  IV (Id s) n -> BS.append s (pack ('_':show n))
  IA (Id s) j -> BS.append s (pack ('_':show j))
  IAV (Id s) b j -> BS.append s (pack ('_':show b ++ '_':show j))
  IW -> pack "_"

ident2raw = Id . ident2utf8

showIdent :: Ident -> String
showIdent i = unpack $! ident2utf8 i

instance Pretty Ident where pp = pp . showIdent

identS :: String -> Ident
identS = identC . rawIdentS

identC :: RawIdent -> Ident
identW :: Ident


prefixIdent :: String -> Ident -> Ident
prefixIdent pref = identC . Id . BS.append (pack pref) . ident2utf8

-- normal identifier
-- ident s = IC s

identV :: RawIdent -> Int -> Ident
identA :: RawIdent -> Int -> Ident
identAV:: RawIdent -> Int -> Int -> Ident

(identC, identV, identA, identAV, identW) = 
    (IC,     IV,     IA,     IAV,     IW)

-- | to mark argument variables
argIdent :: Int -> Ident -> Int -> Ident
argIdent 0 (IC c) i = identA  c i
argIdent b (IC c) i = identAV c b i

isArgIdent IA{}  = True
isArgIdent IAV{} = True
isArgIdent _     = False

getArgIndex (IA _ i)    = Just i
getArgIndex (IAV _ _ i) = Just i
getArgIndex (IC (Id bs))
  | isDigit c =
   -- (Just . read . unpack . snd . BS.spanEnd isDigit) bs -- not ok with UTF-8
      (Just . read . reverse . takeWhile isDigit) s
  where s@(c:_) = reverse (unpack bs)
getArgIndex x = Nothing

-- | used in lin defaults
varStr :: Ident
varStr = identA (rawIdentS "str") 0

-- | refreshing variables
varX :: Int -> Ident
varX = identV (rawIdentS "x")

isWildIdent :: Ident -> Bool
isWildIdent x = case x of
  IW -> True
  IC s | s == wild -> True
  _ -> False

wild = Id (pack "_")

varIndex :: Ident -> Int
varIndex (IV _ n) = n
varIndex _ = -1 --- other than IV should not count