-- GENERATED by C->Haskell Compiler, version 0.17.2 Crystal Seed, 24 Jan 2009 (Haskell)
-- Edit the ORIGNAL .chs file instead!


{-# LINE 1 "lib/CPython/Types/Unicode.chs" #-}
{-# LANGUAGE ForeignFunctionInterface #-}

-- Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program.  If not, see <http://www.gnu.org/licenses/>.

module CPython.Types.Unicode
	(
	-- * Unicode objects
	  Unicode
	, Encoding
	, ErrorHandling (..)
	, unicodeType
	, toUnicode
	, fromUnicode
	, length
	, fromEncodedObject
	, fromObject
	, encode
	, decode
	
	-- * Methods and slot functions
	, append
	, split
	, splitLines
	, translate
	, join
	, MatchDirection (..)
	, tailMatch
	, FindDirection (..)
	, find
	, count
	, replace
	, format
	, contains
	) where



import           Prelude hiding (length)
import           Control.Exception (ErrorCall (..), throwIO)
import qualified Data.Text as T

import           Data.Char (chr, ord)

import           CPython.Internal
import           CPython.Types.Bytes (Bytes)

newtype Unicode = Unicode (ForeignPtr Unicode)

instance Object Unicode where
	toObject (Unicode x) = SomeObject x
	fromForeignPtr = Unicode

instance Concrete Unicode where
	concreteType _ = unicodeType

type Encoding = T.Text
data ErrorHandling
	= Strict
	| Replace
	| Ignore
	deriving (Show, Eq)

withErrors :: ErrorHandling -> (CString -> IO a) -> IO a
withErrors errors = withCString $ case errors of
	Strict -> "strict"
	Replace -> "replace"
	Ignore -> "ignore"

unicodeType :: (Type)
unicodeType =
  unsafePerformIO $
  let {res = unicodeType'_} in
  peekStaticObject res >>= \res' ->
  return (res')

{-# LINE 87 "lib/CPython/Types/Unicode.chs" #-}


toUnicode :: T.Text -> IO Unicode
toUnicode str = withBuffer toPython >>= stealObject where
	toPython ptr len = let
		len' = fromIntegral len
		ptr' = castPtr ptr
		in hscpython_PyUnicode_FromUnicode ptr' len'
	ords = map (fromIntegral . ord) (T.unpack str) :: [CUInt]
	withBuffer = withArrayLen ords . flip

fromUnicode :: Unicode -> IO T.Text
fromUnicode obj = withObject obj $ \ptr -> do
	buffer <- hscpython_PyUnicode_AsUnicode ptr
	size <- hscpython_PyUnicode_GetSize ptr
	raw <- peekArray (fromIntegral size) buffer
	return . T.pack $ map (chr . fromIntegral) raw

length :: (Unicode) -> IO ((Integer))
length a1 =
  withObject a1 $ \a1' -> 
  length'_ a1' >>= \res ->
  checkIntReturn res >>= \res' ->
  return (res')

{-# LINE 115 "lib/CPython/Types/Unicode.chs" #-}


-- | Coerce an encoded object /obj/ to an Unicode object.
--
-- 'Bytes' and other char buffer compatible objects are decoded according to
-- the given encoding and error handling mode.
--
-- All other objects, including 'Unicode' objects, cause a @TypeError@ to be
-- thrown.
fromEncodedObject :: Object obj => (obj) -> (Encoding) -> (ErrorHandling) -> IO ((Unicode))
fromEncodedObject a1 a2 a3 =
  withObject a1 $ \a1' -> 
  withText a2 $ \a2' -> 
  withErrors a3 $ \a3' -> 
  fromEncodedObject'_ a1' a2' a3' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 129 "lib/CPython/Types/Unicode.chs" #-}


-- | Shortcut for @'fromEncodedObject' \"utf-8\" 'Strict'@
fromObject :: Object obj => obj -> IO Unicode
fromObject obj = fromEncodedObject obj (T.pack "utf-8") Strict

-- | Encode a 'Unicode' object and return the result as 'Bytes' object.
-- The encoding and error mode have the same meaning as the parameters of
-- the the @str.encode()@ method. The codec to be used is looked up using
-- the Python codec registry.
encode :: (Unicode) -> (Encoding) -> (ErrorHandling) -> IO ((Bytes))
encode a1 a2 a3 =
  withObject a1 $ \a1' -> 
  withText a2 $ \a2' -> 
  withErrors a3 $ \a3' -> 
  encode'_ a1' a2' a3' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 143 "lib/CPython/Types/Unicode.chs" #-}


-- | Create a 'Unicode' object by decoding a 'Bytes' object. The encoding and
-- error mode have the same meaning as the parameters of the the
-- @str.encode()@ method. The codec to be used is looked up using the Python
-- codec registry.
decode :: Bytes -> Encoding -> ErrorHandling -> IO Unicode
decode bytes enc errors =
	withObject bytes $ \bytesPtr ->
	withText enc $ \encPtr ->
	withErrors errors $ \errorsPtr ->
	alloca $ \bufferPtr ->
	alloca $ \lenPtr -> do
	pyBytesAsStringAndSize bytesPtr bufferPtr lenPtr
		>>= checkStatusCode
	buffer <- peek bufferPtr
	len <- peek lenPtr
	hscpython_PyUnicode_Decode buffer len encPtr errorsPtr
	>>= stealObject

append :: (Unicode) -> (Unicode) -> IO ((Unicode))
append a1 a2 =
  withObject a1 $ \a1' -> 
  withObject a2 $ \a2' -> 
  append'_ a1' a2' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 166 "lib/CPython/Types/Unicode.chs" #-}


-- | Split a string giving a 'List' of 'Unicode' objects. If the separator is
-- 'Nothing', splitting will be done at all whitespace substrings. Otherwise,
-- splits occur at the given separator. Separators are not included in the
-- resulting list.
split
	:: Unicode
	-> Maybe Unicode -- ^ Separator
	-> Maybe Integer -- ^ Maximum splits
	-> IO List
split s sep maxsplit =
	withObject s $ \sPtr ->
	maybeWith withObject sep $ \sepPtr ->
	let max' = maybe (- 1) fromInteger maxsplit in
	hscpython_PyUnicode_Split sPtr sepPtr max'
	>>= stealObject

-- | Split a 'Unicode' string at line breaks, returning a list of 'Unicode'
-- strings. CRLF is considered to be one line break. If the second parameter
-- is 'False', the line break characters are not included in the resulting
-- strings.
splitLines :: (Unicode) -> (Bool) -> IO ((List))
splitLines a1 a2 =
  withObject a1 $ \a1' -> 
  let {a2' = fromBool a2} in 
  splitLines'_ a1' a2' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 191 "lib/CPython/Types/Unicode.chs" #-}


-- | Translate a string by applying a character mapping table to it.
--
-- The mapping table must map Unicode ordinal integers to Unicode ordinal
-- integers or @None@ (causing deletion of the character).
--
-- Mapping tables need only provide the @__getitem__()@ interface;
-- dictionaries and sequences work well. Unmapped character ordinals (ones
-- which cause a @LookupError@) are left untouched and are copied as-is.
--
-- The error mode has the usual meaning for codecs.
translate :: Object table => (Unicode) -> (table) -> (ErrorHandling) -> IO ((Unicode))
translate a1 a2 a3 =
  withObject a1 $ \a1' -> 
  withObject a2 $ \a2' -> 
  withErrors a3 $ \a3' -> 
  translate'_ a1' a2' a3' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 208 "lib/CPython/Types/Unicode.chs" #-}


-- | Join a sequence of strings using the given separator.
join :: Sequence seq => (Unicode) -> (seq) -> IO ((Unicode))
join a1 a2 =
  withObject a1 $ \a1' -> 
  withObject a2 $ \a2' -> 
  join'_ a1' a2' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 215 "lib/CPython/Types/Unicode.chs" #-}


data MatchDirection = Prefix | Suffix
	deriving (Show, Eq)

-- | Return 'True' if the substring matches @string*[*start:end]@ at the
-- given tail end (either a 'Prefix' or 'Suffix' match), 'False' otherwise.
tailMatch
	:: Unicode -- ^ String
	-> Unicode -- ^ Substring
	-> Integer -- ^ Start
	-> Integer -- ^ End
	-> MatchDirection
	-> IO Bool
tailMatch str substr start end dir =
	withObject str $ \strPtr ->
	withObject substr $ \substrPtr ->
	let start' = fromInteger start in
	let end' = fromInteger end in
	let dir' = case dir of
		Prefix -> -1
		Suffix -> 1 in
	hscpython_PyUnicode_Tailmatch strPtr substrPtr start' end' dir'
	>>= checkBoolReturn

data FindDirection = Forwards | Backwards
	deriving (Show, Eq)

-- | Return the first position of the substring in @string*[*start:end]@
-- using the given direction. The return value is the index of the first
-- match; a value of 'Nothing' indicates that no match was found.
find
	:: Unicode -- ^ String
	-> Unicode -- ^ Substring
	-> Integer -- ^ Start
	-> Integer -- ^ End
	-> FindDirection
	-> IO (Maybe Integer)
find str substr start end dir =
	withObject str $ \strPtr ->
	withObject substr $ \substrPtr -> do
	let start' = fromInteger start
	let end' = fromInteger end
	let dir' = case dir of
		Forwards -> 1
		Backwards -> -1
	cRes <- hscpython_PyUnicode_Find strPtr substrPtr start' end' dir'
	exceptionIf $ cRes == -2
	case cRes of
		-1 -> return Nothing
		x | x >= 0 -> return . Just . toInteger $ x
		x -> throwIO . ErrorCall $ "Invalid return code: " ++ show x

-- | Return the number of non-overlapping occurrences of the substring in
-- @string[start:end]@.
count
	:: Unicode -- ^ String
	-> Unicode -- ^ Substring
	-> Integer -- ^ Start
	-> Integer -- ^ End
	-> IO Integer
count str substr start end =
	withObject str $ \str' ->
	withObject substr $ \substr' ->
	let start' = fromInteger start in
	let end' = fromInteger end in
	hscpython_PyUnicode_Count str' substr' start' end'
	>>= checkIntReturn

-- | Replace occurrences of the substring with a given replacement. If the
-- maximum count is 'Nothing', replace all occurences.
replace
	:: Unicode -- ^ String
	-> Unicode -- ^ Substring
	-> Unicode -- ^ Replacement
	-> Maybe Integer -- ^ Maximum count
	-> IO Unicode
replace str substr replstr maxcount =
	withObject str $ \strPtr ->
	withObject substr $ \substrPtr ->
	withObject replstr $ \replstrPtr ->
	let maxcount' = case maxcount of
		Nothing -> -1
		Just x -> fromInteger x in
	hscpython_PyUnicode_Replace strPtr substrPtr replstrPtr maxcount'
	>>= stealObject

-- | Return a new 'Unicode' object from the given format and args; this is
-- analogous to @format % args@.
format :: (Unicode) -> (Tuple) -> IO ((Unicode))
format a1 a2 =
  withObject a1 $ \a1' -> 
  withObject a2 $ \a2' -> 
  format'_ a1' a2' >>= \res ->
  stealObject res >>= \res' ->
  return (res')

{-# LINE 307 "lib/CPython/Types/Unicode.chs" #-}


-- | Check whether /element/ is contained in a string.
--
-- /element/ has to coerce to a one element string.
contains :: Object element => (Unicode) -> (element) -> IO ((Bool))
contains a1 a2 =
  withObject a1 $ \a1' -> 
  withObject a2 $ \a2' -> 
  contains'_ a1' a2' >>= \res ->
  checkBoolReturn res >>= \res' ->
  return (res')

{-# LINE 316 "lib/CPython/Types/Unicode.chs" #-}


foreign import ccall unsafe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Type"
  unicodeType'_ :: (Ptr ())

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_FromUnicode"
  hscpython_PyUnicode_FromUnicode :: ((Ptr CInt) -> (CLong -> (IO (Ptr ()))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_AsUnicode"
  hscpython_PyUnicode_AsUnicode :: ((Ptr ()) -> (IO (Ptr CInt)))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_GetSize"
  hscpython_PyUnicode_GetSize :: ((Ptr ()) -> (IO CLong))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_GetSize"
  length'_ :: ((Ptr ()) -> (IO CLong))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_FromEncodedObject"
  fromEncodedObject'_ :: ((Ptr ()) -> ((Ptr CChar) -> ((Ptr CChar) -> (IO (Ptr ())))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_AsEncodedString"
  encode'_ :: ((Ptr ()) -> ((Ptr CChar) -> ((Ptr CChar) -> (IO (Ptr ())))))

foreign import ccall safe "CPython/Types/Unicode.chs.h PyBytes_AsStringAndSize"
  pyBytesAsStringAndSize :: ((Ptr ()) -> ((Ptr (Ptr CChar)) -> ((Ptr CLong) -> (IO CInt))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Decode"
  hscpython_PyUnicode_Decode :: ((Ptr CChar) -> (CLong -> ((Ptr CChar) -> ((Ptr CChar) -> (IO (Ptr ()))))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Concat"
  append'_ :: ((Ptr ()) -> ((Ptr ()) -> (IO (Ptr ()))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Split"
  hscpython_PyUnicode_Split :: ((Ptr ()) -> ((Ptr ()) -> (CLong -> (IO (Ptr ())))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Splitlines"
  splitLines'_ :: ((Ptr ()) -> (CInt -> (IO (Ptr ()))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Translate"
  translate'_ :: ((Ptr ()) -> ((Ptr ()) -> ((Ptr CChar) -> (IO (Ptr ())))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Join"
  join'_ :: ((Ptr ()) -> ((Ptr ()) -> (IO (Ptr ()))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Tailmatch"
  hscpython_PyUnicode_Tailmatch :: ((Ptr ()) -> ((Ptr ()) -> (CLong -> (CLong -> (CInt -> (IO CInt))))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Find"
  hscpython_PyUnicode_Find :: ((Ptr ()) -> ((Ptr ()) -> (CLong -> (CLong -> (CInt -> (IO CLong))))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Count"
  hscpython_PyUnicode_Count :: ((Ptr ()) -> ((Ptr ()) -> (CLong -> (CLong -> (IO CLong)))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Replace"
  hscpython_PyUnicode_Replace :: ((Ptr ()) -> ((Ptr ()) -> ((Ptr ()) -> (CLong -> (IO (Ptr ()))))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Format"
  format'_ :: ((Ptr ()) -> ((Ptr ()) -> (IO (Ptr ()))))

foreign import ccall safe "CPython/Types/Unicode.chs.h hscpython_PyUnicode_Contains"
  contains'_ :: ((Ptr ()) -> ((Ptr ()) -> (IO CInt)))