Text/Printf/Mauke.hs

{-# LANGUAGE CPP #-}

#define XEMPTY

module Text.Printf.Mauke (
    printf,
    hPrintf,
    FromChar(..),
    ToChar(..),
    PrintfType,
    HPrintfType,
    PrintfArg
) where

import Prelude hiding (putStr)

import Control.Monad

import Data.Char
import Data.Default
import Data.List
import Data.Maybe
import Data.Ratio

import qualified Data.Set as Set
import Data.Set (Set)

import Numeric

import System.IO hiding (putStr, hPutStr)
import System.IO.UTF8

import Data.Int
import Data.Word
import Foreign.Ptr
import Foreign.C.Types
import System.Posix.Types

import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BL

thisModule :: String
thisModule = "Text.Printf.Mauke"

die :: String -> String -> a
die f s = error $ concat [thisModule, ".", f, ": ", s]

class FromChar a where
    fromChar :: Char -> a

instance FromChar Char where
    fromChar = id

class ToChar a where
    toChar :: a -> Char

instance ToChar Char where
    toChar = id

instance ToChar Word8 where
    toChar = chr . fromIntegral
    
pdie :: String -> a
pdie = die "printf"

-- | Format a variable number of arguments according to a format string,
-- similar to (s)printf in Perl. The return value is either a 'String' or
-- @'IO' a@, in which case the result is printed to 'stdout'. If you use the
-- 'IO' variant, don't use the result: Because it has to be of any type @a@,
-- this library makes it 'undefined'.
--
-- The format string consists of ordinary characters (everything except
-- @\'%\'@), which are passed through unchanged, and formatting directives,
-- which have the following form:
--
-- @
--  % /flag/XEMPTY* /vector/? /width/? /precision/? /type/
-- @
--
-- (@*@ and @?@ mean 0 or more and 0 or 1 of the preceding item, respectively.)
--
-- Flags:
--
-- [@space@] prefix positive numbers with a space
--
-- [@+@] prefix positive numbers with a plus sign (overrides space if both are
-- present)
--
-- [@-@] left-justify within the field
--
-- [@0@] pad with zeroes on the left, not spaces
--
-- [@#@] prefix binary numbers with @0b@\/@0B@, octal numbers with @0o@\/@0O@
-- and hexadecimal numbers with @0x@\/@0X@
--
-- The /vector/ flag @v@ tells 'printf' to format each character in the string
-- argument according to the current directive, then joins the results with a
-- separator that defaults to @\".\"@. When @*v@ is used, the separator is
-- taken from the argument list (use e.g. @'printf' \"%*v.2x\" \"\" str@ if you
-- want no separator).
-- 
-- The /width/ is either a decimal integer or @*@, in which case the width is
-- taken from the argument list (this argument must be an integer). It
-- specifies the minimum width for this field. Shorter values are padded on
-- the left with spaces (but this can be changed by the @0@ and @-@ flags). If
-- the /width/ taken from the argument list is negative, it behaves as if the
-- @-@ flag was specified.
--
-- The /precision/ consists of a @.@ followed by digits or a @*@ (see the
-- description of /width/ above). The effects depend on the format /type/:
--
-- * for floating point formats, this specifies the number of digits after the
-- decimal point
--
-- * for string formats, this is the maximum number of characters to appear
-- (longer strings are truncated)
--
-- * for integer formats, this is the minimum number of digits to appear in
-- the output; shorter values are zero-padded
--
-- Types:
--
-- [@%@] A percent sign. No argument is consumed.
--
-- [@c@] A character. If the argument is an integer, it is converted with
-- 'chr'.
--
-- [@s@] A string.
--
-- [@d@] A decimal integer.
--
-- [@u@] An unsigned decimal integer.
--
-- [@b@] A binary integer.
--
-- [@B@] Like @b@, but using a @0B@ prefix with @#@.
--
-- [@o@] An octal integer.
--
-- [@O@] Like @o@, but using a @0O@ prefix with @#@.
--
-- [@x@] A hexadecimal integer.
--
-- [@X@] Like @x@, but using uppercase letters.
--
-- [@e@] A floating point number in scientific notation.
--
-- [@E@] Like @e@, but using an uppercase @E@.
--
-- [@f@] A floating point number in fixed decimal notation.
--
-- [@g@] A floating point number in @%e@ or @%f@ notation.
--
-- [@G@] Like @g@, but using an uppercase @E@.
--
-- [@_@] A generic format; it behaves like @%c@, @%s@, @%d@ or @%g@, depending
-- on the argument type.
printf :: (PrintfType r) => String -> r
printf = collect id

class PrintfType a where
    collect :: ([Arg] -> [Arg]) -> String -> a

instance (FromChar a) => PrintfType [a] where
    collect arg fmt = map fromChar $ format fmt (arg [])

instance (PrintfArg a, PrintfType r) => PrintfType (a -> r) where
    collect arg fmt x = collect (arg . (embed x :)) fmt

instance PrintfType (IO a) where
    collect arg fmt = putStr (collect arg fmt) >> return (pdie undefinedResult)

undefinedResult :: String
undefinedResult = "the return value is a lie"

-- | Like 'printf', except that the result is printed to the specified
-- 'Handle'.
hPrintf :: (HPrintfType r) => Handle -> String -> r
hPrintf h = hcollect h id

class HPrintfType a where
    hcollect :: Handle -> ([Arg] -> [Arg]) -> String -> a

instance (PrintfArg a, HPrintfType r) => HPrintfType (a -> r) where
    hcollect h arg fmt x = hcollect h (arg . (embed x :)) fmt

instance HPrintfType (IO a) where
    hcollect h arg fmt = hPutStr h (format fmt (arg [])) >> return (die "hPrintf" undefinedResult)

data Arg
    = AInt Integer
    | AChar Char
    | AStr String
    | AFloat Double
    deriving (Eq, Ord, Show, Read)

ashow :: Arg -> String
ashow (AInt i) = show i
ashow (AChar c) = show c
ashow (AStr s) = show s
ashow (AFloat d) = show d

class PrintfArg a where
    embed :: a -> Arg

instance PrintfArg Char where
    embed = AChar

instance (ToChar a) => PrintfArg [a] where
    embed = AStr . map toChar

instance PrintfArg BS.ByteString where
    embed = AStr . BS.unpack

instance PrintfArg BL.ByteString where
    embed = AStr . BL.unpack

instance PrintfArg Double where
    embed = AFloat

instance PrintfArg Float where
    embed = AFloat . realToFrac

instance PrintfArg CFloat where
    embed = AFloat . realToFrac

instance PrintfArg CDouble where
    embed = AFloat . realToFrac

#if 0
instance PrintfArg CLDouble where
    embed = AFloat . realToFrac
#endif

instance (Integral a) => PrintfArg (Ratio a) where
    embed = AFloat . realToFrac

instance PrintfArg Integer where
    embed = AInt

instance PrintfArg Int where
    embed = AInt . fromIntegral

instance PrintfArg Int8 where
    embed = AInt . fromIntegral

instance PrintfArg Int16 where
    embed = AInt . fromIntegral

instance PrintfArg Int32 where
    embed = AInt . fromIntegral

instance PrintfArg Int64 where
    embed = AInt . fromIntegral

instance PrintfArg Word where
    embed = AInt . fromIntegral

instance PrintfArg Word8 where
    embed = AInt . fromIntegral

instance PrintfArg Word16 where
    embed = AInt . fromIntegral

instance PrintfArg Word32 where
    embed = AInt . fromIntegral

instance PrintfArg Word64 where
    embed = AInt . fromIntegral

instance PrintfArg IntPtr where
    embed = AInt . fromIntegral

instance PrintfArg WordPtr where
    embed = AInt . fromIntegral

instance PrintfArg CChar where
    embed = AInt . fromIntegral

instance PrintfArg CSChar where
    embed = AInt . fromIntegral

instance PrintfArg CUChar where
    embed = AInt . fromIntegral

instance PrintfArg CShort where
    embed = AInt . fromIntegral

instance PrintfArg CUShort where
    embed = AInt . fromIntegral

instance PrintfArg CInt where
    embed = AInt . fromIntegral

instance PrintfArg CUInt where
    embed = AInt . fromIntegral

instance PrintfArg CLong where
    embed = AInt . fromIntegral

instance PrintfArg CULong where
    embed = AInt . fromIntegral

instance PrintfArg CPtrdiff where
    embed = AInt . fromIntegral

instance PrintfArg CSize where
    embed = AInt . fromIntegral

instance PrintfArg CWchar where
    embed = AInt . fromIntegral

instance PrintfArg CSigAtomic where
    embed = AInt . fromIntegral

instance PrintfArg CLLong where
    embed = AInt . fromIntegral

instance PrintfArg CULLong where
    embed = AInt . fromIntegral

instance PrintfArg CIntPtr where
    embed = AInt . fromIntegral

instance PrintfArg CUIntPtr where
    embed = AInt . fromIntegral

instance PrintfArg CIntMax where
    embed = AInt . fromIntegral

instance PrintfArg CUIntMax where
    embed = AInt . fromIntegral

instance PrintfArg CIno where
    embed = AInt . fromIntegral

instance PrintfArg CMode where
    embed = AInt . fromIntegral

instance PrintfArg COff where
    embed = AInt . fromIntegral

instance PrintfArg CPid where
    embed = AInt . fromIntegral

instance PrintfArg CSsize where
    embed = AInt . fromIntegral

instance PrintfArg Fd where
    embed = AInt . fromIntegral

#ifdef __unix__
instance PrintfArg CGid where
    embed = AInt . fromIntegral

instance PrintfArg CNlink where
    embed = AInt . fromIntegral

instance PrintfArg CUid where
    embed = AInt . fromIntegral

instance PrintfArg CTcflag where
    embed = AInt . fromIntegral

instance PrintfArg CRLim where
    embed = AInt . fromIntegral
#endif

format :: String -> [Arg] -> String
format "" [] = ""
format "" (x : _) = die "printf" $ "excess argument: " ++ ashow x
format ('%' : fmt) args =
    let
        (spec, fmt', args') = parse fmt args 
        (args'', ss) = apply spec args'
    in
    ss $ format fmt' args''
format (c : fmt) args = c : format fmt args

data Spec = Spec{
    flags :: !(Set Flag),
    vector :: !(Maybe String),
    width :: !Integer,
    precision :: !(Maybe Integer),
    ftype :: !Type
} deriving (Eq, Ord, Show, Read)

instance Default Spec where
    def = Spec def def def def def

data Flag = FSpace | FPlus | FZero | FAlt
    deriving (Eq, Ord, Show, Read)

ch2flag :: Char -> Flag
ch2flag c = case c of
    ' ' -> FSpace
    '+' -> FPlus
    '0' -> FZero
    '#' -> FAlt
    _ -> die "ch2flag" $ "internal error: " ++ show c

data Type
    = Tpercent
    | Tc | Ts | Td | Tu | To | Tx | Te | Tf | Tg
    | TO | TX | TE | TG | Tb | TB
    | Tany
    deriving (Eq, Ord, Show, Read)

instance Default Type where
    def = Tany

ch2type :: Char -> Type
ch2type c = case c of
    '%' -> Tpercent
    'c' -> Tc
    's' -> Ts
    'd' -> Td
    'u' -> Tu
    'o' -> To
    'O' -> TO
    'x' -> Tx
    'X' -> TX
    'e' -> Te
    'f' -> Tf
    'g' -> Tg
    'E' -> TE
    'G' -> TG
    'b' -> Tb
    'B' -> TB
    '_' -> Tany
    _ -> pdie $ "invalid format specifier: " ++ show c

enoarg :: a
enoarg = pdie "missing argument"

auncons :: [Arg] -> (Arg, [Arg])
auncons [] = enoarg
auncons (x : xs) = (x, xs)

arg2int :: Arg -> Integer
arg2int (AInt i) = i
arg2int x = pdie $ "invalid argument: expected int, got " ++ ashow x

arg2int' :: Arg -> Integer
arg2int' (AInt i) = i
arg2int' (AChar c) = fromIntegral $ ord c
arg2int' x = pdie $ "invalid argument: expected int, got " ++ ashow x

arg2str :: Arg -> String
arg2str (AStr s) = s
arg2str x = pdie $ "invalid argument: expected string, got " ++ ashow x

arg2float :: Arg -> Double
arg2float (AFloat f) = f
arg2float x = pdie $ "invalid argument: expected float, got " ++ ashow x

parseInt :: String -> [Arg] -> (Maybe Integer, String, [Arg])
parseInt str args = case str of
    '*' : str' ->
        let (arg, args') = auncons args in
        (Just $ arg2int arg, str', args')
    _ ->
        let (d, str') = span (\c -> c >= '0' && c <= '9') str in
        (if null d then Nothing else Just $ read d, str', args)

parseVec :: String -> [Arg] -> (Maybe String, String, [Arg])
parseVec str args = case str of
    'v' : str' -> (Just ".", str', args)
    '*' : 'v' : str' -> (Just sa, str', args')
    _ -> (Nothing, str, args)
    where
    (arg, args') = auncons args
    sa = arg2str arg

parse :: String -> [Arg] -> (Spec, String, [Arg])
parse s args =
    let
        (fch, s1) = span (`elem` " +-0#") s
        fl = Set.fromList . map ch2flag . filter ('-' /=) $ fch
        (vc, s2, args1) = parseVec s1 args
        (wd, s3, args2) = parseInt s2 args1
        (pr, s4, args3) = case s3 of
            '.' : t ->
                let (mi, str, ar) = parseInt t args2 in
                (mi `mplus` Just 0, str, ar)
            _ -> (Nothing, s3, args2)
        (tp, s5) = case s4 of
            "" -> pdie $ "unterminated formatting directive"
            c : cs -> (ch2type c, cs)
    in (
        def{
            flags = fl,
            vector = vc,
            width = (if '-' `elem` fch then negate else id) . fromMaybe 0 $ wd,
            precision = pr,
            ftype = tp
        },
        s5,
        args3
    )

padWith :: a -> Integer -> [a] -> [a]
padWith c w s
    | w <= 0 = lgo (negate w) s
    | otherwise = genericReplicate (missingFrom w s) c ++ s
    where
    lgo n xs | n <= 0 = xs
    lgo n [] = genericReplicate n c
    lgo n (x : xs) = x : lgo (pred n) xs
    missingFrom n _ | n <= 0 = 0
    missingFrom n [] = n
    missingFrom n (_ : xs) = missingFrom (pred n) xs

padChar :: Spec -> Char
padChar spc
    | FZero `Set.member` flags spc
    && width spc > 0
    && (
        isNothing (precision spc) ||
        ftype spc `notElem` [Td, Tu, To, Tx, TX, Tb, TB]
    ) = '0'
    | otherwise = ' '

int2char :: Integer -> Char
int2char i
    | i < lo || i > hi = '\xfffd'
    | otherwise = chr (fromInteger i)
    where
    lo = fromIntegral $ ord minBound
    hi = fromIntegral $ ord maxBound

apply :: Spec -> [Arg] -> ([Arg], String -> String)
apply spc args
    | isJust (vector spc) =
        let Just d = vector spc in
        args' <&>
            ($ "") . foldr (.) id . intersperse (d ++) . map (snd . apply spc{ vector = Nothing } . return . embed) $ arg2str arg
    | otherwise = case ftype spc of
        Tpercent -> args <&> "%"
        Tc -> args' <&> [int2char argi]
        Ts -> args' <&> maybe id genericTake (precision spc) . arg2str $ arg
        Tu -> args' <&>
            maybe id (padWith '0' . max 0) (precision spc) $ show argu
        Td -> ifmt show
        To -> ifmt $ showBase 8
        TO -> ifmt $ showBase 8
        Tx -> ifmt $ showBase 16
        TX -> ifmt $ uc . showBase 16
        Tb -> ifmt $ showBase 2
        TB -> ifmt $ showBase 2
        Tf -> ffmt . dF $ showFFloat fprec
        Te -> ffmt . dF $ showEFloat fprec
        TE -> ffmt . (uc .) . dF $ showEFloat fprec
        Tg -> ffmt . dF $ showGFloat (fmap fromIntegral $ precision spc)
        TG -> ffmt . (uc .) . dF $ showGFloat (fmap fromIntegral $ precision spc)
        Tany ->
            spc{
                ftype = case arg of
                    AInt{} -> Td
                    AChar{} -> Tc
                    AStr{} -> Ts
                    AFloat{} -> Tg
            } `apply` args
    where
    uc = map toUpper
    showBase b n = showIntAtBase b intToDigit n ""
    dF f = flip f ""
    infixr 0 <&>
    x <&> y = (x, (pad y ++))
    pC = padChar spc
    pad = padWith pC (width spc)
    (arg, args') = auncons args
    argf = arg2float arg
    fprec = Just $ maybe 6 fromIntegral (precision spc)
    fprefix
        | argf < 0 = "-"
        | FPlus `Set.member` flags spc = "+"
        | FSpace `Set.member` flags spc = " "
        | otherwise = ""
    argi = arg2int' arg
    argu
        | argi < 0 = pdie $ "invalid argument: expected unsigned int, got " ++ show argi
        | otherwise = argi
    arga = abs argi
    iprefix =
        case () of
        _
            | argi < 0 -> "-"
            | FPlus `Set.member` flags spc -> "+"
            | FSpace `Set.member` flags spc -> " "
            | otherwise -> ""
        ++
        if FAlt `Set.notMember` flags spc then ""
        else case ftype spc of
            To -> "0o"
            TO -> "0O"
            Tx -> "0x"
            TX -> "0X"
            Tb -> "0b"
            TB -> "0B"
            _ -> ""
    ifmt pp = (,) args' . (++) $
        (if pC /= '0' then pad else id) $
        iprefix ++
        maybe
            (
                if pC == '0'
                then padWith '0' (max 0 $ width spc - fromIntegral (length iprefix))
                else id
            )
            (padWith '0' . max 0)
            (precision spc)
            (pp arga)
    ffmt pp = (,) args' . (++) $
        case () of
        _
            | isNaN argf -> padWith ' ' (width spc) $ fprefix ++ "nan"
            | isInfinite argf -> padWith ' ' (width spc) $ fprefix ++ "inf"
            | otherwise ->
                fprefix ++
                (
                    if pC == '0'
                    then padWith '0' (max 0 $ width spc - fromIntegral (length fprefix))
                    else id
                ) (pp $ abs argf)