module Data.Text export { -- * Construction textLit; textOfVector; vectorOfText; paste; pastes; -- * Projections sizeOfText; -- * Conversions copyTextToVector; copyTextLitToVector; copyTextVecToVector; -- * Operators textOfWord8; -- * Showing showBool; showNat; showBinaryNat; showDecimalNat; showHexNat; showBaseNat; digitBinary; digitDecimal; digitHex; } import Data.Numeric.Nat import Data.Numeric.Bool import Data.Function import Data.List -- | The TextLit type is define in the runtime system and contains -- a pointer to the literal utf-8 text data in static memory. import foreign boxed type TextLit : Data -- | Runtime functions for dealing with unboxed text literals. import foreign c value -- | Box a text literal. makeTextLit : TextLit# -> TextLit -- | Get the size of a boxed text literal. sizeOfTextLit : TextLit -> Nat# -- | Get a single byte from a boxed text literal. indexTextLit : TextLit -> Nat# -> Word8# -- | Top level region containing text vectors. import foreign abstract type RegionText : Region -- | Capabilities to allocate and read top-level text vectors. import foreign abstract capability capTopTextAlloc : Alloc RegionText capTopTextRead : Read RegionText where ------------------------------------------------------------------------------- -- Names used by the Source Tetra desugarer to implement string literals. textLit (x : TextLit#) : Text = TextLit (makeTextLit x) paste (x y : Text) : Text = TextApp x y pastes (x y : Text) : Text = x % " " % y ------------------------------------------------------------------------------- data Text where TextLit : TextLit -> Text TextVec : Vector# RegionText Word8# -> Text TextApp : Text -> Text -> Text -- Construction --------------------------------------------------------------- -- | O(1). Wrap a vector of utf8 data into a text object. textOfVector (vec: Vector# RegionText Word8#): Text = TextVec vec -- | Copy a Text object into a flat vector of utf-8 bytes. vectorOfText [r1: Region] (tt: Text) : S (Alloc r1) (Vector# r1 Word8#) = extend r1 using r2 with { Alloc r2; Write r2 } in do -- Allocate a vector to hold all the data, -- including an extra null terminator byte. vec = vectorAlloc# [r2] [Word8#] (add (sizeOfText tt) 1) -- Copy the text data into the vector. iEnd = copyTextToVector tt vec 0 -- Write the null terminator. vectorWrite# vec iEnd 0w8 vec -- | Wrap a single 8-bit character into a text object. -- textOfWord8 (w8: Word8#): Text = TextVec (extend RegionText using r1 with { Alloc r1; Write r1 } in do -- Allocate the vector to hold the data, -- including an extra null terminator byte. vec = vectorAlloc# [r1] [Word8#] 2 -- Write the character. vectorWrite# vec 0 w8 -- Write the null terminator. vectorWrite# vec 1 0w8 vec) -- Projections ---------------------------------------------------------------- -- | Get the size of the utf8 data in a Text object, in bytes. -- -- * This is NOT the same as the length of the text string in characters, -- as single characters can be encoded using multiple bytes. -- sizeOfText (tt: Text): Nat# = case tt of TextLit lit -> sizeOfTextLit lit -- The size of a text vector is the vector size minus -- the null terminator byte. TextVec vec -> vectorLength# vec - 1 TextApp t1 t2 -> sizeOfText t1 + sizeOfText t2 -- Conversions ---------------------------------------------------------------- -- | Copy a text literal to a mutable vector of utf-8 bytes. copyTextToVector [r: Region] (tt: Text) (vec: Vector# r Word8#) (i0: Nat#) : S (Write r) Nat# = case tt of TextLit lit -> copyTextLitToVector lit vec i0 0 (sizeOfTextLit lit) TextVec vec2 -> copyTextVecToVector vec2 vec i0 0 (vectorLength# vec2) TextApp t1 t2 -> do i1 = copyTextToVector t1 vec i0 i2 = copyTextToVector t2 vec i1 i2 -- | Copy a text literal to a mutable vector of utf-8 bytes. copyTextLitToVector [r: Region] (tt: TextLit) (vec: Vector# r Word8#) (iDst iSrc nSrc: Nat#) : S (Write r) Nat# = case iSrc >= nSrc of True -> iDst False -> do vectorWrite# vec iDst (indexTextLit tt iSrc) copyTextLitToVector tt vec (iDst + 1) (iSrc + 1) nSrc -- | Copy a text source vector to a mutable destination of utf-8 bytes. copyTextVecToVector [r1 r2: Region] (vecSrc: Vector# r1 Word8#) (vecDst: Vector# r2 Word8#) (iDst iSrc nSrc: Nat#) : S (Read r1 + Write r2) Nat# = case iSrc >= nSrc of True -> iDst False -> do vectorWrite# vecDst iDst (vectorRead# vecSrc iSrc) copyTextVecToVector vecSrc vecDst (iDst + 1) (iSrc + 1) nSrc -- Operators ------------------------------------------------------------------ -- | If this text is not already in flat form then flatten it. -- -- This allocates a new contiguous vector for the text object and -- allows the program to release space for intermediate append nodes. -- flattenText (tt: Text): Text = case tt of -- Single text literals are already flat. TextLit lit -> tt -- Single text vectors are already flat. TextVec vec -> tt -- Text has an outer append-node, -- so flatten the whole thing. TextApp _ _ -> textOfVector (run vectorOfText [RegionText] tt) -- Showing -------------------------------------------------------------------- -- | Convert a Bool to a String. showBool (x : Bool#) : Text = if x then "True" else "False" -- | Show a natural number. showNat (x: Nat#): Text = showBaseNat 10 digitDecimal 0 "X" x ------------------------------------------------------------------------------- -- | Show a natural number, in binary. showBinaryNat (x: Nat#): Text = showBaseNat 2 digitBinary 0 "X" x digitBinary (n: Nat#): Text = case n of 0 -> "0" 1 -> "1" _ -> "X" -- | Show a natural number in decimal. showDecimalNat (x: Nat#): Text = showBaseNat 10 digitDecimal 0 "X" x digitDecimal (n: Nat#): Text = case n of 0 -> "0" 1 -> "1" 2 -> "2" 3 -> "3" 4 -> "4" 5 -> "5" 6 -> "6" 7 -> "7" 8 -> "8" 9 -> "9" _ -> "X" -- | Show a natural number in hex. showHexNat (x: Nat#): Text = showBaseNat 16 digitHex 0 "X" x digitHex (n: Nat#): Text = case n of 0 -> "0" 1 -> "1" 2 -> "2" 3 -> "3" 4 -> "4" 5 -> "5" 6 -> "6" 7 -> "7" 8 -> "8" 9 -> "9" 10 -> "a" 11 -> "b" 12 -> "c" 13 -> "d" 14 -> "e" 15 -> "f" _ -> "X" ------------------------------------------------------------------------------- -- | Show a natural number using an arbitrary base encoding. showBaseNat (base: Nat#) -- ^ Base of encoding. (digit: Nat# -> Text) -- ^ Show a digit in this base. (width: Nat#) -- ^ Width of output, or 0 to not pad. (pad: Text) -- ^ Character to pad output with. (x: Nat#) -- ^ Number to print. : Text = do s = showBaseNat' base digit width pad True x if x < 0 then "-" % s else s showBaseNat' base digit width pad first x | and (x == 0) first = showBaseNat' base digit (width - 1) pad False x % "0" | and (x == 0) (width > 0) = showBaseNat' base digit (width - 1) pad False x % pad | x == 0 = "" | otherwise = showBaseNat' base digit (width - 1) pad False (div x base) % digit (rem x base)