-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Basics for XML/HTML representation and processing -- -- We provide basic data types for XML representation, like names, -- attributes, entities. Yes we try hard to get type safe XML handling -- out of Haskell 98. We also provide information about exceptional HTML -- tags, like self-closing tags. This package provides common -- functionality that is both needed in list (tagsoup-ht) and tree -- (wraxml) representations of XML. @package xml-basic @version 0.0.1 -- | Position in a file. -- -- Cf. to Text.ParserCombinators.Parsec.Pos module Text.XML.Basic.Position -- | Position in a file consisting of file name, row and column -- coordinates. Upper left is (0,0), but show routines can display this -- with different offsets. data T type FileName = String type Row = Int type Column = Int new :: FileName -> Row -> Column -> T initialize :: FileName -> T row :: T T Row column :: T T Column fileName :: T T FileName updateOnChar :: Char -> T -> T updateOnString :: T -> String -> T -- | Convert the file position to a format that development environments -- can understand. toReportText :: T -> String instance Eq T instance Ord T instance Show T module Text.XML.Basic.Utility -- | Needs break from utility-ht in order to be as lazy as -- updateAppend'. updateAppend :: (a -> Bool) -> a -> (a -> a) -> [a] -> [a] -- | Apply f to the first element, where p holds. If no -- such element exists, append the default value deflt to the -- list. updateAppend' :: (a -> Bool) -> a -> (a -> a) -> [a] -> [a] module Text.XML.Basic.Entity type Name = String -- | A table mapping XML entity names to code points. Although entity -- references can in principle represent more than one character, the -- standard entities only contain one character. list :: [(Name, Char)] -- | This list excludes apos as Internet Explorer does not know -- about it. 
listInternetExploder :: [(Name, Char)] mapNameToChar :: Map Name Char mapCharToName :: Map Char Name -- | Lookup a numeric entity, the leading '#' must have already -- been removed. -- --
--   numberToChar "65" == Success 'A'
--   numberToChar "x41" == Success 'A'
--   numberToChar "x4E" == Success 'N'
--   numberToChar "x4e" == Success 'N'
--   numberToChar "Haskell" == Exception "..."
--   numberToChar "" == Exception "..."
--   numberToChar "89439085908539082" == Exception "..."
--   
-- -- It's safe to use that for arbitrary big number strings, since we abort -- parsing as soon as possible. -- --
--   numberToChar (repeat '1') == Exception "..."
--   
numberToChar :: String -> Exceptional String Char instance Monoid (Update e a) module Text.HTML.Basic.Entity type Name = String -- | A table mapping HTML entity names to code points. Although entity -- references can in principle represent more than one character, the -- standard entities only contain one character. list :: [(Name, Char)] listInternetExploder :: [(Name, Char)] mapNameToChar :: Map Name Char mapCharToName :: Map Char Name mapCharToNameInternetExploder :: Map Char Name mapNameToUpper :: Map String String mapNameToLower :: Map String String -- | Lookup a numeric entity, the leading '#' must have already -- been removed. -- --
--   numberToChar "65" == Success 'A'
--   numberToChar "x41" == Success 'A'
--   numberToChar "x4E" == Success 'N'
--   numberToChar "x4e" == Success 'N'
--   numberToChar "Haskell" == Exception "..."
--   numberToChar "" == Exception "..."
--   numberToChar "89439085908539082" == Exception "..."
--   
-- -- It's safe to use that for arbitrary big number strings, since we abort -- parsing as soon as possible. -- --
--   numberToChar (repeat '1') == Exception "..."
--   
numberToChar :: String -> Exceptional String Char -- | We provide a type class for tag and attribute names. Instances can be -- names that preserve case, names with lowercase letters as canonical -- representation. To do: Qualified names. module Text.XML.Basic.Name class C name fromString :: (C name) => String -> name toString :: (C name) => name -> String -- | We need to distinguish between tag names and attribute names, because -- DOCTYPE as tag name must be written upper case, whereas as attribute -- name it may be written either way. class Tag ident tagFromString :: (Tag ident) => String -> ident tagToString :: (Tag ident) => ident -> String class Attribute ident attributeFromString :: (Attribute ident) => String -> ident attributeToString :: (Attribute ident) => ident -> String match :: (C name, Eq name) => String -> name -> Bool matchAny :: (C name, Eq name) => [String] -> name -> Bool module Text.XML.Basic.Format class C object run :: (C object) => object -> ShowS blank :: ShowS eq :: ShowS lt :: ShowS gt :: ShowS slash :: ShowS amp :: ShowS sharp :: ShowS colon :: ShowS semicolon :: ShowS apos :: ShowS quot :: ShowS lpar :: ShowS rpar :: ShowS exclam :: ShowS quest :: ShowS nl :: ShowS angle :: ShowS -> ShowS -- | Internet Explorer does not recognize ' and thus we have to -- format it literally. stringQuoted :: String -> ShowS name :: (C name) => name -> ShowS many :: (a -> ShowS) -> [a] -> ShowS instance (C object) => C [object] instance C Char -- | All kinds of representations of a character in XML combined in one -- type. Note that an entity can in principle represent a large text, -- thus an "XML character" might actually be a text. However the standard -- entities consist of one character. In contrast to our representation, -- HaXml uses Unicode substrings instead of Unicode characters, which is -- certainly more efficient for common XML texts that contain mainly -- Unicode text and only few references. 
However our representation is -- unique, whereas HaXmls may represent a text as -- abc,def or abcdef. module Text.XML.Basic.Character data T Unicode :: Char -> T CharRef :: Int -> T EntityRef :: String -> T -- | If a reference cannot be resolved then an Exception -- constructor with an error message is returned. toUnicode :: T -> Exceptional String Char toUnicodeGen :: Map String Char -> T -> Exceptional String Char -- | If a reference cannot be resolved then a reference string is returned. toUnicodeOrFormat :: T -> ShowS toUnicodeOrFormatGen :: Map String Char -> T -> ShowS fromUnicode :: Char -> T fromCharRef :: Int -> T fromEntityRef :: String -> T maybeUnicode :: T -> Maybe Char maybeCharRef :: T -> Maybe Int maybeEntityRef :: T -> Maybe String isUnicode :: T -> Bool isCharRef :: T -> Bool isEntityRef :: T -> Bool isRef :: T -> Bool unicode :: Char -> T refC :: Int -> T refE :: String -> T -- | Convert unicode character to XML Char. If there is a entity reference, -- use this. If it is ASCII, represent it as Char. Otherwise use a -- character reference. asciiFromUnicode :: Char -> T asciiFromUnicodeGen :: Map Char String -> Char -> T reduceRef :: T -> T -- | try to convert a References to equivalent Unicode characters reduceRefGen :: Map String Char -> T -> T validCharRef :: Int -> Bool switchUnicodeRuns :: (String -> a) -> (Int -> a) -> (String -> a) -> [T] -> [a] instance Eq T instance C T instance Show T module Text.HTML.Basic.Character data T Unicode :: Char -> T CharRef :: Int -> T EntityRef :: String -> T toUnicode :: T -> Exceptional String Char toUnicodeOrFormat :: T -> ShowS fromUnicode :: Char -> T fromCharRef :: Int -> T fromEntityRef :: String -> T maybeUnicode :: T -> Maybe Char maybeCharRef :: T -> Maybe Int maybeEntityRef :: T -> Maybe String isUnicode :: T -> Bool isCharRef :: T -> Bool isEntityRef :: T -> Bool isRef :: T -> Bool unicode :: Char -> T refC :: Int -> T refE :: String -> T -- | Convert unicode character to XML Char. 
If there is a named reference, -- use this. If it is ASCII, represent it as Char. Otherwise use a -- numeric reference. asciiFromUnicode :: Char -> T asciiFromUnicodeInternetExploder :: Char -> T reduceRef :: T -> T validCharRef :: Int -> Bool switchUnicodeRuns :: (String -> a) -> (Int -> a) -> (String -> a) -> [T] -> [a] isLower :: T -> Bool isUpper :: T -> Bool toLower :: T -> T toUpper :: T -> T module Text.XML.Basic.Attribute -- | An HTML attribute id="name" generates ("id","name") data T name string Cons :: Name name -> string -> T name string name_ :: T name string -> Name name value_ :: T name string -> string cons :: (Attribute name) => Name name -> string -> T name string new :: (Attribute name) => String -> string -> T name string lift :: (Name name -> string -> (Name name, string)) -> T name string -> T name string toPair :: (Attribute name) => T name string -> (String, string) fromPair :: (Attribute name) => (String, string) -> T name string name :: T (T name string) (Name name) value :: T (T name string) string -- | Each attribute is preceded by a space, that is there is a space -- between adjacent attributes and one leading space. formatListBlankHead :: (Attribute name, C string) => [T name string] -> ShowS mapName :: (name0 -> name1) -> T name0 string -> T name1 string newtype Name ident Name :: ident -> Name ident unname :: Name ident -> ident mapValues :: (str0 -> str1) -> ([T name str0] -> [T name str1]) mapValuesA :: (Applicative f) => (str0 -> f str1) -> ([T name str0] -> f [T name str1]) -- | Process specific attributes of an attribute list. The function name is -- inspired by Data.Map. adjustOn :: (Name name -> Bool) -> (string -> string) -> ([T name string] -> [T name string]) adjustOnA :: (Applicative f) => (Name name -> Bool) -> (string -> f string) -> ([T name string] -> f [T name string]) insert :: (Attribute name, Eq name) => Name name -> string -> ([T name string] -> [T name string]) -- | Insert an attribute into an attribute list. 
If an attribute with the -- same name is already present, then the value of this attribute is -- changed to f newValue oldValue. The function name is -- analogous to Data.Map. insertWith :: (Attribute name, Eq name) => (string -> string -> string) -> Name name -> string -> ([T name string] -> [T name string]) match :: (Attribute name, Eq name, Eq string) => String -> string -> T name string -> Bool -- | matchAnyValue name [value0, value1] attrs checks whether -- (name, value0) or (name, value1) is contained in -- attrs. The values are handled case-sensitively. matchAnyValue :: (Attribute name, Eq name, Eq string) => String -> [string] -> T name string -> Bool lookup :: (Attribute name, Eq name) => Name name -> [T name string] -> Maybe string lookupLit :: (Attribute name, Eq name) => String -> [T name string] -> Maybe string any :: (T name string -> Bool) -> [T name string] -> Bool anyName :: (Name name -> Bool) -> [T name string] -> Bool anyValue :: (string -> Bool) -> [T name string] -> Bool anyLit :: (Eq name, Attribute name, Eq string) => String -> string -> [T name string] -> Bool anyNameLit :: (Eq name, Attribute name) => String -> [T name string] -> Bool anyValueLit :: (Eq string) => string -> [T name string] -> Bool instance (Eq ident) => Eq (Name ident) instance (Ord ident) => Ord (Name ident) instance (Eq name, Eq string) => Eq (T name string) instance (Ord name, Ord string) => Ord (T name string) instance (Attribute ident) => C (Name ident) instance (Show ident) => Show (Name ident) instance Traversable (T name) instance Foldable (T name) instance Functor (T name) instance (Attribute name, C string) => C (T name string) instance (Attribute name, Show string) => Show (T name string) module Text.XML.Basic.ProcessingInstruction data T name string Known :: [T name string] -> T name string Unknown :: String -> T name string mapName :: (name0 -> name1) -> T name0 string -> T name1 string mapAttributes :: ([T name0 string0] -> [T name1 string1]) -> T name0 
string0 -> T name1 string1 mapAttributesA :: (Applicative f) => ([T name0 string0] -> f [T name1 string1]) -> T name0 string0 -> f (T name1 string1) instance (Show string, Attribute name) => Show (T name string) instance (Eq name, Eq string) => Eq (T name string) instance (Ord name, Ord string) => Ord (T name string) instance Traversable (T name) instance Foldable (T name) instance Functor (T name) instance (Attribute name, C string) => C (T name string) -- | This name type preserves the characters case of its input. This is the -- right choice for case-sensitive names (XML) or if you like to preserve -- case of HTML tags. In the latter case it is however more difficult to -- match tag names. module Text.XML.Basic.Name.MixedCase newtype T Cons :: String -> T instance Eq T instance Ord T instance Attribute T instance Tag T instance Show T -- | This name type preserves the characters case of its input and divides -- the names into namespace and local identifier. module Text.XML.Basic.Name.Qualified data T Cons :: String -> String -> T namespace_ :: T -> String local_ :: T -> String namespace :: T T String local :: T T String fromString :: String -> T toString :: T -> String instance Show T instance Eq T instance Ord T instance Attribute T instance Tag T -- | We do not define a tag data type here, since this is too much bound to -- the particular use (e.g. list or tree structure). However we define a -- tag name and some special names. module Text.XML.Basic.Tag newtype Name ident Name :: ident -> Name ident unname :: Name ident -> ident doctype :: (Tag name) => Name name doctypeString :: String cdata :: (Tag name) => Name name cdataString :: String instance (Eq ident) => Eq (Name ident) instance (Ord ident) => Ord (Name ident) instance (Tag ident) => C (Name ident) instance (Show ident) => Show (Name ident) -- | This name uses only lowercase characters as canonical representation, -- except for DOCTYPE and CDATA. 
This is optimal for -- processing HTML which is case-insensitive. module Text.XML.Basic.Name.LowerCase newtype T Cons :: String -> T instance Eq T instance Ord T instance Attribute T instance Tag T instance Show T -- | We do not define a tag data type here, since this is too much bound to -- the particular use (e.g. list or tree structure). However we define a -- tag name and several module Text.HTML.Basic.Tag newtype Name ident Name :: ident -> Name ident unname :: Name ident -> ident doctype :: (Tag name) => Name name doctypeString :: String cdata :: (Tag name) => Name name cdataString :: String -- | Check whether an HTML tag is empty. isEmpty :: (Tag name, Ord name) => Name name -> Bool -- | Some tags, namely those for text styles like FONT, B, I, are used -- quite sloppily. That is, they are not terminated or not terminated in -- the right order. We close them implicitly, if another tag closes and -- ignore non-matching closing tags. isSloppy :: (Tag name, Ord name) => Name name -> Bool isInnerOf :: (Tag name, Ord name) => Name name -> Name name -> Bool closes :: (Tag name, Ord name) => Name name -> Name name -> Bool