-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Basics for XML/HTML representation and processing -- -- We provide basic data types for XML representation, like names, -- attributes, entities. Yes we try hard to get type safe XML handling -- out of Haskell 98. We also provide information about exceptional HTML -- tags, like self-closing tags. This package provides common -- functionality that is both needed in list (tagchup) and tree (wraxml) -- representations of XML. @package xml-basic @version 0.1.3.2 module Text.XML.Basic.Entity type Name = String -- | A table mapping XML entity names to code points. Although entity -- references can in principle represent more than one character, the -- standard entities only contain one character. list :: [(Name, Char)] -- | This list excludes apos as Internet Explorer does not know -- about it. listInternetExploder :: [(Name, Char)] mapNameToChar :: Map Name Char mapCharToName :: Map Char Name -- | Lookup a numeric entity, the leading '#' must have already -- been removed. -- --
-- numberToChar "65" == Success 'A' -- numberToChar "x41" == Success 'A' -- numberToChar "x4E" == Success 'N' -- numberToChar "x4e" == Success 'N' -- numberToChar "Haskell" == Exception "..." -- numberToChar "" == Exception "..." -- numberToChar "89439085908539082" == Exception "..." ---- -- It's safe to use that for arbitrarily big number strings, since we abort -- parsing as soon as possible. -- --
-- numberToChar (repeat '1') == Exception "..." --numberToChar :: String -> Exceptional String Char instance GHC.Base.Semigroup (Text.XML.Basic.Entity.Update e a) instance GHC.Base.Monoid (Text.XML.Basic.Entity.Update e a) module Text.HTML.Basic.Entity type Name = String -- | A table mapping HTML entity names to code points. Although entity -- references can in principle represent more than one character, the -- standard entities only contain one character. list :: [(Name, Char)] listInternetExploder :: [(Name, Char)] mapNameToChar :: Map Name Char mapCharToName :: Map Char Name mapCharToNameInternetExploder :: Map Char Name mapNameToUpper :: Map String String mapNameToLower :: Map String String -- | Lookup a numeric entity, the leading '#' must have already -- been removed. -- --
-- numberToChar "65" == Success 'A' -- numberToChar "x41" == Success 'A' -- numberToChar "x4E" == Success 'N' -- numberToChar "x4e" == Success 'N' -- numberToChar "Haskell" == Exception "..." -- numberToChar "" == Exception "..." -- numberToChar "89439085908539082" == Exception "..." ---- -- It's safe to use that for arbitrarily big number strings, since we abort -- parsing as soon as possible. -- --
-- numberToChar (repeat '1') == Exception "..." --numberToChar :: String -> Exceptional String Char -- | We provide a type class for tag and attribute names. Instances can be -- names that preserve case, names with lowercase letters as canonical -- representation. module Text.XML.Basic.Name class Ord name => C name fromString :: C name => String -> name toString :: C name => name -> String -- | We need to distinguish between tag names and attribute names, because -- DOCTYPE as tag name must be written upper case, whereas as attribute -- name it may be written either way. class Ord ident => Tag ident tagFromString :: Tag ident => String -> ident tagToString :: Tag ident => ident -> String class Ord ident => Attribute ident attributeFromString :: Attribute ident => String -> ident attributeToString :: Attribute ident => ident -> String match :: C name => String -> name -> Bool matchAny :: C name => [String] -> name -> Bool module Text.XML.Basic.Format class C object run :: C object => object -> ShowS nl :: ShowS blank :: ShowS eq :: ShowS lt :: ShowS gt :: ShowS slash :: ShowS amp :: ShowS sharp :: ShowS colon :: ShowS semicolon :: ShowS apos :: ShowS quot :: ShowS lpar :: ShowS rpar :: ShowS exclam :: ShowS quest :: ShowS angle :: ShowS -> ShowS -- | Internet Explorer does not recognize &apos; and thus we have to -- format it literally. stringQuoted :: String -> ShowS name :: C name => name -> ShowS many :: (a -> ShowS) -> [a] -> ShowS instance Text.XML.Basic.Format.C GHC.Types.Char instance Text.XML.Basic.Format.C object => Text.XML.Basic.Format.C [object] -- | All kinds of representations of a character in XML combined in one -- type. Note that an entity can in principle represent a large text, -- thus an "XML character" might actually be a text. However the standard -- entities consist of one character. 
In contrast to our representation, -- HaXml uses Unicode substrings instead of Unicode characters, which is -- certainly more efficient for common XML texts that contain mainly -- Unicode text and only a few references. However our representation is -- unique, whereas HaXml may represent a text as "abc","def" or -- "abcdef". module Text.XML.Basic.Character data T Unicode :: Char -> T CharRef :: Int -> T EntityRef :: String -> T -- | If a reference cannot be resolved then an Exception -- constructor with an error message is returned. toUnicode :: T -> Exceptional String Char toUnicodeGen :: Map String Char -> T -> Exceptional String Char -- | If a reference cannot be resolved then a reference string is returned. toUnicodeOrFormat :: T -> ShowS toUnicodeOrFormatGen :: Map String Char -> T -> ShowS fromUnicode :: Char -> T fromCharRef :: Int -> T fromEntityRef :: String -> T maybeUnicode :: T -> Maybe Char maybeCharRef :: T -> Maybe Int maybeEntityRef :: T -> Maybe String isUnicode :: T -> Bool isCharRef :: T -> Bool isEntityRef :: T -> Bool isRef :: T -> Bool unicode :: Char -> T refC :: Int -> T refE :: String -> T -- | Convert unicode character to XML Char, where Unicode constructor is -- only used for ASCII characters. This is achieved by the following -- decision: If there is an entity reference, use this. If it is ASCII, -- represent it as Char. Otherwise use a character reference. asciiFromUnicode :: Char -> T asciiFromUnicodeGen :: Map Char String -> Char -> T -- | Generate XML character from Unicode character with minimal use of -- references. The only references used are the XML entity references -- &apos;, &quot;, &amp;, -- &lt;, &gt;. minimalRefFromUnicode :: Char -> T -- | Reduce the use of references. Represent as many characters as possible -- as Unicode characters, that is, using the Unicode constructor. 
reduceRef :: T -> T -- | Try to convert references to equivalent Unicode characters. reduceRefGen :: Map String Char -> T -> T validCharRef :: Int -> Bool switchUnicodeRuns :: (String -> a) -> (Int -> a) -> (String -> a) -> [T] -> [a] instance GHC.Classes.Eq Text.XML.Basic.Character.T instance GHC.Show.Show Text.XML.Basic.Character.T instance Text.XML.Basic.Format.C Text.XML.Basic.Character.T module Text.HTML.Basic.Character data T Unicode :: Char -> T CharRef :: Int -> T EntityRef :: String -> T toUnicode :: T -> Exceptional String Char toUnicodeOrFormat :: T -> ShowS fromUnicode :: Char -> T fromCharRef :: Int -> T fromEntityRef :: String -> T maybeUnicode :: T -> Maybe Char maybeCharRef :: T -> Maybe Int maybeEntityRef :: T -> Maybe String isUnicode :: T -> Bool isCharRef :: T -> Bool isEntityRef :: T -> Bool isRef :: T -> Bool unicode :: Char -> T refC :: Int -> T refE :: String -> T -- | Convert unicode character to XML Char. If there is a named reference, -- use this. If it is ASCII, represent it as Char. Otherwise use a -- numeric reference. asciiFromUnicode :: Char -> T asciiFromUnicodeInternetExploder :: Char -> T -- | Generate XML character from Unicode character with minimal use of -- references. The only references used are the XML entity references -- &apos;, &quot;, &amp;, -- &lt;, &gt;. minimalRefFromUnicode :: Char -> T reduceRef :: T -> T validCharRef :: Int -> Bool switchUnicodeRuns :: (String -> a) -> (Int -> a) -> (String -> a) -> [T] -> [a] isLower :: T -> Bool isUpper :: T -> Bool toLower :: T -> T toUpper :: T -> T -- | This name type preserves the characters case of its input. This is the -- right choice for case-sensitive names (XML) or if you like to preserve -- case of HTML tags. In the latter case it is however more difficult to -- match tag names. 
module Text.XML.Basic.Name.MixedCase newtype T Cons :: String -> T instance GHC.Classes.Ord Text.XML.Basic.Name.MixedCase.T instance GHC.Classes.Eq Text.XML.Basic.Name.MixedCase.T instance GHC.Show.Show Text.XML.Basic.Name.MixedCase.T instance Text.XML.Basic.Name.Tag Text.XML.Basic.Name.MixedCase.T instance Text.XML.Basic.Name.Attribute Text.XML.Basic.Name.MixedCase.T -- | This name type preserves the characters case of its input and divides -- the names into namespace and local identifier. module Text.XML.Basic.Name.Qualified data T Cons :: String -> T [namespace_, local_] :: T -> String namespace :: T T String local :: T T String fromString :: String -> T toString :: T -> String instance GHC.Classes.Ord Text.XML.Basic.Name.Qualified.T instance GHC.Classes.Eq Text.XML.Basic.Name.Qualified.T instance GHC.Show.Show Text.XML.Basic.Name.Qualified.T instance Text.XML.Basic.Name.Tag Text.XML.Basic.Name.Qualified.T instance Text.XML.Basic.Name.Attribute Text.XML.Basic.Name.Qualified.T -- | Position in a file. -- -- Cf. to Text.ParserCombinators.Parsec.Pos module Text.XML.Basic.Position -- | Position in a file consisting of file name, row and column -- coordinates. Upper left is (0,0), but show routines can display this -- with different offsets. data T type FileName = String type Row = Int type Column = Int new :: FileName -> Row -> Column -> T initialize :: FileName -> T row :: T T Row column :: T T Column fileName :: T T FileName updateOnChar :: Char -> T -> T updateOnString :: T -> String -> T -- | Convert the file position to a format that development environments -- can understand. toReportText :: T -> String instance GHC.Classes.Ord Text.XML.Basic.Position.T instance GHC.Classes.Eq Text.XML.Basic.Position.T instance GHC.Show.Show Text.XML.Basic.Position.T module Text.XML.Basic.String -- | should be [Word8] type Encoded = String -- | Decode encoded characters and XML references. Invalid references are -- silently skipped. 
decode :: (Encoded -> String) -> [T] -> String decodeGen :: Map Name Char -> (Encoded -> String) -> [T] -> String module Text.HTML.Basic.String -- | should be [Word8] type Encoded = String -- | Invalid references are silently skipped. decode :: (Encoded -> String) -> [T] -> String module Text.XML.Basic.Utility -- | Needs break from utility-ht in order to be as lazy as -- updateAppend'. updateAppend :: (a -> Bool) -> a -> (a -> a) -> [a] -> [a] -- | Apply f to the first element, where p holds. If no -- such element exists, append the default value deflt to the -- list. updateAppend' :: (a -> Bool) -> a -> (a -> a) -> [a] -> [a] module Text.XML.Basic.Attribute -- | An HTML attribute id="name" generates ("id","name") data T name string Cons :: Name name -> string -> T name string [name_] :: T name string -> Name name [value_] :: T name string -> string cons :: Attribute name => Name name -> string -> T name string new :: Attribute name => String -> string -> T name string lift :: (Name name -> string -> (Name name, string)) -> T name string -> T name string toPair :: Attribute name => T name string -> (String, string) fromPair :: Attribute name => (String, string) -> T name string name :: T (T name string) (Name name) value :: T (T name string) string -- | Each attribute is preceded by a space, that is there is a space -- between adjacent attributes and one leading space. formatListBlankHead :: (Attribute name, C string) => [T name string] -> ShowS mapName :: (Name name0 -> Name name1) -> T name0 string -> T name1 string newtype Name ident Name :: ident -> Name ident [unname] :: Name ident -> ident versionName :: Attribute name => Name name encodingName :: Attribute name => Name name versionString :: String encodingString :: String mapValues :: (str0 -> str1) -> [T name str0] -> [T name str1] mapValuesA :: Applicative f => (str0 -> f str1) -> [T name str0] -> f [T name str1] -- | Process specific attributes of an attribute list. 
The function name is -- inspired by Data.Map. adjustOn :: (Name name -> Bool) -> (string -> string) -> [T name string] -> [T name string] adjustOnA :: Applicative f => (Name name -> Bool) -> (string -> f string) -> [T name string] -> f [T name string] insert :: Attribute name => Name name -> string -> [T name string] -> [T name string] -- | Insert an attribute into an attribute list. If an attribute with the -- same name is already present, then the value of this attribute is -- changed to f newValue oldValue. The function name is -- analogous to Data.Map. insertWith :: Attribute name => (string -> string -> string) -> Name name -> string -> [T name string] -> [T name string] match :: (Attribute name, Eq string) => String -> string -> T name string -> Bool -- | matchAnyValue name [value0, value1] attr checks whether -- the attribute attr is (name, value0) or (name, value1). -- The values are compared case-sensitively. matchAnyValue :: (Attribute name, Eq string) => String -> [string] -> T name string -> Bool lookup :: Attribute name => Name name -> [T name string] -> Maybe string lookupLit :: Attribute name => String -> [T name string] -> Maybe string any :: (T name string -> Bool) -> [T name string] -> Bool anyName :: (Name name -> Bool) -> [T name string] -> Bool anyValue :: (string -> Bool) -> [T name string] -> Bool anyLit :: (Attribute name, Eq string) => String -> string -> [T name string] -> Bool anyNameLit :: Attribute name => String -> [T name string] -> Bool anyValueLit :: Eq string => string -> [T name string] -> Bool instance GHC.Classes.Ord ident => GHC.Classes.Ord (Text.XML.Basic.Attribute.Name ident) instance GHC.Classes.Eq ident => GHC.Classes.Eq (Text.XML.Basic.Attribute.Name ident) instance (GHC.Classes.Ord name, GHC.Classes.Ord string) => GHC.Classes.Ord (Text.XML.Basic.Attribute.T name string) instance (GHC.Classes.Eq name, GHC.Classes.Eq string) => GHC.Classes.Eq (Text.XML.Basic.Attribute.T name string) instance (Text.XML.Basic.Name.Attribute 
name, GHC.Show.Show string) => GHC.Show.Show (Text.XML.Basic.Attribute.T name string) instance (Text.XML.Basic.Name.Attribute name, Text.XML.Basic.Format.C string) => Text.XML.Basic.Format.C (Text.XML.Basic.Attribute.T name string) instance GHC.Base.Functor (Text.XML.Basic.Attribute.T name) instance Data.Foldable.Foldable (Text.XML.Basic.Attribute.T name) instance Data.Traversable.Traversable (Text.XML.Basic.Attribute.T name) instance GHC.Show.Show ident => GHC.Show.Show (Text.XML.Basic.Attribute.Name ident) instance Text.XML.Basic.Name.Attribute ident => Text.XML.Basic.Name.C (Text.XML.Basic.Attribute.Name ident) module Text.XML.Basic.ProcessingInstruction data T name string Known :: [T name string] -> T name string Unknown :: String -> T name string mapName :: (Name name0 -> Name name1) -> T name0 string -> T name1 string mapAttributes :: ([T name0 string0] -> [T name1 string1]) -> T name0 string0 -> T name1 string1 mapAttributesA :: Applicative f => ([T name0 string0] -> f [T name1 string1]) -> T name0 string0 -> f (T name1 string1) instance (GHC.Classes.Ord name, GHC.Classes.Ord string) => GHC.Classes.Ord (Text.XML.Basic.ProcessingInstruction.T name string) instance (GHC.Classes.Eq name, GHC.Classes.Eq string) => GHC.Classes.Eq (Text.XML.Basic.ProcessingInstruction.T name string) instance (Text.XML.Basic.Name.Attribute name, GHC.Show.Show string) => GHC.Show.Show (Text.XML.Basic.ProcessingInstruction.T name string) instance (Text.XML.Basic.Name.Attribute name, Text.XML.Basic.Format.C string) => Text.XML.Basic.Format.C (Text.XML.Basic.ProcessingInstruction.T name string) instance GHC.Base.Functor (Text.XML.Basic.ProcessingInstruction.T name) instance Data.Foldable.Foldable (Text.XML.Basic.ProcessingInstruction.T name) instance Data.Traversable.Traversable (Text.XML.Basic.ProcessingInstruction.T name) -- | We do not define a tag data type here, since this is too much bound to -- the particular use (e.g. list or tree structure). 
However we define a -- tag name and some special names. module Text.XML.Basic.Tag newtype Name ident Name :: ident -> Name ident [unname] :: Name ident -> ident -- | Deprecated: use doctypeName instead doctype :: Tag name => Name name doctypeName :: Tag name => Name name doctypeString :: String -- | Deprecated: use cdataName instead cdata :: Tag name => Name name cdataName :: Tag name => Name name cdataString :: String xmlName :: Tag name => Name name xmlString :: String maybeXMLEncoding :: (Tag name, Attribute name) => Name name -> T name string -> Maybe string instance GHC.Classes.Ord ident => GHC.Classes.Ord (Text.XML.Basic.Tag.Name ident) instance GHC.Classes.Eq ident => GHC.Classes.Eq (Text.XML.Basic.Tag.Name ident) instance GHC.Show.Show ident => GHC.Show.Show (Text.XML.Basic.Tag.Name ident) instance Text.XML.Basic.Name.Tag ident => Text.XML.Basic.Name.C (Text.XML.Basic.Tag.Name ident) -- | This name uses only lowercase characters as canonical representation, -- except for DOCTYPE and CDATA. This is optimal for -- processing HTML which is case-insensitive. module Text.XML.Basic.Name.LowerCase newtype T Cons :: String -> T instance GHC.Classes.Ord Text.XML.Basic.Name.LowerCase.T instance GHC.Classes.Eq Text.XML.Basic.Name.LowerCase.T instance GHC.Show.Show Text.XML.Basic.Name.LowerCase.T instance Text.XML.Basic.Name.Tag Text.XML.Basic.Name.LowerCase.T instance Text.XML.Basic.Name.Attribute Text.XML.Basic.Name.LowerCase.T -- | We do not define a tag data type here, since this is too much bound to -- the particular use (e.g. list or tree structure). 
However we define a -- tag name and several special names and predicates. module Text.HTML.Basic.Tag newtype Name ident Name :: ident -> Name ident [unname] :: Name ident -> ident -- | Deprecated: use doctypeName instead doctype :: Tag name => Name name doctypeName :: Tag name => Name name doctypeString :: String -- | Deprecated: use cdataName instead cdata :: Tag name => Name name cdataName :: Tag name => Name name cdataString :: String -- | Check whether an HTML tag is empty. isEmpty :: Tag name => Name name -> Bool -- | Some tags, namely those for text styles like FONT, B, I, are used -- quite sloppily. That is, they are not terminated or not terminated in -- the right order. We close them implicitly, if another tag closes and -- ignore non-matching closing tags. isSloppy :: Tag name => Name name -> Bool isInnerOf :: Tag name => Name name -> Name name -> Bool closes :: Tag name => Name name -> Name name -> Bool maybeMetaHTTPHeader :: (Tag name, Attribute name) => Name name -> [T name string] -> Maybe (string, string) -- | A simple routine that does not check for valid syntax of the -- Content-Type specification. -- -- In future we might use a distinct Encoding type instead of -- plain String. maybeMetaEncoding :: (Tag name, Attribute name) => Name name -> [T name String] -> Maybe String maybeMetaCharset :: (Tag name, Attribute name) => Name name -> [T name string] -> Maybe string -- | Extract charset from Content-Type declaration. -- --
-- encodingFromContentType "text/html; charset=UTF-8" == "utf-8" ---- -- The routine does not perform any syntax check. encodingFromContentType :: String -> String