-- | -- Common useful options -- -- Version : $Id: XmlOptions.hs,v 1.1 2006/11/09 20:27:42 hxml Exp $ -- -- module Text.XML.HXT.DOM.XmlOptions ( inputOptions , relaxOptions , outputOptions , generalOptions , versionOptions , showOptions , selectOptions , removeOptions , optionIsSet , isTrueValue ) where import Text.XML.HXT.DOM.XmlKeywords import Text.XML.HXT.DOM.TypeDefs import Data.Maybe import System.Console.GetOpt -- ------------------------------------------------------------ -- -- | -- commonly useful options for XML input -- -- can be used for option definition with haskell getopt -- -- defines options: 'a_trace', 'a_proxy', 'a_use_curl', 'a_do_not_use_curl', 'a_options_curl', 'a_encoding', -- 'a_issue_errors', 'a_do_not_issue_errors', 'a_parse_html', 'a_parse_by_mimetype', 'a_tagsoup' 'a_issue_warnings', 'a_do_not_issue_warnings', -- 'a_parse_xml', 'a_validate', 'a_do_not_validate', 'a_canonicalize', 'a_do_not_canonicalize', --- 'a_preserve_comment', 'a_do_not_preserve_comment', 'a_check_namespaces', 'a_do_not_check_namespaces', -- 'a_remove_whitespace', 'a_do_not_remove_whitespace' inputOptions :: [OptDescr (String, String)] inputOptions = [ Option "t" [a_trace] (OptArg trc "LEVEL") "trace level (0-4), default 1" , Option "p" [a_proxy] (ReqArg (att a_proxy) "PROXY") "proxy for http access (e.g. \"www-cache:3128\")" , Option "" [a_redirect] (NoArg (att a_redirect v_1)) "automatically follow redirected URIs" , Option "" [a_no_redirect] (NoArg (att a_redirect v_0)) "switch off following redirected URIs" , Option "" [a_use_curl] (NoArg (att a_use_curl v_1)) "obsolete, since hxt-8.1 HTTP access is always done with curl bindings" , Option "" [a_do_not_use_curl] (NoArg (att a_use_curl v_0)) "obsolete, since hxt-8.1 HTTP access is always done with curl bindings" , Option "" [a_options_curl] (ReqArg (att a_options_curl) "STR") "additional curl options, e.g. for timeout, ..." , Option "" [a_default_baseuri] (ReqArg (att transferURI) "URI") "default base URI, default: \"file:////\"" , Option "e" [a_encoding] (ReqArg (att a_encoding) "CHARSET") ( "default document encoding (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ", ...)" ) , Option "" [a_mime_types] (ReqArg (att a_mime_types) "FILE") "set mime type configuration file, e.g. \"/etc/mime.types\"" , Option "" [a_issue_errors] (NoArg (att a_issue_errors v_1)) "issue all errorr messages on stderr (default)" , Option "" [a_do_not_issue_errors] (NoArg (att a_issue_errors v_0)) "ignore all error messages" , Option "" [a_ignore_encoding_errors] (NoArg (att a_ignore_encoding_errors v_1)) "ignore encoding errors" , Option "" [a_ignore_none_xml_contents] (NoArg (att a_ignore_none_xml_contents v_1)) "discards all contents of none XML/HTML documents, only the meta info remains in the doc tree" , Option "" [a_accept_mimetypes] (ReqArg (att a_accept_mimetypes) "MIMETYPES") "only accept documents matching the given list of mimetype specs" , Option "H" [a_parse_html] (NoArg (att a_parse_html v_1)) "parse input as HTML, try to interprete everything as HTML, no validation" , Option "M" [a_parse_by_mimetype] (NoArg (att a_parse_by_mimetype v_1)) "parse dependent on mime type: text/html as HTML, text/xml and text/xhtml and others as XML, else no parse" , Option "" [a_parse_xml] (NoArg (att a_parse_html v_0)) "parse input as XML, (default)" , Option "" [a_strict_input] (NoArg (att a_strict_input v_1)) "read input files strictly, this ensures closing the files correctly even if not read completely" , Option "T" [a_tagsoup] (NoArg (att a_tagsoup v_1)) "lazy tagsoup parser, for HTML and XML, no DTD, no validation, no PIs, only XHTML entityrefs" , Option "" [a_issue_warnings] (NoArg (att a_issue_warnings v_1)) "issue warnings, when parsing HTML (default)" , Option "Q" [a_do_not_issue_warnings] (NoArg (att a_issue_warnings v_0)) "ignore warnings, when parsing HTML" , Option "" [a_validate] (NoArg (att a_validate v_1)) "document validation when parsing XML (default)" , Option "w" [a_do_not_validate] (NoArg (att a_validate v_0)) "only wellformed check, no validation" , Option "" [a_canonicalize] (NoArg (att a_canonicalize v_1)) "canonicalize document, remove DTD, comment, transform CDATA, CharRef's, ... (default)" , Option "c" [a_do_not_canonicalize] (NoArg (att a_canonicalize v_0)) "do not canonicalize document, don't remove DTD, comment, don't transform CDATA, CharRef's, ..." , Option "C" [a_preserve_comment] (NoArg (att a_preserve_comment v_1)) "don't remove comments during canonicalisation" , Option "" [a_do_not_preserve_comment] (NoArg (att a_preserve_comment v_0)) "remove comments during canonicalisation (default)" , Option "n" [a_check_namespaces] (NoArg (att a_check_namespaces v_1)) "tag tree with namespace information and check namespaces" , Option "" [a_do_not_check_namespaces] (NoArg (att a_check_namespaces v_0)) "ignore namespaces (default)" , Option "r" [a_remove_whitespace] (NoArg (att a_remove_whitespace v_1)) "remove redundant whitespace, simplifies tree and processing" , Option "" [a_do_not_remove_whitespace] (NoArg (att a_remove_whitespace v_0)) "don't remove redundant whitespace (default)" ] where att n v = (n, v) trc = att a_trace . show . max 0 . min 9 . (read :: String -> Int) . ('0':) . filter (`elem` "0123456789") . fromMaybe v_1 -- | available Relax NG validation options -- -- defines options -- 'a_check_restrictions', 'a_validate_externalRef', 'a_validate_include', 'a_do_not_check_restrictions', -- 'a_do_not_validate_externalRef', 'a_do_not_validate_include' relaxOptions :: [OptDescr (String, String)] relaxOptions = [ Option "X" [a_relax_schema] (ReqArg (att a_relax_schema) "SCHEMA") "validation with Relax NG, SCHEMA is the URI for the Relax NG schema" , Option "" [a_check_restrictions] (NoArg (a_check_restrictions, v_1)) "check Relax NG schema restrictions during schema simplification (default)" , Option "" [a_do_not_check_restrictions] (NoArg (a_check_restrictions, v_0)) "do not check Relax NG schema restrictions" , Option "" [a_validate_externalRef] (NoArg (a_validate_externalRef, v_1)) "validate a Relax NG schema referenced by a externalRef-Pattern (default)" , Option "" [a_do_not_validate_externalRef] (NoArg (a_validate_externalRef, v_0)) "do not validate a Relax NG schema referenced by an externalRef-Pattern" , Option "" [a_validate_include] (NoArg (a_validate_include, v_1)) "validate a Relax NG schema referenced by an include-Pattern (default)" , Option "" [a_do_not_validate_include] (NoArg (a_validate_include, v_0)) "do not validate a Relax NG schema referenced by an include-Pattern" {- , Option "" [a_output_changes] (NoArg (a_output_changes, v_1)) "output Pattern transformations in case of an error" , Option "" [a_do_not_collect_errors] (NoArg (a_do_not_collect_errors, v_1)) "stop Relax NG simplification after the first error has occurred" -} ] where att n v = (n, v) -- | -- commonly useful options for XML output -- -- defines options: 'a_indent', 'a_output_encoding', 'a_output_file', 'a_output_html' outputOptions :: [OptDescr (String, String)] outputOptions = [ Option "i" [a_indent] (NoArg (att a_indent v_1)) "indent XML output for readability" , Option "o" [a_output_encoding] (ReqArg (att a_output_encoding) "CHARSET") ( "encoding of output (" ++ utf8 ++ ", " ++ isoLatin1 ++ ", " ++ usAscii ++ ")" ) , Option "f" [a_output_file] (ReqArg (att a_output_file) "FILE") "output file for resulting document (default: stdout)" , Option "" [a_output_html] (NoArg (att a_output_html v_1)) "output of none ASCII chars as HTMl entity references" , Option "" [a_no_xml_pi] (NoArg (att a_no_xml_pi v_1)) ("output without processing instruction, useful in combination with --" ++ show a_output_html) , Option "" [a_output_xhtml] (NoArg (att a_output_xhtml v_1)) "output of HTML elements with empty content (script, ...) done in format instead of " , Option "" [a_no_empty_elem_for] (ReqArg (att a_no_empty_elem_for) "NAMES") "output of empty elements done in format only for given list of element names" , Option "" [a_no_empty_elements] (NoArg (att a_no_empty_elements v_1)) "output of empty elements done in format instead of " , Option "" [a_add_default_dtd] (NoArg (att a_add_default_dtd v_1)) "add the document type declaration given in the input document" ] where att n v = (n, v) -- | -- commonly useful options -- -- defines options: 'a_verbose', 'a_help' generalOptions :: [OptDescr (String, String)] generalOptions = [ Option "v" [a_verbose] (NoArg (a_verbose, v_1)) "verbose output" , Option "h?" [a_help] (NoArg (a_help, v_1)) "this message" ] -- | -- defines 'a_version' option versionOptions :: [OptDescr (String, String)] versionOptions = [ Option "V" [a_version] (NoArg (a_version, v_1)) "show program version" ] -- | -- debug output options showOptions :: [OptDescr (String, String)] showOptions = [ Option "" [a_show_tree] (NoArg (a_show_tree, v_1)) "output tree representation instead of document source" , Option "" [a_show_haskell] (NoArg (a_show_haskell, v_1)) "output internal Haskell representation instead of document source" ] -- ------------------------------------------------------------ -- | -- select options from a predefined list of option desciptions selectOptions :: [String] -> [OptDescr (String, String)] -> [OptDescr (String, String)] selectOptions ol os = concat . map (\ on -> filter (\ (Option _ ons _ _) -> on `elem` ons) os) $ ol removeOptions :: [String] -> [OptDescr (String, String)] -> [OptDescr (String, String)] removeOptions ol os = filter (\ (Option _ ons _ _) -> not . any (`elem` ol) $ ons ) os -- | -- check whether an option is set -- -- reads the value of an attribute, usually applied to a document root node, -- and checks if the value represents True. The following strings are interpreted -- as true: \"1\", \"True\", \"true\", \"yes\", \"Yes\". optionIsSet :: String -> Attributes -> Bool optionIsSet n = isTrueValue . lookupDef "" n -- | check whether a string represents True -- -- definition: -- -- > isTrueValue = (`elem` ["1", "True", "true", "Yes", "yes"]) isTrueValue :: String -> Bool isTrueValue = (`elem` ["1", "True", "true", "Yes", "yes"]) -- ------------------------------------------------------------