module XmlToJson(xmlToJson, Flag(..)) where
import Control.Applicative ((*>), (<*))
import Control.Arrow (first, (&&&), (***), (>>>))
import Control.Arrow.ArrowTree (ArrowTree)
import Control.Category (id)
import Control.Monad (forM_)
import Data.Maybe (catMaybes)
import Data.Tree.NTree.TypeDefs
import Data.Tree.Class (Tree)
import Prelude hiding (id)
import Text.Regex.Posix ((=~))
import Text.XML.HXT.Core (ArrowXml, XNode (..), XmlTree,
deep, getAttrl, getChildren,
getName, getText, hasName, isElem,
localPart, no, readDocument, runLA,
runX, withValidate)
#ifdef UseCurl
import Text.XML.HXT.Curl
#endif
import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Lazy.Char8 as BS
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as Vector
import Text.XML.HXT.Expat (withExpat)
data Flag = Input String | StartFrom String | Multiline | SkipRoots | NoIgnoreNulls | WrapArray | NoCollapseText String | ShowHelp
deriving (Show, Eq)
getStartNodes :: ArrowXml cat => [Flag] -> cat (NTree XNode) XmlTree
getStartNodes flags =
case [x | StartFrom x <- flags] of
[] -> getChildren >>> isElem
[x] -> deep (isElem >>> hasName x)
_ -> error "Expecting at most one --tag-name (-t) option"
getCollapseTextRegex :: [Flag] -> Maybe String
getCollapseTextRegex flags = singleOrNothing "Expecting at most one --no-collapse-text option" [x | NoCollapseText x <- flags]
wrapAction :: [Flag] -> IO a -> IO a
wrapAction flags act
| WrapArray `elem` flags = putStr "[" *> act <* putStr "]"
| otherwise = act
multiline :: [Flag] -> [BS.ByteString] -> BS.ByteString
multiline flags = case (WrapArray `elem` flags, Multiline `elem` flags) of
(False, _) -> BS.intercalate (BS.pack "\n")
(True, False) -> BS.intercalate (BS.pack ",")
(True, True) -> BS.intercalate (BS.pack ",\n")
ignoreNulls :: [Flag] -> [Aeson.Value] -> [Aeson.Value]
ignoreNulls flags
| NoIgnoreNulls `notElem` flags =
filter (/= Aeson.Null)
| otherwise = id
nodesFilter :: (Data.Tree.Class.Tree t, Control.Arrow.ArrowTree.ArrowTree a) => [Flag] -> a (t b) (t b)
nodesFilter flags
| SkipRoots `elem` flags = getChildren
| otherwise = id
xmlToJson :: [Flag] -> [String] -> IO ()
xmlToJson flags inputFiles =
forM_ inputFiles $ \src -> do
rootElems <-
runX $
readDocument
[ withValidate no
, withExpat True
#ifdef UseCurl
, withCurl []
#endif
]
src
>>> getStartNodes flags
>>> nodesFilter flags
wrapAction flags
. BS.putStr . multiline flags
. map Aeson.encode
. ignoreNulls flags
. map (wrapRoot . xmlTreeToJSON (getCollapseTextRegex flags))
$ rootElems
data JSValueName = Text | Tag String | Attr String
deriving (Eq, Ord, Show)
concatMapValues :: (Ord k) => [M.Map k v] -> M.Map k [v]
concatMapValues = M.unionsWith (++) . (fmap . fmap) (: [])
getAttrVals :: XmlTree -> [(String, String)]
getAttrVals = runLA (getAttrl >>> getName &&& (getChildren >>> getText))
arrayValuesToJSONArrays :: (Ord k) => M.Map k [Aeson.Value] -> M.Map k Aeson.Value
arrayValuesToJSONArrays = M.mapMaybe f
where
f [] = Nothing
f [x] = Just x
f xss = Just $ Aeson.Array . Vector.fromList $ xss
packJSValueName :: JSValueName -> T.Text
packJSValueName Text = T.pack "value"
packJSValueName (Attr x) = T.pack x
packJSValueName (Tag x) = T.pack x
wrapRoot :: Maybe (JSValueName, Aeson.Value) -> Aeson.Value
wrapRoot Nothing = Aeson.Null
wrapRoot (Just (a, b)) = Aeson.object [(packJSValueName a, b)]
tagMapToJSValue :: Bool -> M.Map JSValueName Aeson.Value -> Aeson.Value
tagMapToJSValue collapseTextRegex m = case (collapseTextRegex, M.toList m) of
(True, [(Text, val)]) -> val
_ ->
Aeson.Object . HashMap.fromList . (map . first) packJSValueName $ M.toList m
xmlTreeToJSON :: Maybe String -> XmlTree -> Maybe (JSValueName, Aeson.Value)
xmlTreeToJSON collapseTextRegex node@(NTree (XTag qName _) children)
= Just (Tag (localPart qName),
tagMapToJSValue shouldCollapseText objMap)
where
objMap =
arrayValuesToJSONArrays
. concatMapValues
. map (uncurry M.singleton)
. (++) attrVals
. catMaybes
$ map (xmlTreeToJSON collapseTextRegex) children
attrVals =
map (Attr *** Aeson.String . T.pack) $ getAttrVals node
shouldCollapseText = case collapseTextRegex of
Nothing -> True
Just "" -> False
Just pattern -> not $ localPart qName =~ pattern
xmlTreeToJSON _ (NTree (XText str) _)
| T.null text = Nothing
| otherwise = Just (Text, Aeson.String text)
where
text = T.strip $ T.pack str
xmlTreeToJSON _ _ = Nothing
singleOrNothing :: String -> [a] -> Maybe a
singleOrNothing _ [] = Nothing
singleOrNothing _ [x] = Just x
singleOrNothing msg _ = error msg