-- | Functions for parsing, processing and visualization of taxonomy data. -- -- === Usage example: -- * Read in taxonomy data -- -- > eitherTaxtree <- readNamedTaxonomy "/path/to/NCBI_taxonomydump_directory" -- -- * Process data -- -- > let subtree = extractTaxonomySubTreebyLevel [562] (fromRight eitherTaxTree) (Just 4) -- -- * Visualize result -- -- tput "/path/to/dotdirectory" subtree module Bio.Taxonomy ( -- * Datatypes -- Datatypes used to represent taxonomy data module Bio.TaxonomyData, -- * Parsing -- Functions prefixed with "read" read from filepaths, functions with parse from Haskell Strings. readTaxonomy, readNamedTaxonomy, parseTaxonomy, parseNCBITaxCitations, readNCBITaxCitations, parseNCBITaxDelNodes, readNCBITaxDelNodes, parseNCBITaxDivisions, readNCBITaxDivisions, parseNCBITaxGenCodes, readNCBITaxGenCodes, parseNCBITaxMergedNodes, readNCBITaxMergedNodes, parseNCBITaxNames, readNCBITaxNames, parseNCBITaxNodes, readNCBITaxNodes, parseNCBISimpleTaxons, readNCBISimpleTaxons, readNCBITaxonomyDatabase, -- * Processing compareSubTrees, extractTaxonomySubTreebyLevel, extractTaxonomySubTreebyRank, safeNodePath, getParentbyRank, -- * Visualization drawTreeComparison, drawTaxonomy, writeTree, writeDotTree, writeJsonTree ) where import Prelude import System.IO import Bio.TaxonomyData import Text.Parsec.Prim (runP) import Text.ParserCombinators.Parsec import Control.Monad import Data.List import qualified Data.Vector as V import Data.Maybe import qualified Data.Either.Unwrap as E import Data.Graph.Inductive import qualified Data.GraphViz as GV import qualified Data.GraphViz.Printing as GVP import qualified Data.GraphViz.Attributes.Colors as GVAC import qualified Data.GraphViz.Attributes.Complete as GVA import qualified Data.Text.Lazy as TL import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.Aeson.Encode as AE -------------------------------------------------------- --------------------------------------- -- Parsing functions -- | NCBI taxonomy dump nodes and names in the input directory path are parsed and a SimpleTaxon tree with scientific names for each node is generated. readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) readNamedTaxonomy directoryPath = do nodeNames <- readNCBITaxNames (directoryPath ++ "names.dmp") let scientificNameBS = B.pack "scientific name" if E.isLeft nodeNames then return (Left (E.fromLeft nodeNames)) else do let nodeNamesVector = V.fromList (E.fromRight nodeNames) let filteredNodeNames = V.filter (\a -> nameClass a == scientificNameBS) nodeNamesVector parseFromFileEncISO88591 (genParserNamedTaxonomyGraph filteredNodeNames) (directoryPath ++ "nodes.dmp") -- | NCBI taxonomy dump nodes and names in the input directory path are parsed and a SimpleTaxon tree is generated. readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) readTaxonomy = parseFromFileEncISO88591 genParserTaxonomyGraph -- | NCBI taxonomy dump nodes and names in the input directory path are parsed and a SimpleTaxon tree is generated. parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double) parseTaxonomy = parse genParserTaxonomyGraph "parseTaxonomy" genParserTaxonomyGraph :: GenParser Char st (Gr SimpleTaxon Double) genParserTaxonomyGraph = do nodesEdges <- many1 (try genParserGraphNodeEdge) optional eof let (nodesList,edgesList) = unzip nodesEdges let taxedges = filter (\(a,b,_) -> a /= b) edgesList --let taxnodes = concat nodesList --return (mkGraph taxnodes taxedges) return $! mkGraph nodesList taxedges genParserNamedTaxonomyGraph :: V.Vector TaxName -> GenParser Char st (Gr SimpleTaxon Double) genParserNamedTaxonomyGraph filteredNodeNames = do nodesEdges <- many1 (try genParserGraphNodeEdge) optional eof let (nodesList,edgesList) = unzip nodesEdges let taxedges = filter (\(a,b,_) -> a /= b) edgesList let taxnamednodes = map (setNodeScientificName filteredNodeNames) nodesList return $! mkGraph taxnamednodes taxedges setNodeScientificName :: V.Vector TaxName -> (t, SimpleTaxon) -> (t, SimpleTaxon) setNodeScientificName inputTaxNames (inputNode,inputTaxon) = outputNode where maybeRetrievedName = V.find (\a -> nameTaxId a == simpleTaxId inputTaxon) inputTaxNames retrievedName = maybe (B.pack "no name") nameTxt maybeRetrievedName outputNode = (inputNode,inputTaxon{simpleScientificName = retrievedName}) genParserGraphNodeEdge :: GenParser Char st ((Int,SimpleTaxon),(Int,Int,Double)) genParserGraphNodeEdge = do _simpleTaxId <- many1 digit string "\t|\t" _simpleParentTaxId <- many1 digit string "\t|\t" _simpleRank <- many1 (noneOf "\t") many1 (noneOf "\n") char '\n' let _simpleTaxIdInt = readInt _simpleTaxId let _simpleParentTaxIdInt = readInt _simpleParentTaxId return ((_simpleTaxIdInt,SimpleTaxon _simpleTaxIdInt B.empty _simpleParentTaxIdInt (readRank _simpleRank)),(_simpleTaxIdInt,_simpleParentTaxIdInt,1 :: Double)) -- | parse NCBITaxCitations from input string parseNCBITaxCitations :: String -> Either ParseError [TaxCitation] parseNCBITaxCitations = parse genParserNCBITaxCitations "parseTaxCitations" -- | parse NCBITaxCitations from input filePath readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation]) readNCBITaxCitations = parseFromFileEncISO88591 genParserNCBITaxCitations -- | parse NCBITaxDelNodes from input string parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode] parseNCBITaxDelNodes = parse genParserNCBITaxDelNodes "parseTaxDelNodes" -- | parse NCBITaxDelNodes from input filePath readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode]) readNCBITaxDelNodes = parseFromFile genParserNCBITaxDelNodes -- | parse NCBITaxDivisons from input string parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision] parseNCBITaxDivisions = parse genParserNCBITaxDivisons "parseTaxDivisons" -- | parse NCBITaxDivisons from input filePath readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision]) readNCBITaxDivisions = parseFromFile genParserNCBITaxDivisons -- | parse NCBITaxGenCodes from input string parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode] parseNCBITaxGenCodes = parse genParserNCBITaxGenCodes "parseTaxGenCodes" -- | parse NCBITaxGenCodes from input filePath readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode]) readNCBITaxGenCodes = parseFromFile genParserNCBITaxGenCodes -- | parse NCBITaxMergedNodes from input string parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode] parseNCBITaxMergedNodes = parse genParserNCBITaxMergedNodes "parseTaxMergedNodes" -- | parse NCBITaxMergedNodes from input filePath readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode]) readNCBITaxMergedNodes = parseFromFile genParserNCBITaxMergedNodes -- | parse NCBITaxNames from input string parseNCBITaxNames :: String -> Either ParseError [TaxName] parseNCBITaxNames = parse genParserNCBITaxNames "parseTaxNames" -- | parse NCBITaxNames from input filePath readNCBITaxNames :: String -> IO (Either ParseError [TaxName]) readNCBITaxNames = parseFromFile genParserNCBITaxNames -- | parse NCBITaxNames from input string parseNCBITaxNodes :: String -> Either ParseError TaxNode parseNCBITaxNodes = parse genParserNCBITaxNode "parseTaxNode" -- | parse NCBITaxCitations from input filePath readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode]) readNCBITaxNodes = parseFromFile genParserNCBITaxNodes -- | parse NCBISimpleTaxNames from input string parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon parseNCBISimpleTaxons = parse genParserNCBISimpleTaxon "parseSimpleTaxon" -- | parse NCBITaxCitations from input filePath readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon]) readNCBISimpleTaxons = parseFromFile genParserNCBISimpleTaxons -- | Parse the input as NCBITax datatype readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump) readNCBITaxonomyDatabase folder = do citations <- readNCBITaxCitations (folder ++ "citations.dmp") let citationsError = extractParseError citations taxdelNodes <- readNCBITaxDelNodes (folder ++ "delnodes.dmp") let delNodesError = extractParseError taxdelNodes divisons <- readNCBITaxDivisions (folder ++ "division.dmp") let divisonsError = extractParseError divisons genCodes <- readNCBITaxGenCodes (folder ++ "gencode.dmp") let genCodesError = extractParseError genCodes mergedNodes <- readNCBITaxMergedNodes (folder ++ "merged.dmp") let mergedNodesError = extractParseError mergedNodes names <- readNCBITaxNames (folder ++ "names.dmp") let namesError = extractParseError names taxnodes <- readNCBITaxNodes (folder ++ "nodes.dmp") let nodesError = extractParseError taxnodes let parseErrors = [citationsError, delNodesError, divisonsError, genCodesError, mergedNodesError, namesError, nodesError] return (checkParsing parseErrors citations taxdelNodes divisons genCodes mergedNodes names taxnodes) genParserNCBITaxCitations :: GenParser Char st [TaxCitation] genParserNCBITaxCitations = many1 genParserNCBITaxCitation genParserNCBITaxDelNodes :: GenParser Char st [TaxDelNode] genParserNCBITaxDelNodes = many1 genParserNCBITaxDelNode genParserNCBITaxDivisons :: GenParser Char st [TaxDivision] genParserNCBITaxDivisons = many1 genParserNCBITaxDivision genParserNCBITaxGenCodes :: GenParser Char st [TaxGenCode] genParserNCBITaxGenCodes = many1 genParserNCBITaxGenCode genParserNCBITaxMergedNodes :: GenParser Char st [TaxMergedNode] genParserNCBITaxMergedNodes = many1 genParserNCBITaxMergedNode genParserNCBITaxNames :: GenParser Char st [TaxName] genParserNCBITaxNames = many1 genParserNCBITaxName genParserNCBITaxNodes :: GenParser Char st [TaxNode] genParserNCBITaxNodes = many1 genParserNCBITaxNode genParserNCBISimpleTaxons :: GenParser Char st [SimpleTaxon] genParserNCBISimpleTaxons = many1 genParserNCBISimpleTaxon genParserNCBITaxCitation :: GenParser Char st TaxCitation genParserNCBITaxCitation = do _citId <- many1 digit string "\t|\t" _citKey <- optionMaybe (many1 (noneOf "\t")) string "\t|\t" _pubmedId <- optionMaybe (many1 digit) string "\t|\t" _medlineId <- optionMaybe (many1 digit) tab char '|' _url <- genParserTaxURL char '|' tab _text <- optionMaybe (many1 (noneOf "\t")) string "\t|\t" _taxIdList <- optionMaybe (many1 genParserTaxIdList) string "\t|\n" return $ TaxCitation (readInt _citId) _citKey (liftM readInt _pubmedId) (liftM readInt _medlineId) _url _text _taxIdList genParserNCBITaxDelNode :: GenParser Char st TaxDelNode genParserNCBITaxDelNode = do taxdelNode <- many1 digit space char '|' char '\n' return $ TaxDelNode (readInt taxdelNode) genParserNCBITaxDivision :: GenParser Char st TaxDivision genParserNCBITaxDivision = do _divisionId <- many1 digit string "\t|\t" _divisionCDE <- many1 upper string "\t|\t" _divisionName <- many1 (noneOf "\t") string "\t|\t" _comments <- optionMaybe (many1 (noneOf "\t")) string "\t|\n" return $ TaxDivision (readInt _divisionId) _divisionCDE _divisionName _comments genParserNCBITaxGenCode :: GenParser Char st TaxGenCode genParserNCBITaxGenCode = do _geneticCodeId <- many1 digit string "\t|\t" _abbreviation <- optionMaybe (many1 (noneOf "\t")) string "\t|\t" _genCodeName <- many1 (noneOf "\t") string "\t|\t" _cde <- many1 (noneOf "\t") string "\t|\t" _starts <- many1 (noneOf "\t") string "\t|\n" return $ TaxGenCode (readInt _geneticCodeId) _abbreviation _genCodeName _cde _starts genParserNCBITaxMergedNode :: GenParser Char st TaxMergedNode genParserNCBITaxMergedNode = do _oldTaxId <- many1 digit string "\t|\t" _newTaxId <- many1 digit string "\t|\n" return $ TaxMergedNode (readInt _oldTaxId) (readInt _newTaxId) genParserNCBITaxName :: GenParser Char st TaxName genParserNCBITaxName = do _taxId <- many1 digit string "\t|\t" _nameTxt <- many1 (noneOf "\t\n") string "\t|\t" _uniqueName <- optionMaybe (many1 (noneOf "\t\n")) string "\t|\t" _nameClass <- many1 (noneOf "\t\n") tab char '|' newline return $! TaxName (readInt _taxId) (B.pack _nameTxt) (maybe B.empty B.pack _uniqueName) (B.pack _nameClass) genParserNCBISimpleTaxon :: GenParser Char st SimpleTaxon genParserNCBISimpleTaxon = do _simpleTaxId <- many1 digit string "\t|\t" _simpleParentTaxId <- many1 digit string "\t|\t" _simpleRank <- many1 (noneOf "\t") many1 (noneOf "\n") char '\n' return $! SimpleTaxon (readInt _simpleTaxId) B.empty (readInt _simpleParentTaxId) (readRank _simpleRank) genParserNCBITaxNode :: GenParser Char st TaxNode genParserNCBITaxNode = do _taxId <- many1 digit string "\t|\t" _parentTaxId <- many1 digit string "\t|\t" _rank <- many1 (noneOf "\t") string "\t|\t" _emblCode <- optionMaybe (many1 (noneOf "\t")) string "\t|\t" _divisionId <- many1 digit string "\t|\t" _inheritedDivFlag <- many1 digit string "\t|\t" _geneticCodeId <- many1 digit string "\t|\t" _inheritedGCFlag <- many1 digit string "\t|\t" _mitochondrialGeneticCodeId <- many1 digit string "\t|\t" _inheritedMGCFlag <- many1 digit string "\t|\t" _genBankHiddenFlag <- many1 digit string "\t|\t" _hiddenSubtreeRootFlag <- many1 digit string "\t|\t" _comments <- optionMaybe (many1 (noneOf "\t")) tab char '|' char '\n' return $ TaxNode (readInt _taxId) (readInt _parentTaxId) (readRank _rank) _emblCode _divisionId (readBool _inheritedDivFlag) _geneticCodeId (readBool _inheritedGCFlag) _mitochondrialGeneticCodeId (readBool _inheritedMGCFlag) (readBool _genBankHiddenFlag) (readBool _hiddenSubtreeRootFlag) _comments --------------------------------------- -- Processing functions -- | Extract a subtree correpsonding to input node paths to root. Only nodes in level number distance to root are included. Used in Ids2TreeCompare tool. compareSubTrees :: [Gr SimpleTaxon Double] -> (Int,Gr CompareTaxon Double) compareSubTrees graphs = (length graphs,resultGraph) where treesLabNodes = map labNodes graphs treesLabEdges = map labEdges graphs mergedNodes = nub (concat treesLabNodes) mergedEdges = nub (concat treesLabEdges) --annotate node in which of the compared trees they are present comparedNodes = annotateTaxonsDifference treesLabNodes mergedNodes resultGraph = mkGraph comparedNodes mergedEdges :: Gr CompareTaxon Double annotateTaxonsDifference :: [[LNode SimpleTaxon]] -> [LNode SimpleTaxon] -> [LNode CompareTaxon] annotateTaxonsDifference treesNodes mergedtreeNodes = comparedNodes where comparedNodes = map (annotateTaxonDifference indexedTreesNodes) mergedtreeNodes indexedTreesNodes = zip [0..(length treesNodes)] treesNodes annotateTaxonDifference :: [(Int,[LNode SimpleTaxon])] -> LNode SimpleTaxon -> LNode CompareTaxon annotateTaxonDifference indexedTreesNodes mergedtreeNode = comparedNode where comparedNode = (simpleTaxId (snd mergedtreeNode),CompareTaxon (simpleScientificName (snd mergedtreeNode)) (simpleRank (snd mergedtreeNode)) currentInTree) currentInTree = concatMap (\(i,treeNodes) -> [i | mergedtreeNode `elem` treeNodes]) indexedTreesNodes -- | Extract a subtree corresponding to input node paths to root. Only nodes in level number distance to root are included. Used in Ids2Tree tool. extractTaxonomySubTreebyLevel :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double extractTaxonomySubTreebyLevel inputNodes graph levelNumber = taxonomySubTree where paths = nub (concatMap (getPath (1 :: Node) graph) inputNodes) contexts = map (context graph) paths lnodes = map labNode' contexts ledges = nub (concatMap (out graph . fst) lnodes) unfilteredTaxonomySubTree = mkGraph lnodes ledges :: Gr SimpleTaxon Double filteredLNodes = filterNodesByLevel levelNumber lnodes unfilteredTaxonomySubTree filteredledges = nub (concatMap (out graph . fst) filteredLNodes) taxonomySubTree = mkGraph filteredLNodes filteredledges :: Gr SimpleTaxon Double -- | Extract a subtree corresponding to input node paths to root. If a Rank is provided, all node that are less or equal are omitted extractTaxonomySubTreebyRank :: [Node] -> Gr SimpleTaxon Double -> Maybe Rank -> Gr SimpleTaxon Double extractTaxonomySubTreebyRank inputNodes graph highestRank = taxonomySubTree where paths = nub (concatMap (getPath (1 :: Node) graph) inputNodes) contexts = map (context graph) paths lnodes = map labNode' contexts filteredLNodes = filterNodesByRank highestRank lnodes filteredledges = nub (concatMap (out graph . fst) filteredLNodes) taxonomySubTree = mkGraph filteredLNodes filteredledges :: Gr SimpleTaxon Double getPath :: Node -> Gr SimpleTaxon Double -> Node -> Path getPath root graph node = sp node root graph -- | Extract parent node with specified Rank getParentbyRank :: Node -> Gr SimpleTaxon Double -> Maybe Rank -> Maybe (Node, SimpleTaxon) getParentbyRank inputNode graph requestedRank = filteredLNode where path = sp (inputNode :: Node) (1 :: Node) graph nodeContext = map (context graph) path lnode = map labNode' nodeContext filteredLNode = findNodeByRank requestedRank lnode -- | Filter nodes by distance from root filterNodesByLevel :: Maybe Int -> [(Node, SimpleTaxon)] -> Gr SimpleTaxon Double -> [(Node, SimpleTaxon)] filterNodesByLevel levelNumber inputNodes graph | isJust levelNumber = filteredNodes | otherwise = inputNodes --distances of all nodes to root where nodedistances = level (1::Node) (undir graph) sortedNodeDistances = sortBy sortByNodeID nodedistances sortedInputNodes = sortBy sortByNodeID inputNodes zippedNodeDistancesInputNodes = zip sortedNodeDistances sortedInputNodes zippedFilteredNodes = filter (\((_,distance),(_,_)) -> distance <= fromJust levelNumber) zippedNodeDistancesInputNodes filteredNodes = map snd zippedFilteredNodes sortByNodeID :: (Node,a) -> (Node,a) -> Ordering sortByNodeID (n1, _) (n2, _) | n1 < n2 = GT | n1 > n2 = LT | n1 == n2 = EQ | otherwise = EQ -- | Find only taxons of a specific rank in a list of input taxons findNodeByRank :: Maybe Rank -> [(t, SimpleTaxon)] -> Maybe (t, SimpleTaxon) findNodeByRank requestedRank inputNodes | isJust requestedRank = filteredNodes | otherwise = Nothing where filteredNodes = find (\(_,t) -> simpleRank t == fromJust requestedRank) inputNodes -- | Filter a list of input taxons for a minimal provided rank filterNodesByRank :: Maybe Rank -> [(t, SimpleTaxon)] -> [(t, SimpleTaxon)] filterNodesByRank highestRank inputNodes | isJust highestRank = filteredNodes | otherwise = inputNodes where filteredNodes = filter (\(_,t) -> simpleRank t >= fromJust highestRank) inputNodes ++ noRankNodes noRankNodes = filter (\(_,t) -> simpleRank t == Norank) inputNodes -- | Returns path between 2 maybe nodes. Used in TreeDistance tool. safeNodePath :: Maybe Node -> Gr SimpleTaxon Double -> Maybe Node -> Either String [Node] safeNodePath nodeid1 graphOutput nodeid2 | isJust nodeid1 && isJust nodeid2 = Right (sp (fromJust nodeid1) (fromJust nodeid2) (undir graphOutput)) | otherwise = Left "Both taxonomy ids must be provided for distance computation" --------------------------------------- -- Visualisation functions -- | Draw graph in dot format. Used in Ids2Tree tool. drawTaxonomy :: Gr SimpleTaxon Double -> String drawTaxonomy inputGraph = do let params = GV.nonClusteredParams {GV.isDirected = True , GV.globalAttributes = [GV.GraphAttrs [GVA.Size (GVA.GSize (20 :: Double) (Just (20 :: Double)) False)]] , GV.isDotCluster = const True , GV.fmtNode = \ (_,l) -> [GV.textLabel (TL.pack (show (simpleRank l) ++ "\n" ++ B.unpack (simpleScientificName l)))] , GV.fmtEdge = const [] } let dotFormat = GV.graphToDot params inputGraph let dottext = GVP.renderDot $ GVP.toDot dotFormat TL.unpack dottext -- | Draw tree comparison graph in dot format. Used in Ids2TreeCompare tool. drawTreeComparison :: (Int,Gr CompareTaxon Double) -> String drawTreeComparison (treeNumber,inputGraph) = do let cList = makeColorList treeNumber let params = GV.nonClusteredParams {GV.isDirected = True , GV.globalAttributes = [] , GV.isDotCluster = const True , GV.fmtNode = \ (_,l) -> [GV.textLabel (TL.pack (show (compareRank l) ++ "\n" ++ B.unpack (compareScientificName l))), GV.style GV.wedged, GVA.Color (selectColors (inTree l) cList)] , GV.fmtEdge = const [] } let dotFormat = GV.graphToDot params (grev inputGraph) let dottext = GVP.renderDot $ GVP.toDot dotFormat TL.unpack dottext -- | Colors from color list are selected according to in which of the compared trees the node is contained. selectColors :: [Int] -> [GVA.Color] -> GVAC.ColorList selectColors inTrees currentColorList = GVAC.toColorList (map (\i -> currentColorList !! i) inTrees) -- | A color list is sampled from the spectrum according to how many trees are compared. makeColorList :: Int -> [GVA.Color] makeColorList treeNumber = cList where cList = map (\i -> GVAC.HSV ((fromIntegral i/fromIntegral neededColors) * 0.708) 0.5 1.0) [0..neededColors] neededColors = treeNumber - 1 -- | Write tree representation either as dot or json to provided file path writeTree :: String -> String -> Gr SimpleTaxon Double -> IO () writeTree requestedFormat outputDirectoryPath inputGraph = do case requestedFormat of "dot" -> writeDotTree outputDirectoryPath inputGraph "json"-> writeJsonTree outputDirectoryPath inputGraph _ -> writeDotTree outputDirectoryPath inputGraph -- | Write tree representation as dot to provided file path. -- Graphviz tools like dot can be applied to the written .dot file to generate e.g. svg-format images. writeDotTree :: String -> Gr SimpleTaxon Double -> IO () writeDotTree outputDirectoryPath inputGraph = do let diagram = drawTaxonomy (grev inputGraph) writeFile (outputDirectoryPath ++ "taxonomy.dot") diagram -- | Write tree representation as json to provided file path. -- You can visualize the result for example with 3Djs. writeJsonTree :: String -> Gr SimpleTaxon Double -> IO () writeJsonTree outputDirectoryPath inputGraph = do let jsonOutput = AE.encode (grev inputGraph) L.writeFile (outputDirectoryPath ++ "taxonomy.json") jsonOutput --------------------------------------- -- Auxiliary functions readInt :: String -> Int readInt = read readBool :: String -> Bool readBool "0" = False readBool "1" = True readBool _ = False readRank :: String -> Rank readRank a = read a :: Rank genParserTaxIdList :: GenParser Char st Int genParserTaxIdList = do optional (char ' ') _taxId <- many1 digit optional (char ' ') return (readInt _taxId) genParserTaxURL :: GenParser Char st (Maybe String) genParserTaxURL = do tab url1 <- optionMaybe (many1 (noneOf "\t")) tab url2 <- optionMaybe (many1 (noneOf "|")) return (concatenateURLParts url1 url2) concatenateURLParts :: Maybe String -> Maybe String -> Maybe String concatenateURLParts url1 url2 | isJust url1 && isJust url2 = maybeStringConcat url1 url2 | isJust url1 && isNothing url2 = url1 | otherwise = Nothing maybeStringConcat :: Maybe String -> Maybe String -> Maybe String maybeStringConcat = liftM2 (++) readEncodedFile :: TextEncoding -> FilePath -> IO String readEncodedFile encoding name = do handle <- openFile name ReadMode hSetEncoding handle encoding hGetContents handle parseFromFileEncISO88591 :: Parser a -> String -> IO (Either ParseError a) parseFromFileEncISO88591 parser fname = do input <- readEncodedFile latin1 fname return (runP parser () fname input) -- | check a list of parsing results for presence of Left aka Parse error checkParsing :: [String] -> Either ParseError [TaxCitation] -> Either ParseError [TaxDelNode] -> Either ParseError [TaxDivision] -> Either ParseError [TaxGenCode] -> Either ParseError [TaxMergedNode] -> Either ParseError [TaxName] -> Either ParseError [TaxNode]-> Either [String] NCBITaxDump checkParsing parseErrors citations taxdelNodes divisons genCodes mergedNodes names taxnodes | join parseErrors == "" = Right (NCBITaxDump (E.fromRight citations) (E.fromRight taxdelNodes) (E.fromRight divisons) (E.fromRight genCodes) (E.fromRight mergedNodes) (E.fromRight names) (E.fromRight taxnodes)) | otherwise = Left parseErrors extractParseError :: Either ParseError a -> String extractParseError _parse | E.isLeft _parse = show (E.fromLeft _parse) | otherwise = ""