-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Library for parsing, processing and visualization of taxonomy data -- -- Haskell cabal Taxonomy library contains tools, parsers, datastructures -- and visualisation for the NCBI (National Center for Biotechnology -- Information) Taxonomy datasources. -- -- It can utilize information from the Entrez REST interface via -- EntrezHTTP, as well as from the files of the Taxonomy database -- dump. -- -- Input data is parsed into an FGL based datastructure, which enables a -- wealth of processing steps like node distances, retrieval of parent -- nodes or extraction of subtrees. -- -- Trees can be visualised via dot-format (graphviz) -- -- -- or via json-format (http://d3js.org/d3js). -- -- The TaxonomyTools package contains tools based on this package. @package Taxonomy @version 1.0.3 -- | This module contains data structures for taxonomy data module Bio.TaxonomyData -- | SimpleTaxon only contains the most relevant fields of a taxonomy -- entry. 
For all annotated fields use the Taxon datatype and its -- associated functions data SimpleTaxon SimpleTaxon :: Int -> Text -> Int -> Rank -> SimpleTaxon [simpleTaxId] :: SimpleTaxon -> Int [simpleScientificName] :: SimpleTaxon -> Text [simpleParentTaxId] :: SimpleTaxon -> Int [simpleRank] :: SimpleTaxon -> Rank -- | Datastructure for tree comparisons data CompareTaxon CompareTaxon :: Text -> Rank -> [Int] -> CompareTaxon [compareScientificName] :: CompareTaxon -> Text [compareRank] :: CompareTaxon -> Rank [inTree] :: CompareTaxon -> [Int] -- | Data structure for Entrez taxonomy fetch result data Taxon Taxon :: Int -> String -> Int -> Rank -> String -> TaxGenCode -> TaxGenCode -> String -> [LineageTaxon] -> String -> String -> String -> Taxon [taxonTaxId] :: Taxon -> Int [taxonScientificName] :: Taxon -> String [taxonParentTaxId] :: Taxon -> Int [taxonRank] :: Taxon -> Rank [division] :: Taxon -> String [geneticCode] :: Taxon -> TaxGenCode [mitoGeneticCode] :: Taxon -> TaxGenCode [lineage] :: Taxon -> String [lineageEx] :: Taxon -> [LineageTaxon] [createDate] :: Taxon -> String [updateDate] :: Taxon -> String [pubDate] :: Taxon -> String data TaxonName TaxonName :: String -> String -> TaxonName [classCDE] :: TaxonName -> String [dispName] :: TaxonName -> String -- | Lineage Taxons denote all parent Taxonomy nodes of a node retrieved by -- Entrez fetch data LineageTaxon LineageTaxon :: Int -> String -> Rank -> LineageTaxon [lineageTaxId] :: LineageTaxon -> Int [lineageScienticName] :: LineageTaxon -> String [lineageRank] :: LineageTaxon -> Rank -- | NCBI Taxonomy database dump hierarchical data structure as defined in -- ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_readme.txt data NCBITaxDump NCBITaxDump :: [TaxCitation] -> [TaxDelNode] -> [TaxDivision] -> [TaxGenCode] -> [TaxMergedNode] -> [TaxName] -> [TaxNode] -> NCBITaxDump [taxCitations] :: NCBITaxDump -> [TaxCitation] [taxDelNodes] :: NCBITaxDump -> [TaxDelNode] [taxDivisions] :: NCBITaxDump -> [TaxDivision] 
[taxGenCodes] :: NCBITaxDump -> [TaxGenCode] [taxMergedNodes] :: NCBITaxDump -> [TaxMergedNode] [taxNames] :: NCBITaxDump -> [TaxName] [taxNodes] :: NCBITaxDump -> [TaxNode] -- | Datastructure for entries of Taxonomy database dump citations file data TaxCitation TaxCitation :: Int -> Maybe String -> Maybe Int -> Maybe Int -> Maybe String -> Maybe String -> Maybe [Int] -> TaxCitation [citId] :: TaxCitation -> Int [citKey] :: TaxCitation -> Maybe String [pubmedId] :: TaxCitation -> Maybe Int [medlineId] :: TaxCitation -> Maybe Int [url] :: TaxCitation -> Maybe String [text] :: TaxCitation -> Maybe String [taxIdList] :: TaxCitation -> Maybe [Int] -- | Datastructure for entries of Taxonomy database dump deleted nodes file data TaxDelNode TaxDelNode :: Int -> TaxDelNode [delTaxId] :: TaxDelNode -> Int -- | Datastructure for entries of Taxonomy database dump division file data TaxDivision TaxDivision :: Int -> String -> String -> Maybe String -> TaxDivision [divisionId] :: TaxDivision -> Int [divisionCDE] :: TaxDivision -> String [divisonName] :: TaxDivision -> String [divisionComments] :: TaxDivision -> Maybe String -- | Datastructure for entries of Taxonomy database dump gencode file data TaxGenCode TaxGenCode :: Int -> Maybe String -> String -> String -> String -> TaxGenCode [geneticCodeId] :: TaxGenCode -> Int [abbreviation] :: TaxGenCode -> Maybe String [geneCodeName] :: TaxGenCode -> String [cde] :: TaxGenCode -> String [starts] :: TaxGenCode -> String -- | Datastructure for entries of Taxonomy database dump mergednodes file data TaxMergedNode TaxMergedNode :: Int -> Int -> TaxMergedNode [oldTaxId] :: TaxMergedNode -> Int [newTaxId] :: TaxMergedNode -> Int -- | Datastructure for entries of Taxonomy database dump names file data TaxName TaxName :: Int -> Text -> Text -> Text -> TaxName [nameTaxId] :: TaxName -> Int [nameTxt] :: TaxName -> Text [uniqueName] :: TaxName -> Text [nameClass] :: TaxName -> Text -- | Taxonomic ranks: NCBI uses the uncommon Speciessubgroup 
data Rank Norank :: Rank Form :: Rank Variety :: Rank Infraspecies :: Rank Subspecies :: Rank Speciessubgroup :: Rank Species :: Rank Speciesgroup :: Rank Superspecies :: Rank Series :: Rank Section :: Rank Subgenus :: Rank Genus :: Rank Subtribe :: Rank Tribe :: Rank Supertribe :: Rank Subfamily :: Rank Family :: Rank Superfamily :: Rank Parvorder :: Rank Infraorder :: Rank Suborder :: Rank Order :: Rank Superorder :: Rank Magnorder :: Rank Cohort :: Rank Legion :: Rank Parvclass :: Rank Infraclass :: Rank Subclass :: Rank Class :: Rank Superclass :: Rank Microphylum :: Rank Infraphylum :: Rank Subphylum :: Rank Phylum :: Rank Superphylum :: Rank Infrakingdom :: Rank Subkingdom :: Rank Kingdom :: Rank Superkingdom :: Rank Domain :: Rank readsRank :: String -> [(Rank, String)] -- | Datastructure for entries of Taxonomy database dump nodes file data TaxNode TaxNode :: Int -> Int -> Rank -> Maybe String -> String -> Bool -> String -> Bool -> String -> Bool -> Bool -> Bool -> Maybe String -> TaxNode [taxId] :: TaxNode -> Int [parentTaxId] :: TaxNode -> Int [rank] :: TaxNode -> Rank [emblCode] :: TaxNode -> Maybe String [nodeDivisionId] :: TaxNode -> String [inheritedDivFlag] :: TaxNode -> Bool [nodeGeneticCodeId] :: TaxNode -> String [inheritedGCFlag] :: TaxNode -> Bool [mitochondrialGeneticCodeId] :: TaxNode -> String [inheritedMGCFlag] :: TaxNode -> Bool [genBankHiddenFlag] :: TaxNode -> Bool [hiddenSubtreeRootFlag] :: TaxNode -> Bool [nodeComments] :: TaxNode -> Maybe String -- | Simple Gene2Accession table data SimpleGene2Accession SimpleGene2Accession :: Int -> String -> SimpleGene2Accession [simpleTaxIdEntry] :: SimpleGene2Accession -> Int [simpleGenomicNucleotideAccessionVersion] :: SimpleGene2Accession -> String -- | Datastructure for Gene2Accession table data Gene2Accession Gene2Accession :: Int -> Int -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> Gene2Accession 
[taxIdEntry] :: Gene2Accession -> Int [geneID] :: Gene2Accession -> Int [status] :: Gene2Accession -> String [rnaNucleotideAccessionVersion] :: Gene2Accession -> String [rnaNucleotideGi] :: Gene2Accession -> String [proteinAccessionVersion] :: Gene2Accession -> String [proteinGi] :: Gene2Accession -> String [genomicNucleotideAccessionVersion] :: Gene2Accession -> String [genomicNucleotideGi] :: Gene2Accession -> String [startPositionOnTheGenomicAccession] :: Gene2Accession -> String [endPositionOnTheGenomicAccession] :: Gene2Accession -> String [orientation] :: Gene2Accession -> String [assembly] :: Gene2Accession -> String [maturePeptideAccessionVersion] :: Gene2Accession -> String [maturePeptideGi] :: Gene2Accession -> String simpleTaxonJSONValue :: Gr SimpleTaxon Double -> Node -> Value instance GHC.Read.Read Bio.TaxonomyData.Gene2Accession instance GHC.Classes.Eq Bio.TaxonomyData.Gene2Accession instance GHC.Show.Show Bio.TaxonomyData.Gene2Accession instance GHC.Read.Read Bio.TaxonomyData.SimpleGene2Accession instance GHC.Classes.Eq Bio.TaxonomyData.SimpleGene2Accession instance GHC.Show.Show Bio.TaxonomyData.SimpleGene2Accession instance GHC.Classes.Eq Bio.TaxonomyData.NCBITaxDump instance GHC.Read.Read Bio.TaxonomyData.NCBITaxDump instance GHC.Show.Show Bio.TaxonomyData.NCBITaxDump instance GHC.Classes.Eq Bio.TaxonomyData.TaxNode instance GHC.Read.Read Bio.TaxonomyData.TaxNode instance GHC.Show.Show Bio.TaxonomyData.TaxNode instance GHC.Classes.Eq Bio.TaxonomyData.SimpleTaxon instance GHC.Read.Read Bio.TaxonomyData.SimpleTaxon instance GHC.Show.Show Bio.TaxonomyData.SimpleTaxon instance GHC.Classes.Eq Bio.TaxonomyData.CompareTaxon instance GHC.Read.Read Bio.TaxonomyData.CompareTaxon instance GHC.Show.Show Bio.TaxonomyData.CompareTaxon instance GHC.Classes.Eq Bio.TaxonomyData.Taxon instance GHC.Show.Show Bio.TaxonomyData.Taxon instance GHC.Classes.Eq Bio.TaxonomyData.LineageTaxon instance GHC.Show.Show Bio.TaxonomyData.LineageTaxon instance GHC.Enum.Enum 
Bio.TaxonomyData.Rank instance GHC.Enum.Bounded Bio.TaxonomyData.Rank instance GHC.Show.Show Bio.TaxonomyData.Rank instance GHC.Classes.Ord Bio.TaxonomyData.Rank instance GHC.Classes.Eq Bio.TaxonomyData.Rank instance GHC.Classes.Eq Bio.TaxonomyData.TaxName instance GHC.Read.Read Bio.TaxonomyData.TaxName instance GHC.Show.Show Bio.TaxonomyData.TaxName instance GHC.Classes.Eq Bio.TaxonomyData.TaxMergedNode instance GHC.Read.Read Bio.TaxonomyData.TaxMergedNode instance GHC.Show.Show Bio.TaxonomyData.TaxMergedNode instance GHC.Classes.Eq Bio.TaxonomyData.TaxGenCode instance GHC.Read.Read Bio.TaxonomyData.TaxGenCode instance GHC.Show.Show Bio.TaxonomyData.TaxGenCode instance GHC.Classes.Eq Bio.TaxonomyData.TaxDivision instance GHC.Read.Read Bio.TaxonomyData.TaxDivision instance GHC.Show.Show Bio.TaxonomyData.TaxDivision instance GHC.Classes.Eq Bio.TaxonomyData.TaxDelNode instance GHC.Read.Read Bio.TaxonomyData.TaxDelNode instance GHC.Show.Show Bio.TaxonomyData.TaxDelNode instance GHC.Classes.Eq Bio.TaxonomyData.TaxCitation instance GHC.Read.Read Bio.TaxonomyData.TaxCitation instance GHC.Show.Show Bio.TaxonomyData.TaxCitation instance GHC.Classes.Eq Bio.TaxonomyData.TaxonName instance GHC.Show.Show Bio.TaxonomyData.TaxonName instance GHC.Read.Read Bio.TaxonomyData.Rank instance Data.Aeson.Types.ToJSON.ToJSON (Data.Graph.Inductive.Tree.Gr Bio.TaxonomyData.SimpleTaxon GHC.Types.Double) -- | Functions for parsing, processing and visualization of taxonomy data. -- --

Usage example:

-- -- -- -- module Bio.Taxonomy -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree is generated. readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree with scientific names for each node is -- generated. readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree is generated. parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double) -- | parse NCBITaxCitations from input string parseNCBITaxCitations :: String -> Either ParseError [TaxCitation] -- | parse NCBITaxCitations from input filePath readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation]) -- | parse NCBITaxDelNodes from input string parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode] -- | parse NCBITaxDelNodes from input filePath readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode]) -- | parse NCBITaxDivisions from input string parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision] -- | parse NCBITaxDivisions from input filePath readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision]) -- | parse NCBITaxGenCodes from input string parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode] -- | parse NCBITaxGenCodes from input filePath readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode]) -- | parse NCBITaxMergedNodes from input string parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode] -- | parse NCBITaxMergedNodes from input filePath readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode]) -- | parse NCBITaxNames from input string parseNCBITaxNames :: String -> Either ParseError [TaxName] -- | parse NCBITaxNames from input filePath readNCBITaxNames :: String -> IO (Either 
ParseError [TaxName]) -- | parse NCBITaxNodes from input string parseNCBITaxNodes :: String -> Either ParseError TaxNode -- | parse NCBITaxNodes from input filePath readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode]) -- | parse NCBISimpleTaxons from input string parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon -- | parse NCBISimpleTaxons from input filePath readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon]) -- | Parse the input as NCBITax datatype readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump) -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in -- Ids2TreeCompare tool. compareSubTrees :: [Gr SimpleTaxon Double] -> (Int, Gr CompareTaxon Double) -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in Ids2Tree -- tool. extractTaxonomySubTreebyLevel :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in Ids2Tree -- tool. extractTaxonomySubTreebyLevelNew :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double -- | Extract a subtree corresponding to input node paths to root. If a Rank -- is provided, all nodes that are less or equal are omitted extractTaxonomySubTreebyRank :: [Node] -> Gr SimpleTaxon Double -> Maybe Rank -> Gr SimpleTaxon Double -- | Returns path between 2 maybe nodes. Used in TreeDistance tool. safeNodePath :: Maybe Node -> Gr SimpleTaxon Double -> Maybe Node -> Either String Path -- | Extract parent node with specified Rank getParentbyRank :: Node -> Gr SimpleTaxon Double -> Maybe Rank -> Maybe (Node, SimpleTaxon) -- | Draw tree comparison graph in dot format. Used in Ids2TreeCompare -- tool. 
drawTaxonomyComparison :: Bool -> (Int, Gr CompareTaxon Double) -> String -- | Draw graph in dot format. Used in Ids2Tree tool. drawTaxonomy :: Bool -> Gr SimpleTaxon Double -> String -- | Write tree representation either as dot or json to provided file path writeTree :: String -> String -> Bool -> Gr SimpleTaxon Double -> IO () -- | Write tree representation as dot to provided file path. Graphviz tools -- like dot can be applied to the written .dot file to generate e.g. -- svg-format images. writeDotTree :: String -> Bool -> Gr SimpleTaxon Double -> IO () -- | Write tree representation as json to provided file path. You can -- visualize the result for example with D3.js. writeJsonTree :: String -> Gr SimpleTaxon Double -> IO ()