-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Library for parsing, processing and visualization of taxonomy data -- -- Haskell cabal Taxonomy library contains tools, parsers, data structures -- and visualisation for the NCBI (National Center for Biotechnology -- Information) Taxonomy data sources. -- -- It can utilize information from the Entrez REST interface via -- EntrezHTTP, as well as from the files of the Taxonomy database -- dump. -- -- Input data is parsed into an FGL-based data structure, which enables a -- wealth of processing steps like node distances, retrieval of parent -- nodes or extraction of subtrees. -- -- Trees can be visualised via dot-format (graphviz) -- -- -- or via json-format (http://d3js.org/d3js). -- -- The TaxonomyTools package contains tools based on this package. @package Taxonomy @version 2.2.0 -- | This module contains data structures for taxonomy data module Biobase.Taxonomy.Types -- | SimpleTaxon only contains the most relevant fields of a taxonomy -- entry. 
For all annotated fields use the Taxon datatype and its -- associated functions data SimpleTaxon SimpleTaxon :: Int -> Text -> Int -> Rank -> SimpleTaxon [$sel:simpleTaxId:SimpleTaxon] :: SimpleTaxon -> Int [$sel:simpleScientificName:SimpleTaxon] :: SimpleTaxon -> Text [$sel:simpleParentTaxId:SimpleTaxon] :: SimpleTaxon -> Int [$sel:simpleRank:SimpleTaxon] :: SimpleTaxon -> Rank -- | Datastructure for tree comparisons data CompareTaxon CompareTaxon :: Text -> Rank -> [Int] -> CompareTaxon [$sel:compareScientificName:CompareTaxon] :: CompareTaxon -> Text [$sel:compareRank:CompareTaxon] :: CompareTaxon -> Rank [$sel:inTree:CompareTaxon] :: CompareTaxon -> [Int] -- | Data structure for Entrez taxonomy fetch result data Taxon Taxon :: Int -> ByteString -> Int -> Rank -> ByteString -> TaxGenCode -> TaxGenCode -> ByteString -> [LineageTaxon] -> ByteString -> ByteString -> ByteString -> Taxon [$sel:taxonTaxId:Taxon] :: Taxon -> Int [$sel:taxonScientificName:Taxon] :: Taxon -> ByteString [$sel:taxonParentTaxId:Taxon] :: Taxon -> Int [$sel:taxonRank:Taxon] :: Taxon -> Rank [$sel:division:Taxon] :: Taxon -> ByteString [$sel:geneticCode:Taxon] :: Taxon -> TaxGenCode [$sel:mitoGeneticCode:Taxon] :: Taxon -> TaxGenCode [$sel:lineage:Taxon] :: Taxon -> ByteString [$sel:lineageEx:Taxon] :: Taxon -> [LineageTaxon] [$sel:createDate:Taxon] :: Taxon -> ByteString [$sel:updateDate:Taxon] :: Taxon -> ByteString [$sel:pubDate:Taxon] :: Taxon -> ByteString data TaxonName TaxonName :: ByteString -> ByteString -> TaxonName [$sel:classCDE:TaxonName] :: TaxonName -> ByteString [$sel:dispName:TaxonName] :: TaxonName -> ByteString -- | Lineage Taxons denote all parent Taxonomy nodes of a node retrieved by -- Entrez fetch data Lineage Lineage :: Int -> ByteString -> Rank -> [LineageTaxon] -> Lineage [$sel:lineageStartTaxId:Lineage] :: Lineage -> Int [$sel:lineageStartScienticName:Lineage] :: Lineage -> ByteString [$sel:lineageStartRank:Lineage] :: Lineage -> Rank [$sel:lineageTaxons:Lineage] :: 
Lineage -> [LineageTaxon] -- | Lineage Taxons denote all parent Taxonomy nodes of a node retrieved by -- Entrez fetch data LineageTaxon LineageTaxon :: Int -> ByteString -> Rank -> LineageTaxon [$sel:lineageTaxId:LineageTaxon] :: LineageTaxon -> Int [$sel:lineageScienticName:LineageTaxon] :: LineageTaxon -> ByteString [$sel:lineageRank:LineageTaxon] :: LineageTaxon -> Rank -- | NCBI Taxonomy database dump hierarchical data structure as defined in -- ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_readme.txt data NCBITaxDump NCBITaxDump :: [TaxCitation] -> [TaxDelNode] -> [TaxDivision] -> [TaxGenCode] -> [TaxMergedNode] -> [TaxName] -> [TaxNode] -> NCBITaxDump [$sel:taxCitations:NCBITaxDump] :: NCBITaxDump -> [TaxCitation] [$sel:taxDelNodes:NCBITaxDump] :: NCBITaxDump -> [TaxDelNode] [$sel:taxDivisions:NCBITaxDump] :: NCBITaxDump -> [TaxDivision] [$sel:taxGenCodes:NCBITaxDump] :: NCBITaxDump -> [TaxGenCode] [$sel:taxMergedNodes:NCBITaxDump] :: NCBITaxDump -> [TaxMergedNode] [$sel:taxNames:NCBITaxDump] :: NCBITaxDump -> [TaxName] [$sel:taxNodes:NCBITaxDump] :: NCBITaxDump -> [TaxNode] -- | Datastructure for entries of Taxonomy database dump citations file data TaxCitation TaxCitation :: Int -> ByteString -> Maybe Int -> Maybe Int -> ByteString -> ByteString -> [Int] -> TaxCitation [$sel:citId:TaxCitation] :: TaxCitation -> Int [$sel:citKey:TaxCitation] :: TaxCitation -> ByteString [$sel:pubmedId:TaxCitation] :: TaxCitation -> Maybe Int [$sel:medlineId:TaxCitation] :: TaxCitation -> Maybe Int [$sel:url:TaxCitation] :: TaxCitation -> ByteString [$sel:text:TaxCitation] :: TaxCitation -> ByteString [$sel:taxIdList:TaxCitation] :: TaxCitation -> [Int] -- | Datastructure for entries of Taxonomy database dump deleted nodes file data TaxDelNode TaxDelNode :: Int -> TaxDelNode [$sel:delTaxId:TaxDelNode] :: TaxDelNode -> Int -- | Datastructure for entries of Taxonomy database dump division file data TaxDivision TaxDivision :: Int -> ByteString -> ByteString -> ByteString -> 
TaxDivision [$sel:divisionId:TaxDivision] :: TaxDivision -> Int [$sel:divisionCDE:TaxDivision] :: TaxDivision -> ByteString [$sel:divisonName:TaxDivision] :: TaxDivision -> ByteString [$sel:divisionComments:TaxDivision] :: TaxDivision -> ByteString -- | Datastructure for entries of Taxonomy database dump gencode file data TaxGenCode TaxGenCode :: Int -> ByteString -> ByteString -> ByteString -> ByteString -> TaxGenCode [$sel:geneticCodeId:TaxGenCode] :: TaxGenCode -> Int [$sel:abbreviation:TaxGenCode] :: TaxGenCode -> ByteString [$sel:geneCodeName:TaxGenCode] :: TaxGenCode -> ByteString [$sel:cde:TaxGenCode] :: TaxGenCode -> ByteString [$sel:starts:TaxGenCode] :: TaxGenCode -> ByteString -- | Datastructure for entries of Taxonomy database dump mergednodes file data TaxMergedNode TaxMergedNode :: Int -> Int -> TaxMergedNode [$sel:oldTaxId:TaxMergedNode] :: TaxMergedNode -> Int [$sel:newTaxId:TaxMergedNode] :: TaxMergedNode -> Int -- | Datastructure for entries of Taxonomy database dump names file data TaxName TaxName :: Int -> Text -> ByteString -> ByteString -> TaxName [$sel:nameTaxId:TaxName] :: TaxName -> Int [$sel:nameTxt:TaxName] :: TaxName -> Text [$sel:uniqueName:TaxName] :: TaxName -> ByteString [$sel:nameClass:TaxName] :: TaxName -> ByteString -- | Taxonomic ranks: NCBI uses the uncommon Speciessubgroup data Rank Norank :: Rank Form :: Rank Variety :: Rank Infraspecies :: Rank Subspecies :: Rank Speciessubgroup :: Rank Species :: Rank Speciesgroup :: Rank Superspecies :: Rank Series :: Rank Section :: Rank Subgenus :: Rank Genus :: Rank Subtribe :: Rank Tribe :: Rank Supertribe :: Rank Subfamily :: Rank Family :: Rank Superfamily :: Rank Parvorder :: Rank Infraorder :: Rank Suborder :: Rank Order :: Rank Superorder :: Rank Magnorder :: Rank Cohort :: Rank Legion :: Rank Parvclass :: Rank Infraclass :: Rank Subclass :: Rank Class :: Rank Superclass :: Rank Microphylum :: Rank Infraphylum :: Rank Subphylum :: Rank Phylum :: Rank Superphylum :: Rank 
Infrakingdom :: Rank Subkingdom :: Rank Kingdom :: Rank Superkingdom :: Rank Domain :: Rank readsRank :: String -> [(Rank, String)] -- | Datastructure for entries of Taxonomy database dump nodes file data TaxNode TaxNode :: Int -> Int -> Rank -> ByteString -> Int -> Bool -> Int -> Bool -> Int -> Bool -> Bool -> Bool -> ByteString -> TaxNode [$sel:taxId:TaxNode] :: TaxNode -> Int [$sel:parentTaxId:TaxNode] :: TaxNode -> Int [$sel:rank:TaxNode] :: TaxNode -> Rank [$sel:emblCode:TaxNode] :: TaxNode -> ByteString [$sel:nodeDivisionId:TaxNode] :: TaxNode -> Int [$sel:inheritedDivFlag:TaxNode] :: TaxNode -> Bool [$sel:nodeGeneticCodeId:TaxNode] :: TaxNode -> Int [$sel:inheritedGCFlag:TaxNode] :: TaxNode -> Bool [$sel:mitochondrialGeneticCodeId:TaxNode] :: TaxNode -> Int [$sel:inheritedMGCFlag:TaxNode] :: TaxNode -> Bool [$sel:genBankHiddenFlag:TaxNode] :: TaxNode -> Bool [$sel:hiddenSubtreeRootFlag:TaxNode] :: TaxNode -> Bool [$sel:nodeComments:TaxNode] :: TaxNode -> ByteString -- | Simple Gene2Accession table data SimpleGene2Accession SimpleGene2Accession :: Int -> ByteString -> SimpleGene2Accession [$sel:simpleTaxIdEntry:SimpleGene2Accession] :: SimpleGene2Accession -> Int [$sel:simpleGenomicNucleotideAccessionVersion:SimpleGene2Accession] :: SimpleGene2Accession -> ByteString -- | Datastructure for Gene2Accession table data Gene2Accession Gene2Accession :: Int -> Int -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> ByteString -> Gene2Accession [$sel:taxIdEntry:Gene2Accession] :: Gene2Accession -> Int [$sel:geneID:Gene2Accession] :: Gene2Accession -> Int [$sel:status:Gene2Accession] :: Gene2Accession -> ByteString [$sel:rnaNucleotideAccessionVersion:Gene2Accession] :: Gene2Accession -> ByteString [$sel:rnaNucleotideGi:Gene2Accession] :: Gene2Accession -> ByteString [$sel:proteinAccessionVersion:Gene2Accession] :: Gene2Accession -> ByteString 
[$sel:proteinGi:Gene2Accession] :: Gene2Accession -> ByteString [$sel:genomicNucleotideAccessionVersion:Gene2Accession] :: Gene2Accession -> ByteString [$sel:genomicNucleotideGi:Gene2Accession] :: Gene2Accession -> ByteString [$sel:startPositionOnTheGenomicAccession:Gene2Accession] :: Gene2Accession -> ByteString [$sel:endPositionOnTheGenomicAccession:Gene2Accession] :: Gene2Accession -> ByteString [$sel:orientation:Gene2Accession] :: Gene2Accession -> ByteString [$sel:assembly:Gene2Accession] :: Gene2Accession -> ByteString [$sel:maturePeptideAccessionVersion:Gene2Accession] :: Gene2Accession -> ByteString [$sel:maturePeptideGi:Gene2Accession] :: Gene2Accession -> ByteString simpleTaxonJSONValue :: Gr SimpleTaxon Double -> Node -> Value instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxonName instance GHC.Show.Show Biobase.Taxonomy.Types.TaxonName instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxCitation instance GHC.Read.Read Biobase.Taxonomy.Types.TaxCitation instance GHC.Show.Show Biobase.Taxonomy.Types.TaxCitation instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxDelNode instance GHC.Read.Read Biobase.Taxonomy.Types.TaxDelNode instance GHC.Show.Show Biobase.Taxonomy.Types.TaxDelNode instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxDivision instance GHC.Read.Read Biobase.Taxonomy.Types.TaxDivision instance GHC.Show.Show Biobase.Taxonomy.Types.TaxDivision instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxGenCode instance GHC.Read.Read Biobase.Taxonomy.Types.TaxGenCode instance GHC.Show.Show Biobase.Taxonomy.Types.TaxGenCode instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxMergedNode instance GHC.Read.Read Biobase.Taxonomy.Types.TaxMergedNode instance GHC.Show.Show Biobase.Taxonomy.Types.TaxMergedNode instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxName instance GHC.Read.Read Biobase.Taxonomy.Types.TaxName instance GHC.Show.Show Biobase.Taxonomy.Types.TaxName instance GHC.Enum.Enum Biobase.Taxonomy.Types.Rank instance GHC.Enum.Bounded 
Biobase.Taxonomy.Types.Rank instance GHC.Show.Show Biobase.Taxonomy.Types.Rank instance GHC.Classes.Ord Biobase.Taxonomy.Types.Rank instance GHC.Classes.Eq Biobase.Taxonomy.Types.Rank instance GHC.Classes.Eq Biobase.Taxonomy.Types.LineageTaxon instance GHC.Show.Show Biobase.Taxonomy.Types.LineageTaxon instance GHC.Classes.Eq Biobase.Taxonomy.Types.Lineage instance GHC.Show.Show Biobase.Taxonomy.Types.Lineage instance GHC.Classes.Eq Biobase.Taxonomy.Types.Taxon instance GHC.Show.Show Biobase.Taxonomy.Types.Taxon instance GHC.Classes.Eq Biobase.Taxonomy.Types.CompareTaxon instance GHC.Read.Read Biobase.Taxonomy.Types.CompareTaxon instance GHC.Show.Show Biobase.Taxonomy.Types.CompareTaxon instance GHC.Classes.Eq Biobase.Taxonomy.Types.SimpleTaxon instance GHC.Read.Read Biobase.Taxonomy.Types.SimpleTaxon instance GHC.Show.Show Biobase.Taxonomy.Types.SimpleTaxon instance GHC.Classes.Eq Biobase.Taxonomy.Types.TaxNode instance GHC.Read.Read Biobase.Taxonomy.Types.TaxNode instance GHC.Show.Show Biobase.Taxonomy.Types.TaxNode instance GHC.Classes.Eq Biobase.Taxonomy.Types.NCBITaxDump instance GHC.Read.Read Biobase.Taxonomy.Types.NCBITaxDump instance GHC.Show.Show Biobase.Taxonomy.Types.NCBITaxDump instance GHC.Read.Read Biobase.Taxonomy.Types.SimpleGene2Accession instance GHC.Classes.Eq Biobase.Taxonomy.Types.SimpleGene2Accession instance GHC.Show.Show Biobase.Taxonomy.Types.SimpleGene2Accession instance GHC.Read.Read Biobase.Taxonomy.Types.Gene2Accession instance GHC.Classes.Eq Biobase.Taxonomy.Types.Gene2Accession instance GHC.Show.Show Biobase.Taxonomy.Types.Gene2Accession instance Data.Aeson.Types.ToJSON.ToJSON (Data.Graph.Inductive.Tree.Gr Biobase.Taxonomy.Types.SimpleTaxon GHC.Types.Double) instance GHC.Read.Read Biobase.Taxonomy.Types.Rank -- | Functions for parsing, processing and visualization of taxonomy data. -- --

Usage example:

-- -- -- -- module Biobase.Taxonomy.Import -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree is generated. readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree with scientific names for each node is -- generated. readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree is generated. parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double) -- | parse NCBITaxCitations from input string parseNCBITaxCitations :: String -> Either ParseError [TaxCitation] -- | parse NCBITaxCitations from input filePath readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation]) -- | parse NCBITaxDelNodes from input string parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode] -- | parse NCBITaxDelNodes from input filePath readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode]) -- | parse NCBITaxDivisions from input string parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision] -- | parse NCBITaxDivisions from input filePath readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision]) -- | parse NCBITaxGenCodes from input string parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode] -- | parse NCBITaxGenCodes from input filePath readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode]) -- | parse NCBITaxMergedNodes from input string parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode] -- | parse NCBITaxMergedNodes from input filePath readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode]) -- | parse NCBITaxNames from input string parseNCBITaxNames :: String -> Either ParseError [TaxName] -- | parse NCBITaxNames from input filePath readNCBITaxNames :: String -> IO 
(Either ParseError [TaxName]) -- | parse NCBITaxNodes from input string parseNCBITaxNodes :: String -> Either ParseError TaxNode -- | parse NCBITaxNodes from input filePath readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode]) -- | parse NCBISimpleTaxons from input string parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon -- | parse NCBISimpleTaxons from input filePath readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon]) -- | Parse the input as NCBITax datatype readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump) -- | Functions for processing of taxonomy data. module Biobase.Taxonomy.Utils -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in -- Ids2TreeCompare tool. compareSubTrees :: [Gr SimpleTaxon Double] -> (Int, Gr CompareTaxon Double) -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in Ids2Tree -- tool. extractTaxonomySubTreebyLevel :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in Ids2Tree -- tool. extractTaxonomySubTreebyLevelNew :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double -- | Extract a subtree corresponding to input node paths to root. If a Rank -- is provided, all nodes that are less or equal are omitted extractTaxonomySubTreebyRank :: [Node] -> Gr SimpleTaxon Double -> Maybe Rank -> Gr SimpleTaxon Double -- | Returns path between 2 maybe nodes. Used in TreeDistance tool. safeNodePath :: Maybe Node -> Gr SimpleTaxon Double -> Maybe Node -> Either String Path -- | Extract parent node with specified Rank getParentbyRank :: Node -> Gr SimpleTaxon Double -> Maybe Rank -> Maybe (Node, SimpleTaxon) -- | Functions for visualization of taxonomy data. 
module Biobase.Taxonomy.Visualization -- | Draw tree comparison graph in dot format. Used in Ids2TreeCompare -- tool. drawTaxonomyComparison :: Bool -> (Int, Gr CompareTaxon Double) -> String -- | Draw graph in dot format. Used in Ids2Tree tool. drawTaxonomy :: Bool -> Gr SimpleTaxon Double -> String -- | Write tree representation either as dot or json to provided file path writeTree :: String -> String -> Bool -> Gr SimpleTaxon Double -> IO () -- | Write tree representation as dot to provided file path. Graphviz tools -- like dot can be applied to the written .dot file to generate e.g. -- svg-format images. writeDotTree :: String -> Bool -> Gr SimpleTaxon Double -> IO () -- | Write tree representation as json to provided file path. You can -- visualize the result for example with D3.js. writeJsonTree :: String -> Gr SimpleTaxon Double -> IO () -- | Functions for parsing, processing and visualization of taxonomy data. module Biobase.Taxonomy