-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Library for parsing, processing and visualization of taxonomy data -- @package Taxonomy @version 1.0.1 -- | This module contains data structures for taxonomy data module Bio.TaxonomyData -- | SimpleTaxon only contains the most relevant fields of a taxonomy -- entry. For all annotated fields use the Taxon datatype and its -- associated functions data SimpleTaxon SimpleTaxon :: Int -> ByteString -> Int -> Rank -> SimpleTaxon simpleTaxId :: SimpleTaxon -> Int simpleScientificName :: SimpleTaxon -> ByteString simpleParentTaxId :: SimpleTaxon -> Int simpleRank :: SimpleTaxon -> Rank -- | Datastructure for tree comparisons data CompareTaxon CompareTaxon :: ByteString -> Rank -> [Int] -> CompareTaxon compareScientificName :: CompareTaxon -> ByteString compareRank :: CompareTaxon -> Rank inTree :: CompareTaxon -> [Int] -- | Data structure for Entrez taxonomy fetch result data Taxon Taxon :: Int -> String -> Int -> Rank -> String -> TaxGenCode -> TaxGenCode -> String -> [LineageTaxon] -> String -> String -> String -> Taxon taxonTaxId :: Taxon -> Int taxonScientificName :: Taxon -> String taxonParentTaxId :: Taxon -> Int taxonRank :: Taxon -> Rank division :: Taxon -> String geneticCode :: Taxon -> TaxGenCode mitoGeneticCode :: Taxon -> TaxGenCode lineage :: Taxon -> String lineageEx :: Taxon -> [LineageTaxon] createDate :: Taxon -> String updateDate :: Taxon -> String pubDate :: Taxon -> String data TaxonName TaxonName :: String -> String -> TaxonName classCDE :: TaxonName -> String dispName :: TaxonName -> String -- | Lineage Taxons denote all parent Taxonomy nodes of a node retrieved by -- Entrez fetch data LineageTaxon LineageTaxon :: Int -> String -> Rank -> LineageTaxon lineageTaxId :: LineageTaxon -> Int lineageScienticName :: LineageTaxon -> String lineageRank :: LineageTaxon -> Rank -- | NCBI Taxonomy database dump hierarchical data structure as defined in -- 
ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_readme.txt data NCBITaxDump NCBITaxDump :: [TaxCitation] -> [TaxDelNode] -> [TaxDivision] -> [TaxGenCode] -> [TaxMergedNode] -> [TaxName] -> [TaxNode] -> NCBITaxDump taxCitations :: NCBITaxDump -> [TaxCitation] taxDelNodes :: NCBITaxDump -> [TaxDelNode] taxDivisions :: NCBITaxDump -> [TaxDivision] taxGenCodes :: NCBITaxDump -> [TaxGenCode] taxMergedNodes :: NCBITaxDump -> [TaxMergedNode] taxNames :: NCBITaxDump -> [TaxName] taxNodes :: NCBITaxDump -> [TaxNode] -- | Datastructure for entries of Taxonomy database dump citations file data TaxCitation TaxCitation :: Int -> Maybe String -> Maybe Int -> Maybe Int -> Maybe String -> Maybe String -> Maybe [Int] -> TaxCitation citId :: TaxCitation -> Int citKey :: TaxCitation -> Maybe String pubmedId :: TaxCitation -> Maybe Int medlineId :: TaxCitation -> Maybe Int url :: TaxCitation -> Maybe String text :: TaxCitation -> Maybe String taxIdList :: TaxCitation -> Maybe [Int] -- | Datastructure for entries of Taxonomy database dump deleted nodes file data TaxDelNode TaxDelNode :: Int -> TaxDelNode delTaxId :: TaxDelNode -> Int -- | Datastructure for entries of Taxonomy database dump division file data TaxDivision TaxDivision :: Int -> String -> String -> Maybe String -> TaxDivision divisionId :: TaxDivision -> Int divisionCDE :: TaxDivision -> String divisonName :: TaxDivision -> String divisionComments :: TaxDivision -> Maybe String -- | Datastructure for entries of Taxonomy database dump gencode file data TaxGenCode TaxGenCode :: Int -> Maybe String -> String -> String -> String -> TaxGenCode geneticCodeId :: TaxGenCode -> Int abbreviation :: TaxGenCode -> Maybe String geneCodeName :: TaxGenCode -> String cde :: TaxGenCode -> String starts :: TaxGenCode -> String -- | Datastructure for entries of Taxonomy database dump mergednodes file data TaxMergedNode TaxMergedNode :: Int -> Int -> TaxMergedNode oldTaxId :: TaxMergedNode -> Int newTaxId :: TaxMergedNode -> Int -- | 
Datastructure for entries of Taxonomy database dump names file data TaxName TaxName :: Int -> ByteString -> ByteString -> ByteString -> TaxName nameTaxId :: TaxName -> Int nameTxt :: TaxName -> ByteString uniqueName :: TaxName -> ByteString nameClass :: TaxName -> ByteString -- | Taxonomic ranks: NCBI uses the uncommon Speciessubgroup data Rank Norank :: Rank Form :: Rank Variety :: Rank Infraspecies :: Rank Subspecies :: Rank Speciessubgroup :: Rank Species :: Rank Speciesgroup :: Rank Superspecies :: Rank Series :: Rank Section :: Rank Subgenus :: Rank Genus :: Rank Subtribe :: Rank Tribe :: Rank Supertribe :: Rank Subfamily :: Rank Family :: Rank Superfamily :: Rank Parvorder :: Rank Infraorder :: Rank Suborder :: Rank Order :: Rank Superorder :: Rank Magnorder :: Rank Cohort :: Rank Legion :: Rank Parvclass :: Rank Infraclass :: Rank Subclass :: Rank Class :: Rank Superclass :: Rank Microphylum :: Rank Infraphylum :: Rank Subphylum :: Rank Phylum :: Rank Superphylum :: Rank Infrakingdom :: Rank Subkingdom :: Rank Kingdom :: Rank Superkingdom :: Rank Domain :: Rank readsRank :: String -> [(Rank, String)] -- | Datastructure for entries of Taxonomy database dump nodes file data TaxNode TaxNode :: Int -> Int -> Rank -> Maybe String -> String -> Bool -> String -> Bool -> String -> Bool -> Bool -> Bool -> Maybe String -> TaxNode taxId :: TaxNode -> Int parentTaxId :: TaxNode -> Int rank :: TaxNode -> Rank emblCode :: TaxNode -> Maybe String nodeDivisionId :: TaxNode -> String inheritedDivFlag :: TaxNode -> Bool nodeGeneticCodeId :: TaxNode -> String inheritedGCFlag :: TaxNode -> Bool mitochondrialGeneticCodeId :: TaxNode -> String inheritedMGCFlag :: TaxNode -> Bool genBankHiddenFlag :: TaxNode -> Bool hiddenSubtreeRootFlag :: TaxNode -> Bool nodeComments :: TaxNode -> Maybe String -- | Simple Gene2Accession table data SimpleGene2Accession SimpleGene2Accession :: Int -> String -> SimpleGene2Accession simpleTaxIdEntry :: SimpleGene2Accession -> Int 
simpleGenomicNucleotideAccessionVersion :: SimpleGene2Accession -> String -- | Datastructure for Gene2Accession table data Gene2Accession Gene2Accession :: Int -> Int -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> Gene2Accession taxIdEntry :: Gene2Accession -> Int geneID :: Gene2Accession -> Int status :: Gene2Accession -> String rnaNucleotideAccessionVersion :: Gene2Accession -> String rnaNucleotideGi :: Gene2Accession -> String proteinAccessionVersion :: Gene2Accession -> String proteinGi :: Gene2Accession -> String genomicNucleotideAccessionVersion :: Gene2Accession -> String genomicNucleotideGi :: Gene2Accession -> String startPositionOnTheGenomicAccession :: Gene2Accession -> String endPositionOnTheGenomicAccession :: Gene2Accession -> String orientation :: Gene2Accession -> String assembly :: Gene2Accession -> String maturePeptideAccessionVersion :: Gene2Accession -> String maturePeptideGi :: Gene2Accession -> String simpleTaxonJSONValue :: Gr SimpleTaxon Double -> Node -> Value instance Show TaxonName instance Eq TaxonName instance Show TaxCitation instance Read TaxCitation instance Eq TaxCitation instance Show TaxDelNode instance Read TaxDelNode instance Eq TaxDelNode instance Show TaxDivision instance Read TaxDivision instance Eq TaxDivision instance Show TaxGenCode instance Read TaxGenCode instance Eq TaxGenCode instance Show TaxMergedNode instance Read TaxMergedNode instance Eq TaxMergedNode instance Show TaxName instance Read TaxName instance Eq TaxName instance Eq Rank instance Ord Rank instance Show Rank instance Bounded Rank instance Enum Rank instance Show LineageTaxon instance Eq LineageTaxon instance Show Taxon instance Eq Taxon instance Show CompareTaxon instance Read CompareTaxon instance Eq CompareTaxon instance Show SimpleTaxon instance Read SimpleTaxon instance Eq SimpleTaxon instance Show TaxNode instance Read TaxNode instance Eq TaxNode instance Show 
NCBITaxDump instance Read NCBITaxDump instance Eq NCBITaxDump instance Show SimpleGene2Accession instance Eq SimpleGene2Accession instance Read SimpleGene2Accession instance Show Gene2Accession instance Eq Gene2Accession instance Read Gene2Accession instance ToJSON (Gr SimpleTaxon Double) instance Read Rank -- | Functions for parsing, processing and visualization of taxonomy data. -- --

Usage example:

-- -- -- -- module Bio.Taxonomy -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree is generated. readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree with scientific names for each node is -- generated. readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double)) -- | NCBI taxonomy dump nodes and names in the input directory path are -- parsed and a SimpleTaxon tree is generated. parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double) -- | parse NCBITaxCitations from input string parseNCBITaxCitations :: String -> Either ParseError [TaxCitation] -- | parse NCBITaxCitations from input filePath readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation]) -- | parse NCBITaxDelNodes from input string parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode] -- | parse NCBITaxDelNodes from input filePath readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode]) -- | parse NCBITaxDivisions from input string parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision] -- | parse NCBITaxDivisions from input filePath readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision]) -- | parse NCBITaxGenCodes from input string parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode] -- | parse NCBITaxGenCodes from input filePath readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode]) -- | parse NCBITaxMergedNodes from input string parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode] -- | parse NCBITaxMergedNodes from input filePath readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode]) -- | parse NCBITaxNames from input string parseNCBITaxNames :: String -> Either ParseError [TaxName] -- | parse NCBITaxNames from input filePath readNCBITaxNames :: String -> IO (Either 
ParseError [TaxName]) -- | parse NCBITaxNodes from input string parseNCBITaxNodes :: String -> Either ParseError TaxNode -- | parse NCBITaxNodes from input filePath readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode]) -- | parse NCBISimpleTaxons from input string parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon -- | parse NCBISimpleTaxons from input filePath readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon]) -- | Parse the input as NCBITax datatype readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump) -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in -- Ids2TreeCompare tool. compareSubTrees :: [Gr SimpleTaxon Double] -> (Int, Gr CompareTaxon Double) -- | Extract a subtree corresponding to input node paths to root. Only -- nodes in level number distance to root are included. Used in Ids2Tree -- tool. extractTaxonomySubTreebyLevel :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double -- | Extract a subtree corresponding to input node paths to root. If a Rank -- is provided, all nodes that are less or equal are omitted extractTaxonomySubTreebyRank :: [Node] -> Gr SimpleTaxon Double -> Maybe Rank -> Gr SimpleTaxon Double -- | Returns path between 2 maybe nodes. Used in TreeDistance tool. safeNodePath :: Maybe Node -> Gr SimpleTaxon Double -> Maybe Node -> Either String [Node] -- | Extract parent node with specified Rank getParentbyRank :: Node -> Gr SimpleTaxon Double -> Maybe Rank -> Maybe (Node, SimpleTaxon) -- | Draw tree comparison graph in dot format. Used in Ids2TreeCompare -- tool. drawTreeComparison :: (Int, Gr CompareTaxon Double) -> String -- | Draw graph in dot format. Used in Ids2Tree tool. 
drawTaxonomy :: Gr SimpleTaxon Double -> String -- | Write tree representation either as dot or json to provided file path writeTree :: String -> String -> Gr SimpleTaxon Double -> IO () -- | Write tree representation as dot to provided file path. Graphviz tools -- like dot can be applied to the written .dot file to generate e.g. -- svg-format images. writeDotTree :: String -> Gr SimpleTaxon Double -> IO () -- | Write tree representation as json to provided file path. You can -- visualize the result for example with D3.js. writeJsonTree :: String -> Gr SimpleTaxon Double -> IO ()