{- |
Module      :  ELynx.Import.Tree.NewickSpec
Copyright   :  (c) Dominik Schrempf 2019
License     :  GPL-3

Maintainer  :  dominik.schrempf@gmail.com
Stability   :  unstable
Portability :  portable

Creation date: Fri Jan 18 10:14:04 2019.

Hspec tests for the Newick parsers 'branchLength', 'name', 'node', 'leaf',
'forest', and 'newick'.

-}

module ELynx.Import.Tree.NewickSpec
  ( spec
  ) where

import qualified Data.ByteString.Lazy.Char8 as L
import           Data.Tree
import           ELynx.Data.Tree.PhyloTree
import           ELynx.Import.Tree.Newick
import           Test.Hspec
import           Test.Hspec.Megaparsec
import           Text.Megaparsec

-- | Label with the given name, 'Nothing' as second field, and 0 as third
-- field. This is the label the tests expect whenever the input gives a node
-- nothing but a (possibly empty) name.
bareLabel :: String -> PhyloByteStringLabel
bareLabel n = PhyloLabel (L.pack n) Nothing 0

-- | Node without children carrying a 'bareLabel' with the given name.
bareLeaf :: String -> Tree PhyloByteStringLabel
bareLeaf n = Node { rootLabel = bareLabel n, subForest = [] }

-- | Node with an empty-named 'bareLabel' and the given children.
bareNode :: [Tree PhyloByteStringLabel] -> Tree PhyloByteStringLabel
bareNode children = Node { rootLabel = bareLabel "", subForest = children }

-- | Input for the 'node' and 'leaf' tests: a name followed by a colon and a
-- number.
sampleLabelByteString :: L.ByteString
sampleLabelByteString = L.pack "name:0.3"

-- | Expected label for 'sampleLabelByteString'.
sampleLabel :: PhyloByteStringLabel
sampleLabel = PhyloLabel (L.pack "name") Nothing 0.3

-- | Input for the 'forest' test: a bracketed, comma-separated list that
-- contains one nested bracketed list.
sampleForestByteString :: L.ByteString
sampleForestByteString = L.pack "(l,l,(a,b))"

-- | Expected forest for 'sampleForestByteString': two leaves named @l@ and one
-- unnamed node with the children @a@ and @b@.
sampleForest :: [Tree PhyloByteStringLabel]
sampleForest =
  [ bareLeaf "l"
  , bareLeaf "l"
  , bareNode [bareLeaf "a", bareLeaf "b"]
  ]

-- | Newick string with named leaves and numbers after colons on leaves and
-- internal nodes. Only checked for successful parsing.
sampleNewickByteString1 :: L.ByteString
sampleNewickByteString1 =
  L.pack
    "(Aeropyrum0:0.5478645225,(((((((((Arabidopsi:0.0701001024,Oryza_sati:0.0765988261):0.0309636193,Gymnosperm:0.0520325624):0.0338982245,Physcomitr:0.0768008916):0.0895714685,(Chlamydomo:0.1136227755,Dunaliella:0.1406347323):0.1117340620):0.0818876186,Rhodophyta:0.3405656487):0.0363527066,((((((Babesia_bo:0.1646969208,Theileria0:0.1519889486):0.1908081096,Plasmodium:0.3250696762):0.0637865908,(Toxoplasma:0.1153570425,Eimeria000:0.1671916078):0.0980136930):0.0518956330,Cryptospor:0.3175062809):0.1607708388,Ciliophora:0.5687502950):0.0624078848,(Phytophtho:0.2016424948,((Thalassios:0.1202730781,Phaeodacty:0.1290341329):0.1772775509,Phaeophyce:0.1989260715):0.0312359673):0.1154768302):0.0311952864):0.0149160316,(((((((((Candida_al:0.1027755272,Saccharomy:0.1190206560):0.1333487870,Neurospora:0.1977309079):0.0522926266,Schizosacc:0.2019603227):0.0567441011,(Cryptococc:0.1948614959,Ustilago_m:0.1564451295):0.0775729694):0.0323959951,Glomus_int:0.1573670796):0.0194701292,Chytridiom:0.2228415254):0.0384370601,Encephalit:1.4622174644):0.0416231688,(((Drosophila:0.2160627753,(Mammalians:0.1080484094,Tunicates0:0.1739253014):0.0289624371):0.0346633757,Hydrozoa00:0.2058137032):0.0480963050,Monosiga_b:0.3020637584):0.0654894239):0.0380915725,(Dictyostel:0.3453588998,Mastigamoe:0.3844779231):0.0478795653):0.0129578395):1.7592083381,((Archaeoglo:0.5402784445,Methanococ:0.4088567459):0.0993669265,Pyrococcus:0.4058713829):0.1734405968):0.2193511807,Pyrobaculu:0.7507718047):0.1646616482,Sulfolobus:0.5404967897);"

-- | Newick string in which most internal nodes additionally carry a label
-- (such as @1@ or @0.93@) between the closing bracket and the colon. Only
-- checked for successful parsing.
sampleNewickByteString2 :: L.ByteString
sampleNewickByteString2 =
  L.pack
    "(Caenorhabd:0.0176707431,C0briggsae:0.0142817073,(Ancylostom:0.0711440844,(Pristionch:0.1301309005,((Brugia_mal:0.0757534325,Ascaris0su:0.0482660407)1:0.0563924634,(((Meloidogyn:0.1239621893,Heteroderi:0.0987968800)1:0.1136879428,Strongyloi:0.2483437292)1:0.0252467381,(Trichoceph:0.2985037612,((((((Coleoptera:0.0907850846,(Apis0melli:0.0754058285,Hemiptera0:0.1675359618)0.93:0.0085703192)1:0.0146980945,(Siphonapte:0.0556805916,Bombyx0mor:0.0968983509)1:0.0127867903)1:0.0167360185,((Drosophila:0.0492149086,Glossina0m:0.0534390467)1:0.0583462602,Anopheles0:0.0968919941)1:0.0431343553)1:0.0535616453,Crustacea0:0.2247268999)1:0.0252755187,Chelicerat:0.1537491558)1:0.0212497286,((Echinoderm:0.1803896615,(Cephalocho:0.1492264574,(Urochordat:0.2194747834,(Mammalia00:0.0393008407,Actinopter:0.0491700096):0.0858550024)1:0.0157515969)1:0.0132516777)1:0.0203423736,((((((((Neurospora:0.0721607581,Magnaporth:0.0814182810)1:0.0198940548,Gibberella:0.0858192964)1:0.0533872590,Eurotiomyc:0.1058840539)1:0.1266302603,(Candida0al:0.1349957509,Saccharomy:0.1553464572)1:0.1791344287)1:0.0529664967,Schizosacc:0.2550087905)1:0.0723650615,(Ustilago0m:0.2031812772,(Homobasidi:0.1473391802,Cryptococc:0.2070743149)1:0.0347868586)1:0.0790327507)1:0.0727415175,Glomales00:0.1779430068)1:0.0169066667,Chytridiom:0.3028920870)1:0.3311420273)1:0.0278566156)1:0.1049569161)1:0.1366217350)1:0.0171168289)1:0.0345725378)1:0.0542036935)1:0.0879337167)1;"

-- | Newick string consisting of brackets, commas, and the terminating
-- semicolon only.
sampleNewickEmptyByteString :: L.ByteString
sampleNewickEmptyByteString = L.pack "(,(,,),);"

-- | Expected tree for 'sampleNewickEmptyByteString': an unnamed root with
-- three children, the middle one of which has three unnamed leaves itself.
sampleNewickEmpty :: Tree PhyloByteStringLabel
sampleNewickEmpty =
  bareNode
    [ bareLeaf ""
    , bareNode [bareLeaf "", bareLeaf "", bareLeaf ""]
    , bareLeaf ""
    ]

-- | Test suite for the parsers exported by "ELynx.Import.Tree.Newick".
spec :: Spec
spec = do
  describe "branchLength" $ do
    it "parses a colon and a branch length" $
      parse branchLength "" (L.pack ":13.2") `shouldParse` 13.2
    it "returns 0 if no branch length is given" $
      parse branchLength "" (L.pack "") `shouldParse` 0

  describe "name" $ do
    it "parses a string of printable characters" $
      parse name "" (L.pack "aName") `shouldParse` L.pack "aName"
    -- The assertion expects the parser to return only the part before the
    -- blank, so the description says "stops at" rather than "parses".
    it "stops at blanks, colons, semicolons, parentheses, and square brackets" $
      parse name "" (L.pack "aName bla") `shouldParse` L.pack "aName"
    it "allows empty names" $
      parse name "" (L.pack "") `shouldParse` L.pack ""

  describe "node" $ do
    it "parses a tree node" $
      parse node "" sampleLabelByteString `shouldParse` sampleLabel
    it "parses tree nodes with empty names and branch lengths" $
      parse node "" (L.pack "") `shouldParse` bareLabel ""

  describe "leaf" $
    it "parses a leaf of a tree" $
      parse leaf "" sampleLabelByteString `shouldParse` Node sampleLabel []

  describe "forest" $
    it "parses a set of trees within brackets" $
      parse forest "" sampleForestByteString `shouldParse` sampleForest

  describe "newick" $ do
    it "parses newick trees" $ do
      parse newick "" `shouldSucceedOn` sampleNewickByteString1
      parse newick "" `shouldSucceedOn` sampleNewickByteString2
    it "parses a weird newick tree without node labels nor branch lengths" $
      parse newick "" sampleNewickEmptyByteString `shouldParse` sampleNewickEmpty