-- |
--
-- NOTE We need to make sure to be compatible to everything here:
-- @http://evolution.genetics.washington.edu/phylip/newicktree.html@ In
-- particular, we do not do any conversion from @_@ to @(space)@ right now.

module Biobase.Newick.Import where

import           Control.Applicative
import           Data.Attoparsec.Text (Parser,(<?>))
import           Data.Char (isAlpha)
import           Data.Text (Text)
import           Data.Tree
import qualified Data.Attoparsec.Text as A
import qualified Data.Text as T
import qualified Data.Text.IO as T

import Biobase.Newick.Types



type PNT = Parser (Tree Info)

newicksFromFile :: FilePath -> IO (Either String [NewickTree])
newicksFromFile f = newicksFromText <$> T.readFile f

newicksFromText :: Text -> Either String [NewickTree]
newicksFromText = A.parseOnly manyNewick

manyNewick = some (newick <* A.skipSpace) <* A.endOfInput

newick :: Parser NewickTree
newick = NewickTree <$> tree <* ";" <?> "newick"

tree :: PNT
tree = A.skipSpace *> (branched <|> leaf) <?> "tree"

branched :: PNT
branched = flip Node <$ "(" <*> tree `A.sepBy1` "," <* ")" <*> info <?> "branched"

leaf :: PNT
leaf = (flip Node []) <$> info <?> "leaf"

info :: Parser Info
info = Info <$> name <*> plength <?> "info"

-- |
--
-- NOTE http://evolution.genetics.washington.edu/phylip/newicktree.html
--
-- A name can be any string of printable characters except blanks, colons,
-- semicolons, parentheses, and square brackets.
--
-- I am excluding commas as well.

name :: Parser Text
name = A.takeWhile accept <?> "name"
  where accept a = not $ A.isHorizontalSpace a || A.inClass ",:;()[]" a -- isAlpha a || A.inClass "_." a || A.isHorizontalSpace a

plength :: Parser Double
plength = ":" *> A.double <|> pure 0 <?> "plength"


{-
test = mapM_ (\x -> (either error (putStr . drawTree . fmap show . head . map getNewickTree) $ A.parseOnly manyNewick x) >> putStrLn "\n\n\n")
        [ "B:0.2;"
        , "A;"
        , ";"
        , "(,);"
        , "((raccoon:19.19959,bear:6.80041):0.84600,((sea_lion:11.99700, seal:12.00300):7.52973,((monkey:100.85930,cat:47.14069):20.59201, weasel:18.87953):2.09460):3.87382,dog:25.46154);"
        , "(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460):0.10;"
        , "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);"
        , "A;"
        , "((A,B),(C,D));"
        , "(Alpha,Beta,Gamma,Delta,,Epsilon,,,);"
        ]
-}