{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Importing clan data is probably never time-critical as the total file size
-- is extremely small. Should this ever change, swap to iteratee. The file is
-- read /strictly/.

module Biobase.Infernal.Clan.Import where

import qualified Data.ByteString.Char8 as BS
import Data.List

import Biobase.Infernal.Clan
import Biobase.Infernal.Types



-- | Import the complete data from an uncompressed source file.

fromFile :: FilePath -> IO [Clan]
fromFile fp = fromByteString `fmap` BS.readFile fp

-- | Transform a bytestring into a list of 'Clan's.

fromByteString :: BS.ByteString -> [Clan]
fromByteString s = map mkClan
                 . groupBy (\x y -> "AC"/=(head . BS.words $y))
                 . BS.lines
                 $ s

-- | Given a list of bytestrings, create one Clan.
--
-- TODO return Maybe, make crash-safe (not really high on the list...)

mkClan :: [BS.ByteString] -> Clan
mkClan xs = Clan
  { cAccession  = ClanAccession . f . BS.drop 2 . (!!1) . BS.words . head . filter ((=="AC") . BS.take 2) $ xs
  , cIdentifier = ClanIdentification . (!!1) . BS.words . head . filter ((=="ID") . BS.take 2) $ xs
  , cMembers    = map (ModelAccession . f . BS.drop 2 . BS.init . (!!1)) . filter ((=="MB") . (!!0)) . map BS.words $ xs
  , cStrings    = xs
  } where
      f s
        | Just (k, _) <- BS.readInt s = k
        | otherwise = error $ "mkClan: " ++ BS.unpack s