-- | Parsing of five-column PhiPsi records (phi, psi, residue id, residue
-- name and a class label) from in-memory text or from a file.
--
-- 'SS' is exported together with 'PhiPsi' because the 'ss' record field has
-- that type; without the export clients could read the field but could not
-- name its type or pattern match on its constructors.
module Bio.Talos.PhiPsi
  ( parsePhiPsi
  , parsePhiPsiFile
  , PhiPsi(..)
  , SS(..)
  ) where
import Control.Monad.Instances()
import Data.Typeable
import Data.Data
import Data.List(partition)
import Data.Either(partitionEithers)
import qualified Data.ByteString.Char8 as BS
-- | Class label carried in the 'ss' field of a record.
--
-- The textual forms accepted and produced by the 'Read' and 'Show'
-- instances are the lower-case words @alpha@, @beta@ and @random@.
-- NOTE(review): presumably a secondary-structure class -- confirm.
--
-- The constructor order is significant: it fixes the derived 'Ord' and
-- 'Enum' instances.
data SS
  = Alpha   -- ^ written as @alpha@
  | Beta    -- ^ written as @beta@
  | Random  -- ^ written as @random@
  deriving (Eq, Ord, Enum, Typeable, Data)
-- | @s \`withoutPrefix\` p@ drops the prefix @p@ from the front of @s@.
--
-- Returns @Just rest@ when @s@ starts with @p@ (where @rest@ is what follows
-- the prefix) and @Nothing@ otherwise. An empty prefix always matches.
withoutPrefix :: Eq a => [a] -> [a] -> Maybe [a]
withoutPrefix remaining [] = Just remaining
withoutPrefix (x:xs) (y:ys)
  | x == y = xs `withoutPrefix` ys
withoutPrefix _ _ = Nothing
-- | Reads one of the lower-case keywords @random@, @beta@ or @alpha@ from
-- the very start of the input (no leading whitespace is skipped) and
-- returns the unconsumed remainder. Any other input yields no parse.
instance Read SS where
  readsPrec _ input =
    [ (label, rest)
    | (keyword, label) <- keywords
    , Just rest <- [input `withoutPrefix` keyword]
    ]
    where
      -- The keywords start with different letters, so at most one of them
      -- can be a prefix of the input.
      keywords = [("random", Random), ("beta", Beta), ("alpha", Alpha)]
-- | Renders a label as the same lower-case keyword that the 'Read' instance
-- accepts; the precedence argument is ignored.
instance Show SS where
  showsPrec _ label = showString $ case label of
    Random -> "random"
    Beta   -> "beta"
    Alpha  -> "alpha"
-- | One parsed record. The fields are listed in the order in which the
-- columns appear in the input, which is also the order of the positional
-- constructor arguments.
data PhiPsi = PhiPsi
  { phi     :: Double         -- ^ first column (presumably an angle -- units not visible here)
  , psi     :: Double         -- ^ second column (presumably an angle -- units not visible here)
  , resId   :: Int            -- ^ third column, residue identifier
  , resName :: BS.ByteString  -- ^ fourth column, stored verbatim without parsing
  , ss      :: SS             -- ^ fifth column, class label
  }
  deriving (Eq, Show)
-- | Parse the text of a PhiPsi table.
--
-- The first argument is the name of the input; it is used only to prefix
-- error messages. The second argument is the text to parse.
--
-- Lines whose first character is @#@ are treated as comments and dropped.
-- Blank lines (empty or whitespace-only) are ignored. Every remaining line
-- is split on whitespace and must have exactly five columns.
--
-- Returns the successfully parsed records together with one error message
-- per rejected line: first all lines with a wrong number of columns, then
-- all lines where a column could not be parsed.
parsePhiPsi :: BS.ByteString -> BS.ByteString -> ( [PhiPsi]
                                                 , [String] )
parsePhiPsi fname input = (goodRecords, errors)
  where
    errors = reports "Expected five columns in PhiPsi record, but found: " recordsOfInvalidLength ++
             map withErrorSource recordParseErrors
    reports msg = map (\x -> withErrorSource $ msg ++ show x)
    withErrorSource msg = "Error parsing " ++ BS.unpack fname ++ ": " ++ msg
    -- 'BS.isPrefixOf' is total, unlike 'BS.head', which throws on the empty
    -- lines that 'BS.lines' produces for blank input lines.
    isComment = BS.isPrefixOf (BS.pack "#")
    (_comments, maybeRecords) = partition isComment $ BS.lines input
    -- Blank lines split into zero words and are discarded here.
    records = filter (not . null) . map BS.words $ maybeRecords
    (recordsOfGoodLength, recordsOfInvalidLength) = partition ((==5) . length) records
    (recordParseErrors, goodRecords) = partitionEithers . map parseRecord $ recordsOfGoodLength
-- | Convert the columns of one record into a 'PhiPsi'.
--
-- Expects exactly five columns: phi, psi, residue id, residue name and
-- class label. The residue name is stored as-is; the other four columns
-- are parsed with 'parseCol', and the first column that fails to parse
-- determines the returned error. Any other number of columns is reported
-- as a 'Left' instead of failing with an incomplete-pattern exception.
parseRecord :: [BS.ByteString] -> Either String PhiPsi
parseRecord [phiStr, psiStr, resIdStr, resNameStr, ssStr] =
  do aPhi   <- parseCol phiStr
     aPsi   <- parseCol psiStr
     aResId <- parseCol resIdStr
     aSS    <- parseCol ssStr
     return $ PhiPsi aPhi aPsi aResId resNameStr aSS
parseRecord cols =
  Left ("Expected five columns in PhiPsi record, but found: " ++ show cols)
-- | Parse a single column with the 'Read' instance of the requested type.
--
-- Succeeds only when the first parse returned by 'reads' consumes the whole
-- column; otherwise the failure is delegated to 'colError'.
parseCol :: (Read a, Typeable a) => BS.ByteString -> Either String a
parseCol c =
  case reads (BS.unpack c) of
    ((value, []):_) -> return value
    attempts ->
      -- 'witness' exists only to tell 'colError' which type was requested;
      -- 'colError' passes it to 'typeOf' and does nothing else with it.
      -- NOTE(review): this relies on 'typeOf' not evaluating its argument,
      -- since 'attempts' may be empty -- confirm for the base in use.
      let witness = fst (head attempts)
      in colError c witness
-- | Build the error for a column that could not be parsed.
--
-- The first argument is the offending column text. The second argument is
-- used only through 'typeOf', to name the type that was expected.
--
-- The error is returned as a 'Left' value. The previous implementation used
-- 'fail', which for 'Either' either raises a runtime exception via 'error'
-- or does not compile at all (no @MonadFail@ instance), so the message never
-- reached the error list collected by 'parsePhiPsi'.
colError :: Typeable t => BS.ByteString -> t -> Either String a
colError c t = Left ( "Cannot parse column " ++
                      (show . BS.unpack) c ++
                      " as " ++
                      (show . typeOf) t )
-- | Read the file at the given path and parse its contents with
-- 'parsePhiPsi', using the path itself as the source name in error messages.
--
-- NOTE(review): the path is converted with 'BS.pack' from
-- "Data.ByteString.Char8", so characters outside the 8-bit range would be
-- altered in error messages -- confirm whether that matters for callers.
parsePhiPsiFile :: FilePath -> IO ([PhiPsi], [String])
parsePhiPsiFile fname = do
  contents <- BS.readFile fname
  return (parsePhiPsi (BS.pack fname) contents)