{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Parser for whitespace-separated, five-column phi\/psi records
-- (phi, psi, residue id, residue name, ss label), with @#@ comment lines.
module Bio.Talos.PhiPsi
  ( parsePhiPsi
  , parsePhiPsiFile
  , PhiPsi(..)
  ) where

-- Control.Monad.Instances was removed in base 4.8. It is still imported on
-- older bases, where it may be the provider of the Either instances.
#if !MIN_VERSION_base(4,8,0)
import Control.Monad.Instances ()
#endif
import Data.Typeable
import Data.Data
import Data.List (partition)
import Data.Either (partitionEithers)
import qualified Data.ByteString.Char8 as BS

-- | Label stored in the 'ss' field of a record (presumably the
-- secondary-structure class). Its textual forms are @alpha@, @beta@ and
-- @random@.
--
-- NOTE(review): this type is not in the export list, so client modules can
-- only observe it through its class instances (for example 'show').
data SS = Alpha | Beta | Random
  deriving (Eq, Ord, Enum, Typeable, Data)

-- | @s `withoutPrefix` p@ is @Just@ the remainder of @s@ after @p@ when @s@
-- starts with @p@, and @Nothing@ otherwise.
withoutPrefix :: Eq a => [a] -> [a] -> Maybe [a]
s `withoutPrefix` p
  | (start, rest) <- splitAt (length p) s, start == p = Just rest
  | otherwise = Nothing

-- | Reads the lowercase labels produced by the 'Show' instance. Leading
-- whitespace is not skipped and the precedence argument is ignored.
instance Read SS where
  readsPrec _ s
    | Just r <- s `withoutPrefix` "random" = [(Random, r)]
    | Just r <- s `withoutPrefix` "beta" = [(Beta, r)]
    | Just r <- s `withoutPrefix` "alpha" = [(Alpha, r)]
    | otherwise = []

-- | Shows the label in lowercase: @random@, @beta@ or @alpha@.
instance Show SS where
  showsPrec _ Random = ("random" ++)
  showsPrec _ Beta = ("beta" ++)
  showsPrec _ Alpha = ("alpha" ++)

-- | One parsed record. Fields are listed in the same order as the input
-- columns.
data PhiPsi = PhiPsi
  { phi, psi :: Double        -- ^ columns 1 and 2
  , resId    :: Int           -- ^ column 3
  , resName  :: BS.ByteString -- ^ column 4, kept verbatim
  , ss       :: SS            -- ^ column 5
  }
  deriving (Eq, Show) -- add read later...

-- | Parse the contents of a phi\/psi table.
--
-- The first argument is the name of the input and is used only to prefix
-- error messages; the second is the text to parse. Lines starting with @#@
-- and lines without any words are skipped. Every remaining line must have
-- exactly five whitespace-separated columns.
--
-- Returns the successfully parsed records together with one message per
-- rejected line. Messages about a wrong column count come first, followed
-- by messages about columns that could not be read. The function is total:
-- malformed input is reported in the message list, never thrown.
parsePhiPsi :: BS.ByteString -> BS.ByteString -> ([PhiPsi], [String])
parsePhiPsi fname input = (goodRecords, errors)
  where
    -- error handling:
    errors =
      map (withErrorSource . lengthError) recordsOfInvalidLength
        ++ map withErrorSource recordParseErrors
    lengthError cols =
      "Expected five columns in PhiPsi record, but found: " ++ show cols
    withErrorSource msg = "Error parsing " ++ BS.unpack fname ++ ": " ++ msg
    -- Drop comments. isPrefixOf is used instead of BS.head because the
    -- latter is partial and would throw on an empty line.
    dataLines = filter (not . BS.isPrefixOf "#") (BS.lines input)
    -- Drop lines that contain no words at all.
    records = filter (not . null) (map BS.words dataLines)
    -- Separate records with the wrong number of columns.
    (recordsOfGoodLength, recordsOfInvalidLength) =
      partition ((== 5) . length) records
    -- Separate records whose columns fail to parse.
    (recordParseErrors, goodRecords) =
      partitionEithers (map parseRecord recordsOfGoodLength)

-- | Convert the five columns of one line into a 'PhiPsi', or describe the
-- first column that failed to parse. Any other number of columns yields a
-- @Left@ as well, so the function is total.
parseRecord :: [BS.ByteString] -> Either String PhiPsi
parseRecord [phiStr, psiStr, resIdStr, resNameStr, ssStr] = do
  aPhi <- parseCol phiStr
  aPsi <- parseCol psiStr
  aResId <- parseCol resIdStr
  aSS <- parseCol ssStr
  return $ PhiPsi aPhi aPsi aResId resNameStr aSS
parseRecord cols =
  Left ("Expected five columns in PhiPsi record, but found: " ++ show cols)

-- | Read a whole column as a value of the requested type. The column is
-- accepted only if the first parse returned by 'reads' consumes all of it.
parseCol :: forall a. (Read a, Typeable a) => BS.ByteString -> Either String a
parseCol c = case reads (BS.unpack c) of
  ((result, []) : _) -> Right result
  -- The undefined value is never evaluated; it only carries the type.
  _ -> colError c (undefined :: a)

-- | Build the error for a column that cannot be read. The second argument
-- is never evaluated; only its type is shown in the message.
colError :: Typeable t => BS.ByteString -> t -> Either String a
colError c t =
  Left
    ( "Cannot parse column "
        ++ (show . BS.unpack) c
        ++ " as "
        ++ (show . typeOf) t
    )

-- | Read the named file and parse it with 'parsePhiPsi', using the file
-- name as the error source.
parsePhiPsiFile :: FilePath -> IO ([PhiPsi], [String])
parsePhiPsiFile fname = parsePhiPsi (BS.pack fname) `fmap` BS.readFile fname