module Bio.Sequence.GOA where
import Data.ByteString.Lazy.Char8 (ByteString,pack,unpack,copy)
import qualified Data.ByteString.Lazy.Char8 as B
-- | Read the file at the given path as a lazy 'ByteString' and turn it into
-- annotations: the contents go through 'decomment', and every line that
-- survives is converted with 'mkAnn'.
readGOA :: FilePath -> IO [Annotation]
readGOA path = fmap parseAll (B.readFile path)
  where
    parseAll = map mkAnn . decomment
-- | Read the file at the given path as a lazy 'ByteString' and turn it into
-- GO definitions: the contents go through 'decomment', and every line that
-- survives is converted with 'mkGoDef'.
readGO :: FilePath -> IO [GoDef]
readGO path = fmap parseAll (B.readFile path)
  where
    parseAll = map mkGoDef . decomment
-- | Split the input into lines and keep only the data lines, i.e. drop
-- every empty line and every line whose first character is @!@.
decomment :: ByteString -> [ByteString]
decomment contents =
  [ l
  | l <- B.lines contents
  , not (B.null l)      -- checked first, so 'B.head' never sees an empty line
  , B.head l /= '!'
  ]
-- | A GO term, identified by its number. The 'Read' and 'Show' instances
-- use the textual form @GO:@ followed by the number.
newtype GoTerm = GO Int deriving (Eq,Ord)
-- | A UniProt accession, kept as a lazy 'ByteString'.
type UniProtAcc = ByteString
-- | The class of a GO term. The constructors are read and shown as the
-- single letters @F@, @P@ and @C@ respectively (presumably function,
-- process and component -- confirm against the GO documentation).
data GoClass = Func | Proc | Comp
-- | Parse a term written as @GO:@ followed by an integer, e.g.
-- @GO:0008150@. The prefix must be the very first characters of the input;
-- leading whitespace is not skipped.
--
-- Input without the @GO:@ prefix yields no parse (an empty list) instead of
-- calling 'error', which is what the 'Read' contract expects: 'reads' and
-- 'Text.Read.readMaybe' can then be used to detect malformed terms, while
-- 'read' still fails on such input.
instance Read GoTerm where
  readsPrec n ('G':'O':':':xs) = [ (GO i, rest) | (i, rest) <- readsPrec n xs ]
  readsPrec _ _ = []
-- | Render a term as @GO:@ followed by its number, left-padded with zeros
-- to seven digits, so @GO 8150@ is shown as @GO:0008150@. Numbers that
-- already have seven or more digits are shown unpadded, and a negative
-- number (not a meaningful term) is shown as is.
--
-- The padded text is still accepted by the 'Read' instance, because leading
-- zeros are ignored when the integer part is read.
instance Show GoTerm where
  show (GO x)
    | x < 0     = "GO:" ++ digits
    | otherwise = "GO:" ++ replicate (7 - length digits) '0' ++ digits
    where
      digits = show x
-- | Parse a class from its one-letter code: @F@ gives 'Func', @P@ gives
-- 'Proc' and @C@ gives 'Comp'. The letter must be the first character of
-- the input; anything else yields no parse.
instance Read GoClass where
  readsPrec _ input = case input of
    'F' : rest -> [(Func, rest)]
    'P' : rest -> [(Proc, rest)]
    'C' : rest -> [(Comp, rest)]
    _          -> []
-- | Show a class as its one-letter code: @F@, @P@ or @C@.
instance Show GoClass where
  show cls = case cls of
    Func -> "F"
    Proc -> "P"
    Comp -> "C"
-- | One annotation record as built by 'mkAnn': a UniProt accession, the GO
-- term attached to it and the evidence code of that attachment. All three
-- fields are strict.
data Annotation = Ann !UniProtAcc !GoTerm !EvidenceCode deriving (Show)
-- | Build an 'Annotation' from a single annotation line.
--
-- The line is split on whitespace. The first word is ignored, the second is
-- taken as the accession. Words are then skipped up to the first one that
-- starts with @GO:@; that word is the term, the word after it is ignored
-- and the one after that is the evidence code.
--
-- Calls 'error', quoting the offending line, when the line has fewer than
-- two words or when no @GO:@ word followed by at least two more words is
-- found. A term or evidence code that cannot be parsed still fails inside
-- 'read'.
mkAnn :: ByteString -> Annotation
mkAnn line = pick (B.words line)
  where
    pick (_db:up:rest) = pick' up (getGo rest)
    pick _             = malformed

    -- 'copy' gives the accession its own storage instead of sharing the
    -- buffer of the input it was cut from.
    pick' up' (go:_:ev:_) = Ann (copy up') (read $ unpack go) (read $ unpack ev)
    pick' _   _           = malformed

    getGo = dropWhile (not . B.isPrefixOf (pack "GO:"))

    malformed = error ("Couldn't decipher GO annotation from: " ++ show line)
-- | A GO term definition as built by 'mkGoDef': the term, its description
-- text and its class. All three fields are strict.
data GoDef = GoDef !GoTerm !ByteString !GoClass deriving (Show)
-- | Build a 'GoDef' from a single definition line.
--
-- The line is split on tab characters and must consist of exactly three
-- fields: the GO term, its description and its class. Any other number of
-- fields is reported through 'error', quoting the fields that were found.
mkGoDef :: ByteString -> GoDef
mkGoDef line =
  case B.split '\t' line of
    [go, desc, cls] -> GoDef (read (unpack go)) (copy desc) (read (unpack cls))
    fields          -> error ("Couldn't decipher GO definition from: "++show fields)
-- | The evidence codes that can be attached to an annotation.
--
-- 'Read' is derived, so a code is parsed from text that spells the
-- constructor name exactly; a code that is not listed here cannot be read.
data EvidenceCode = IC
                  | IDA
                  | IEA
                  | IEP
                  | IGC
                  | IGI
                  | IMP
                  | IPI
                  | ISS
                  | NAS
                  | ND
                  | RCA
                  | TAS
                  | NR
                  deriving (Read,Show,Eq)
-- | 'False' for the evidence codes 'ND' and 'IEA', 'True' for every other
-- code.
isCurated :: EvidenceCode -> Bool
isCurated ev = ev `notElem` [ND, IEA]