module Data.STAR.ChemShifts(ChemShift(..)
,extractChemShifts
,parse
,extractSequenceFromChemShifts
,showSequenceWithChain)
where
import Prelude hiding(String)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Binary as B
import Data.ByteString.Nums.Careless.Float as F
import Data.ByteString.Nums.Careless.Int as I
import Data.Binary (Binary(..))
import Control.DeepSeq (NFData(..))
import Data.Typeable
import Control.Monad.Trans (lift)
import Control.Arrow ((&&&))
import Data.List (groupBy, nub)
import Data.STAR.Parser (parseFile)
import Data.STAR.Type
import Data.STAR.Path
import Data.STAR.ResidueCodes(toSingleLetterCode)
data ChemShift = ChemShift { cs_id :: !Int,
seq_id :: !Int,
entity_id :: !Int,
comp_id :: !String,
atom_id :: !String,
atom_type :: !String,
isotope :: !Int,
chemshift :: !Float,
sigma :: !Float,
entry_id :: !String
}
deriving (Eq, Ord, Show, Typeable)
extractChemShifts :: STAR -> [ChemShift]
extractChemShifts (STAR l) = concatMap extract' l
where
extract' (Global l) = []
extract' (Data _ l) = concatMap chemShiftFrame l
chemShiftFrame :: STAREntry -> [ChemShift]
chemShiftFrame (Frame name elts) | frameCategory elts == "assigned_chemical_shifts" = concatMap chemShiftLoop elts
where
frameCategory elts = emptyHead $ elts ->// entriesByName "Assigned_chem_shift_list.Sf_category" ./ entryValue
emptyHead [] = ""
emptyHead l = head l
chemShiftFrame _ = []
chemShiftLoop :: STAREntry -> [ChemShift]
chemShiftLoop (Loop elts@((Entry e v:_):_)) | "Atom_chem_shift" `BSC.isPrefixOf` e = map extractChemShift elts
chemShiftLoop _ = []
emptyChemShift :: ChemShift
emptyChemShift = ChemShift { cs_id = 1,
entity_id = 1,
seq_id = 1,
comp_id = "<UNKNOWN COMPOUND>",
atom_id = "<UNKNOWN ID>",
atom_type = "<UNKNOWN TYPE>",
isotope = 1,
chemshift = 999.999,
sigma = 999.999,
entry_id = "<UNKNOWN ENTRY>" }
isFilledChemShift :: ChemShift -> Bool
isFilledChemShift cs = all (\f -> f cs) [is_good cs_id,
is_good seq_id,
is_good comp_id,
is_good atom_type,
is_good isotope,
is_good chemshift,
is_good sigma,
is_good entry_id]
where
is_good :: (Eq a) => (ChemShift -> a) -> ChemShift -> Bool
is_good f cs = f cs /= f emptyChemShift
compose = foldl apply
where
apply e f = f e
extractChemShift :: [STAREntry]
-> ChemShift
extractChemShift entries = if isFilledChemShift entry
then entry
else error $ "Cannot fill entry from: " ++ show entries
where
entryUpdate (Entry "Atom_chem_shift.ID" v) cs = cs { cs_id = I.int v }
entryUpdate (Entry "Atom_chem_shift.Entity_ID" v) cs = cs { entity_id = I.int v }
entryUpdate (Entry "Atom_chem_shift.Seq_ID" v) cs = cs { seq_id = I.int v }
entryUpdate (Entry "Atom_chem_shift.Atom_ID" v) cs = cs { atom_id = v }
entryUpdate (Entry "Atom_chem_shift.Comp_ID" v) cs = cs { comp_id = v }
entryUpdate (Entry "Atom_chem_shift.Atom_type" v) cs = cs { atom_type = v }
entryUpdate (Entry "Atom_chem_shift.Atom_isotope_number" v) cs = cs { isotope = I.int v }
entryUpdate (Entry "Atom_chem_shift.Val" v) cs = cs { chemshift = F.float v }
entryUpdate (Entry "Atom_chem_shift.Val_err" v) cs = cs { sigma = F.float v }
entryUpdate (Entry "Atom_chem_shift.Entry_ID" v) cs = cs { entry_id = v }
entryUpdate _ cs = cs
updates :: [ChemShift -> ChemShift] = map entryUpdate entries
entry :: ChemShift = compose emptyChemShift updates
extractSequenceFromChemShifts :: [ChemShift]
-> [(Int, [Char])]
extractSequenceFromChemShifts = map ( (trd3 . head) &&&
map (seqCode . fst3)) .
groupBy thirdEq .
fillGaps .
nub .
map extract
where
extract cs = (comp_id cs,
seq_id cs,
entity_id cs)
fillGaps ((a,i,e):(b,j,f):rs) | e /= f = (a,i,e):fillGaps ( (b,j,f):rs)
fillGaps ((a,i,e):(b,j,f):rs) | i + 1 >= j = (a,i,e):fillGaps ( (b,j,f):rs)
fillGaps ((a,i,e):(b,j,f):rs) = (a,i,e):fillGaps (("-",i+1,e):(b,j,f):rs)
fillGaps rs = rs
thirdEq (_, _, a) (_, _, b) = a == b
fst3 (a, _, _) = a
trd3 (_, _, c) = c
seqCode x | BSC.length x == 1 && BSC.head x `BSC.elem` "ACGUT-" = BSC.head x
seqCode x = toSingleLetterCode x
showSequenceWithChain :: [Char]
-> (Int, [Char])
-> [Char]
showSequenceWithChain fname (chain, seq) = concat [">",
fname,
":",
show chain,
"\n",
seq]
parse :: [Char]
-> IO (Either [Char] [ChemShift])
parse input = do dat <- Data.STAR.Parser.parseFile input
return $ case dat of
Right parsed -> Right $ extractChemShifts parsed
Left e -> Left e