{-# Language BangPatterns #-} module Bio.Location.Test (tests) where import Control.Monad import Control.Monad.Error import qualified Data.ByteString.Lazy.Char8 as LBS import Data.Int (Int64) import Data.Ix (inRange) import Data.List import Data.Maybe import Test.QuickCheck import qualified Bio.Location.ContigLocation as CLoc import qualified Bio.Location.LocMap as LM import qualified Bio.Location.Location as Loc import qualified Bio.Location.Position as Pos import Bio.Location.OnSeq import qualified Bio.Location.SeqLocation as SeqLoc import qualified Bio.Location.SeqLocMap as SLM import Bio.Location.Strand import Bio.Sequence.SeqData import Bio.Util.TestBase tests :: [Test] tests = [ T "Strand revCompl" test_Strand_revCompl , T "Char revCompl" test_Char_revCompl , T "SeqData revCompl" property_SeqData_revCompl , T "Sequence revCompl" property_Sequence_revCompl , T "Pos revCompl" test_Pos_revCompl , T "Pos seqNt" property_Pos_seqNt , T "Pos seqNtPadded" property_Pos_seqNtPadded , T "Contig revCompl" test_Contig_RevCompl , T "Contig into/outof inversion" property_ContigIntoOutof , T "Contig outof/into inversion" property_ContigOutofInto , T "Contig into based on bounds" test_Contig_IntoBounds , T "Contig outof based on bounds" test_Contig_OutofBounds , T "Contig seqData" property_Contig_seqData , T "Contig seqDataPadded" property_Contig_seqDataPadded , T "Contig extend/revCompl" property_Contig_extendRevCompl , T "Contig fromStartEnd" property_Contig_fromStartEnd , T "Loc revCompl" test_Loc_RevCompl , T "Loc into/outof inversion" property_LocIntoOutof , T "Loc outof/into inversion" property_LocOutofInto , T "Loc outof based on bounds" test_Loc_OutofBounds , T "Loc within" property_Loc_Within , T "Loc seqData" property_Loc_seqData , T "Loc seqDataPadded" property_Loc_seqDataPadded , T "LocMap Within" property_LocMap_Within , T "LocMap Overlaps" property_LocMap_Overlaps , T "SeqLocMap Within" property_SeqLocMap_Within , T "SeqLocMap Overlaps" property_SeqLocMap_Overlaps ] -- Bio.Location.Stranded genNtSeqData :: Int -> Gen SeqData genNtSeqData = liftM LBS.pack . flip replicateM (elements "ACGT") test_revCompl :: (Eq s, Stranded s) => s -> Bool test_revCompl s = (revCompl . revCompl) s == s test_Strand_revCompl :: Strand -> Bool test_Strand_revCompl = test_revCompl test_Char_revCompl :: Char -> Bool test_Char_revCompl = test_revCompl property_SeqData_revCompl :: Property property_SeqData_revCompl = forAll (sized genNtSeqData) test_revCompl property_Sequence_revCompl :: Property property_Sequence_revCompl = forAll arbitrary $ \name -> let mkSeq s = Seq name s Nothing in forAll (sized genNtSeqData) $ \sequ -> ((revcompl . mkSeq) sequ) == ((mkSeq . revCompl) sequ) -- Bio.Location.Position test_Pos_revCompl :: Pos.Pos -> Bool test_Pos_revCompl = test_revCompl property_Pos_seqNt :: Pos.Pos -> Property property_Pos_seqNt pos@(Pos.Pos off str) = let pos = Pos.Pos off str in forAll genPositiveOffset $ \seqlen -> forAll (genNtSeqData $ fromIntegral seqlen) $ \sequ -> let actual = Pos.seqNt sequ pos in if inRange (0, seqlen - 1) off then let fwdNt = LBS.index sequ off in if str == Fwd then actual == Right fwdNt else actual == Right (compl fwdNt) else isLeft actual where isLeft :: Either String Char -> Bool isLeft = either (const True) (const False) property_Pos_seqNtPadded :: Pos.Pos -> Property property_Pos_seqNtPadded pos@(Pos.Pos off str) = forAll genPositiveOffset $ \seqlen -> forAll (genNtSeqData $ fromIntegral seqlen) $ \sequ -> (Pos.seqNt sequ pos `catchError` returnN) == (Right $ Pos.seqNtPadded sequ pos) where returnN :: String -> Either String Char returnN _ = return 'N' -- Bio.Location.ContigLocation instance Arbitrary Strand where arbitrary = elements [Fwd, RevCompl] instance Arbitrary Pos.Pos where arbitrary = liftM2 Pos.Pos genOffset arbitrary instance Arbitrary CLoc.ContigLoc where arbitrary = liftM3 CLoc.ContigLoc genOffset genPositiveOffset arbitrary instance Arbitrary LBS.ByteString where arbitrary = liftM LBS.pack $ arbitrary test_Contig_RevCompl :: CLoc.ContigLoc -> Bool test_Contig_RevCompl = test_revCompl property_ContigIntoOutof :: CLoc.ContigLoc -> Pos.Pos -> Property property_ContigIntoOutof contig pos = let !mInpos = CLoc.posInto pos contig !mOutpos = mInpos >>= flip CLoc.posOutof contig in (isJust mInpos) ==> mOutpos == (Just pos) property_ContigOutofInto :: CLoc.ContigLoc -> Pos.Pos -> Property property_ContigOutofInto contig pos = let !mOutpos = CLoc.posOutof pos contig !mInpos = mOutpos >>= flip CLoc.posInto contig in (isJust mOutpos) ==> mInpos == (Just pos) test_Contig_IntoBounds :: CLoc.ContigLoc -> Pos.Pos -> Bool test_Contig_IntoBounds contig pos = let !mInpos = CLoc.posInto pos contig !offset = Pos.offset pos !(cstart, cend) = CLoc.bounds contig in (isJust mInpos) == (offset >= cstart && offset <= cend) test_Contig_OutofBounds :: CLoc.ContigLoc -> Pos.Pos -> Bool test_Contig_OutofBounds contig pos = let !offset = Pos.offset pos in (isJust $ CLoc.posOutof pos contig) == (offset >= 0 && offset < CLoc.length contig) property_Contig_seqData :: CLoc.ContigLoc -> Property property_Contig_seqData contig = forAll (genNonNegOffset >>= genNtSeqData . fromIntegral) $ \sequ -> let seqData :: Either String SeqData seqData = CLoc.seqData sequ contig padded = CLoc.seqDataPadded sequ contig in case seqData of (Right subsequ) -> and [ padded == subsequ, 'N' `LBS.notElem` padded ] (Left _) -> 'N' `LBS.elem` padded property_Contig_seqDataPadded :: CLoc.ContigLoc -> Property property_Contig_seqDataPadded contig = forAll (genNonNegOffset >>= genNtSeqData . fromIntegral) $ \sequ -> (LBS.pack $ map (Pos.seqNtPadded sequ) contigPoses) == CLoc.seqDataPadded sequ contig where contigPoses = mapMaybe (flip CLoc.posOutof contig . flip Pos.Pos Fwd) [0..(CLoc.length contig - 1)] property_Contig_extendRevCompl :: CLoc.ContigLoc -> Property property_Contig_extendRevCompl contig = forAll (liftM2 (,) genNonNegOffset genNonNegOffset) $ \(ext5, ext3) -> (revCompl $ CLoc.extend (ext5, ext3) contig) == (CLoc.extend (ext3, ext5) $ revCompl contig) property_Contig_fromStartEnd :: CLoc.ContigLoc -> Property property_Contig_fromStartEnd contig = (CLoc.length contig > 1) ==> (CLoc.fromStartEnd (Pos.offset $ CLoc.startPos contig) (Pos.offset $ CLoc.endPos contig)) == contig -- Bio.Location.Location genInvertibleLoc :: Gen Loc.Loc genInvertibleLoc = sized $ \sz -> do ncontigs <- choose (1, sz + 1) fwdloc <- liftM Loc.Loc $ genContigs ncontigs rc <- arbitrary if rc then return $ revCompl fwdloc else return fwdloc where genContigs = liftM (reverse . foldl' intervalsToContigs []) . genIntervals genIntervals nints = replicateM nints $ liftM2 (,) genPositiveOffset genPositiveOffset intervalsToContigs [] (init5, len) = [CLoc.ContigLoc init5 len Fwd] intervalsToContigs prevs@(prev:_) (nextoffset, nextlen) = let !prevend = CLoc.offset5 prev + CLoc.length prev in (CLoc.ContigLoc (prevend + nextoffset) nextlen Fwd):prevs instance Arbitrary Loc.Loc where arbitrary = sized $ \sz -> do nintervals <- choose (1, sz + 1) liftM Loc.Loc $ vector nintervals test_Loc_RevCompl :: Loc.Loc -> Bool test_Loc_RevCompl = test_revCompl property_LocIntoOutof :: Loc.Loc -> Pos.Pos -> Property property_LocIntoOutof loc pos = let !mInpos = Loc.posInto pos loc !mOutpos = mInpos >>= flip Loc.posOutof loc in (isJust mInpos) ==> mOutpos == (Just pos) property_LocOutofInto :: Pos.Pos -> Property property_LocOutofInto pos = forAll genInvertibleLoc $ \loc -> let !mOutpos = Loc.posOutof pos loc !mInpos = mOutpos >>= flip Loc.posInto loc in (isJust mOutpos) ==> mInpos == (Just pos) test_Loc_OutofBounds :: Loc.Loc -> Pos.Pos -> Bool test_Loc_OutofBounds loc pos = let !offset = Pos.offset pos in (isJust $ Loc.posOutof pos loc) == (offset >= 0 && offset < Loc.length loc) property_Loc_seqData :: Loc.Loc -> Property property_Loc_seqData loc = forAll (genNonNegOffset >>= genNtSeqData . fromIntegral) $ \sequ -> let seqData :: Either String SeqData seqData = Loc.seqData sequ loc padded = Loc.seqDataPadded sequ loc in case seqData of (Right subsequ) -> and [ padded == subsequ, 'N' `LBS.notElem` padded ] (Left _) -> 'N' `LBS.elem` padded property_Loc_seqDataPadded :: Loc.Loc -> Property property_Loc_seqDataPadded loc = forAll (genNonNegOffset >>= genNtSeqData . fromIntegral) $ \sequ -> (LBS.pack $ map (Pos.seqNtPadded sequ) locPoses) == Loc.seqDataPadded sequ loc where locPoses = mapMaybe (flip Loc.posOutof loc . flip Pos.Pos Fwd) [0..(Loc.length loc - 1)] property_Loc_Within :: Pos.Pos -> Property property_Loc_Within pos = forAll genInvertibleLoc $ \loc -> (pos `Loc.isWithin` loc) == (maybe False ((/= RevCompl) . Pos.strand) $ Loc.posInto pos loc) -- class Checkable a where addCheck :: a -> a instance Checkable () where addCheck = id instance Checkable Int where addCheck = id instance Checkable Char where addCheck = id instance (Checkable a, Checkable b) => Checkable (a, b) where addCheck (x, y) = (addCheck x, addCheck y) instance (Checkable a) => Checkable [a] where addCheck = map addCheck instance (Checkable a, Checkable b) => Checkable (a -> b) where addCheck f = \x -> (addCheck . f) (addCheck x) instance Checkable (LM.LocMap a) where addCheck x = case LM.checkInvariants x of [] -> x errs -> error $ unlines errs instance Checkable Int64 where addCheck = id instance Checkable Pos.Pos where addCheck = id instance Checkable Loc.Loc where addCheck = id genLocs :: Gen [Loc.Loc] genLocs = sized $ \sz -> choose (0, sz) >>= vector property_LocMap_Within :: Pos.Pos -> Property property_LocMap_Within seqpos = forAll genLocs $ \locs -> forAll genPositiveOffset $ \zonesize -> let !locents = zip locs ['0'..] !locmap = (addCheck LM.fromList) zonesize locents !hits = filter (Loc.isWithin seqpos . fst) locents !maphits = (addCheck LM.lookupWithin) seqpos locmap in -- collect (length hits) $ sort hits == sort maphits property_LocMap_Overlaps :: Loc.Loc -> Property property_LocMap_Overlaps loc = forAll genLocs $ \locs -> forAll genPositiveOffset $ \zonesize -> let !locents = zip locs ['0'..] !locmap = (addCheck LM.fromList) zonesize locents !hits = filter (Loc.overlaps loc . fst) locents !maphits = (addCheck LM.lookupOverlaps) loc locmap in -- collect (length hits) $ sort hits == sort maphits -- genSeqs :: Gen [SeqName] genSeqs = sized $ \sz -> choose (1, sz + 1) >>= vector genSeqLocs :: [SeqName] -> Gen [SeqLoc.SeqLoc] genSeqLocs seqNames = genLocs >>= mapM genSeqLoc where genSeqLoc loc = liftM (flip OnSeq loc) $ elements seqNames property_SeqLocMap_Within = forAll genSeqs $ \seqs -> forAll (genSeqLocs seqs) $ \slocs -> forAll (liftM2 OnSeq (elements seqs) arbitrary) $ \spos -> let !slocents = zip slocs ['0'..] !slocmap = SLM.fromList slocents !hits = filter (andSameSeq Loc.isWithin spos . fst) slocents !maphits = SLM.lookupWithin spos slocmap in -- collect (length hits, length slocents) $ sort hits == sort maphits property_SeqLocMap_Overlaps = forAll genSeqs $ \seqs -> forAll (genSeqLocs seqs) $ \slocs -> forAll (liftM2 OnSeq (elements seqs) arbitrary) $ \sloc -> let !slocents = zip slocs ['0'..] !slocmap = SLM.fromList slocents !hits = filter (andSameSeq Loc.overlaps sloc . fst) slocents !maphits = SLM.lookupOverlaps sloc slocmap in -- collect (length hits, length slocents) $ sort hits == sort maphits