-- Test for sequence functionality
--
-- Two groups of QuickCheck properties live here:
--
--   * 'tests_io' -- write sequences to a file under /tmp, read them back,
--     and compare the result with the original;
--   * 'tests_hw' -- properties relating the 'contigous' and 'rcontig'
--     hash functions from "Bio.Sequence.HashWord".
--
-- 'bench' holds heavier checks over whole sets of sequences.
module Bio.Sequence.Test where

import Test.QuickCheck
import System.IO.Unsafe
import Data.Maybe (isJust)

import Bio.Sequence.HashWord
import Bio.Util.TestBase
import Bio.Sequence

-- ----------------------------------------------------------
-- Tests for serialization
--
-- Each property performs its write/read round trip through
-- 'unsafePerformIO', so that the property itself stays a pure 'Bool'.
-- NOTE(review): the file names under /tmp are fixed and partly shared
-- between properties, so these properties would interfere with each other
-- if they were ever run concurrently.

tests_io :: [Test]
--           .........o.........o.........o
tests_io = [ T "serializing"                 prop_serialize
           , T "serialize qual"              prop_serialize_qual
           , T "serialize fasta+qual"        prop_serialize_fastaQual
           , T "serialize fasta+qual multi"  prop_serialize_fastaQual2
           , T "serialize fastq"             prop_serialize_fastq
           , T "serialize fastq multi"       prop_serialize_fastq_multi
           ]

-- A single sequence written with 'writeFasta' and read back with
-- 'readFasta' equals the original (after 'castToNuc').  Reading back
-- anything other than exactly one sequence falsifies the property.
prop_serialize (E s) =
    case unsafePerformIO (do writeFasta "/tmp/serialize_test" [s]
                             readFasta "/tmp/serialize_test") of
      [s'] -> s == castToNuc s'
      _    -> False

-- A single sequence written with 'writeQual' and read back with 'readQual'
-- keeps its first and third 'Seq' fields; the second field is not compared.
prop_serialize_qual (Eq s@(Seq h _ q)) =
    case unsafePerformIO (do writeQual "/tmp/serialize_qual" [s]
                             readQual "/tmp/serialize_qual") of
      [Seq h' _ q'] -> h == h' && q == q'
      _             -> False

-- A single sequence survives a 'writeFastaQual' / 'readFastaQual' round trip.
prop_serialize_fastaQual (Eq s) =
    case unsafePerformIO (do writeFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual" [s]
                             readFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual") of
      [s'] -> s == castToNuc s'
      _    -> False

-- A list of sequences survives a 'writeFastaQual' / 'readFastaQual' round trip.
prop_serialize_fastaQual2 :: [ESTq] -> Bool
prop_serialize_fastaQual2 es = ests == map castToNuc ests'
  where
    ests  = [ x | Eq x <- es ]
    ests' = unsafePerformIO (do writeFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual" ests
                                readFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual")

-- A single sequence survives a 'writeFastQ' / 'readFastQ' round trip.
prop_serialize_fastq (Eq s) =
    case unsafePerformIO (do writeFastQ "/tmp/serialize_fastq" [s]
                             readFastQ "/tmp/serialize_fastq") of
      [s'] -> s' == s
      _    -> False

-- A list of sequences survives a 'writeFastQ' / 'readFastQ' round trip.
prop_serialize_fastq_multi :: [ESTq] -> Bool
prop_serialize_fastq_multi es = ests' == ests
  where
    ests  = [ x | Eq x <- es ]
    ests' = unsafePerformIO (do writeFastQ "/tmp/serialize_fastq" ests
                                readFastQ "/tmp/serialize_fastq")

-- ----------------------------------------------------------
-- Tests for HashWord

tests_hw :: [Test]
--           .........o.........o.........o
tests_hw = [ T "n2k vs k2n"                  prop_n2k_k2n
           , T "contigous_0"                 prop_contigous_0
           , T "prop_rcontig_0"              prop_rcontig_0
           , T "prop_rcontig_1"              prop_rcontig_1
           , T "prop_rclast"                 prop_rclast
           ]

-- n2k is the inverse of k2n for word length 8; the input is first
-- reduced to the range 0..65535.
prop_n2k_k2n :: Int -> Bool
prop_n2k_k2n i' = (n2k 8 . k2n 8) i == i
  where i = abs i' `mod` 65536

-- check that hashes is equal to hash over all indices
-- (positions where 'hash' yields Nothing are dropped)
prop_contigous_0 k (E s) = k > 0 ==>
    hashes (contigous k) (seqdata s)
      == [ (h, i) | (Just h, i) <- zip hashed indices ]
  where
    indices = [0 .. seqlength s - fromIntegral k]
    hashed  = map (hash (contigous k) (seqdata s)) indices

-- rcontig is the minimum of hash of each forward word and its reverse complement
prop_rcontig_0 k (E s) = k > 0 ==>
    zipWith min forward backward == canonical
  where
    forward   = map fst . hashes (contigous k) . seqdata $ s
    backward  = map fst . reverse . hashes (contigous k) . seqdata . revcompl $ s
    canonical = map fst $ hashes (rcontig k) (seqdata s)

-- check that reverse (hashes . reverse) == id
prop_rcontig_1 k (E s) = k > 0 ==>
    reverse (keys (revcompl s)) == keys s
  where
    keys = map fst . hashes (rcontig k) . seqdata

-- remove duplicates, and check key values vs rcontig
-- (placeholder: not implemented, and not registered in 'tests_hw')
prop_rcpacked_1 = error "Bio.Sequence.Test.prop_rcpacked_1: not implemented"

-- last hash is equal to first hash on revcompl seq.
-- see hashcount below
prop_rclast k (E s) = k > 0 && (not . null . hs $ s) ==> rcl k s
    where hs = map fst . hashes (rcontig k) . seqdata

-- really only Nuc
--
-- True when the last rcontig hash of the sequence equals the first rcontig
-- hash of its reverse complement.  Uses 'last' and 'head', so callers must
-- first make sure the hash list is non-empty (as 'prop_rclast' and the
-- 'hashcount' checks do).
rcl :: Int -> Sequence Nuc -> Bool
rcl k s = last (hs s) == head (hs (revcompl s))
    where hs = map fst . hashes (rcontig k) . seqdata

-- benchmarks: todo: time hash generation for contig, rcontig, and gapped (when implemented)
bench :: [Test]
bench = [ T "rc hash counts int (8)"  (hashcount_int 8)
        , T "rc hash counts int (16)" (hashcount_int 16)
        , T "rc hash counts (16)"     (hashcount 16)
        , T "rc hash counts (32)"     (hashcount 32)
        ]

-- For every sequence in the set: either it yields no rcontig hashes, or
-- 'rcl' holds for it.  A violation is reported by calling 'error' with the
-- offending sequence (and, for 'hashcount', the word length) instead of
-- returning False, so the failing input shows up in the output.
hashcount, hashcount_int :: Int -> EST_set -> Property
hashcount k (ESet es) = k > 0 ==> all ok es
  where
    hs :: Sequence Nuc -> [Integer]
    hs = map fst . hashes (rcontig k) . seqdata
    ok e = null (hs e) || rcl k e
           || error (show k ++ "\n" ++ toStr (seqdata e))

-- NOTE(review): the Int annotation below only affects the emptiness check;
-- 'rcl' computes its own hashes without a key-type annotation.  Confirm
-- whether an Int-specialised comparison was intended here.
hashcount_int k (ESet es) = k > 0 ==> all ok es
  where
    hs :: Sequence Nuc -> [Int]
    hs = map fst . hashes (rcontig k) . seqdata
    ok e = null (hs e) || rcl k e
           || error (toStr $ seqdata e)