{-# LANGUAGE FlexibleContexts, OverloadedStrings #-} module Data.Conduit.TextSpec (spec) where import qualified Data.Conduit.Text as CT import qualified Data.Conduit as C import qualified Data.Conduit.Lift as C import qualified Data.Conduit.List as CL import Test.Hspec import Test.Hspec.QuickCheck import Data.Monoid import Control.Monad.ST import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Text.Encoding.Error as TEE import qualified Data.Text.Lazy.Encoding as TLE import Data.Functor.Identity import Control.Arrow import Control.Applicative import Control.Monad.Trans.Resource import qualified Data.ByteString as S import qualified Data.Text.Lazy as TL import qualified Data.ByteString.Lazy as L import Control.Monad.Trans.Resource (runExceptionT_) spec :: Spec spec = describe "Data.Conduit.Text" $ do describe "text" $ do let go enc tenc tdec cenc = describe enc $ do prop "single chunk" $ \chars -> runST $ runExceptionT_ $ do let tl = TL.pack chars lbs = tenc tl src = CL.sourceList $ L.toChunks lbs ts <- src C.$= CT.decode cenc C.$$ CL.consume return $ TL.fromChunks ts == tl prop "many chunks" $ \chars -> runIdentity $ runExceptionT_ $ do let tl = TL.pack chars lbs = tenc tl src = mconcat $ map (CL.sourceList . return . S.singleton) $ L.unpack lbs ts <- src C.$= CT.decode cenc C.$$ CL.consume return $ TL.fromChunks ts == tl -- Check whether raw bytes are decoded correctly, in -- particular that Text decoding produces an error if -- and only if Conduit does. prop "raw bytes" $ \bytes -> let lbs = L.pack bytes src = CL.sourceList $ L.toChunks lbs etl = runException $ src C.$= CT.decode cenc C.$$ CL.consume tl' = tdec lbs in case etl of (Left _) -> (return $! TL.toStrict tl') `shouldThrow` anyException (Right tl) -> TL.fromChunks tl `shouldBe` tl' prop "encoding" $ \chars -> runIdentity $ runExceptionT_ $ do let tss = map T.pack chars lbs = tenc $ TL.fromChunks tss src = mconcat $ map (CL.sourceList . return) tss bss <- src C.$= CT.encode cenc C.$$ CL.consume return $ L.fromChunks bss == lbs prop "valid then invalid" $ \x y chars -> runIdentity $ runExceptionT_ $ do let tss = map T.pack ([x, y]:chars) ts = T.concat tss lbs = tenc (TL.fromChunks tss) `L.append` "\0\0\0\0\0\0\0" src = mapM_ C.yield $ L.toChunks lbs Just x' <- src C.$$ CT.decode cenc C.=$ C.await return $ x' `T.isPrefixOf` ts go "utf8" TLE.encodeUtf8 TLE.decodeUtf8 CT.utf8 go "utf16_le" TLE.encodeUtf16LE TLE.decodeUtf16LE CT.utf16_le go "utf16_be" TLE.encodeUtf16BE TLE.decodeUtf16BE CT.utf16_be go "utf32_le" TLE.encodeUtf32LE TLE.decodeUtf32LE CT.utf32_le go "utf32_be" TLE.encodeUtf32BE TLE.decodeUtf32BE CT.utf32_be it "mixed utf16 and utf8" $ do let bs = "8\NUL:\NULu\NUL\215\216\217\218" src = C.yield bs C.$= CT.decode CT.utf16_le text <- src C.$$ C.await text `shouldBe` Just "8:u" (src C.$$ CL.sinkNull) `shouldThrow` anyException it "invalid utf8" $ do let bs = S.pack [0..255] src = C.yield bs C.$= CT.decode CT.utf8 text <- src C.$$ C.await text `shouldBe` Just (T.pack $ map toEnum [0..127]) (src C.$$ CL.sinkNull) `shouldThrow` anyException it "catch UTF8 exceptions" $ do let badBS = "this is good\128\128\0that was bad" grabExceptions inner = C.catchC (inner C.=$= CL.map Right) (\e -> C.yield (Left (e :: CT.TextException))) res <- C.yield badBS C.$$ (,) <$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume) <*> CL.consume first (map (either (Left . show) Right)) res `shouldBe` ( [ Right "this is good" , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t" ] , ["\128\128\0that was bad"] ) it "catch UTF8 exceptions, pure" $ do let badBS = "this is good\128\128\0that was bad" grabExceptions inner = do res <- C.runCatchC $ inner C.=$= CL.map Right case res of Left e -> C.yield $ Left e Right () -> return () let res = runIdentity $ C.yield badBS C.$$ (,) <$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume) <*> CL.consume first (map (either (Left . show) Right)) res `shouldBe` ( [ Right "this is good" , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t" ] , ["\128\128\0that was bad"] ) it "catch UTF8 exceptions, catchExceptionC" $ do let badBS = "this is good\128\128\0that was bad" grabExceptions inner = C.catchCatchC (inner C.=$= CL.map Right) (\e -> C.yield $ Left e) let res = runException_ $ C.yield badBS C.$$ (,) <$> (grabExceptions (CT.decode CT.utf8) C.=$ CL.consume) <*> CL.consume first (map (either (Left . show) Right)) res `shouldBe` ( [ Right "this is good" , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t" ] , ["\128\128\0that was bad"] ) it "catch UTF8 exceptions, catchExceptionC, decodeUtf8" $ do let badBS = "this is good\128\128\0that was bad" grabExceptions inner = C.catchCatchC (inner C.=$= CL.map Right) (\e -> C.yield $ Left e) let res = runException_ $ C.yield badBS C.$$ (,) <$> (grabExceptions CT.decodeUtf8 C.=$ CL.consume) <*> CL.consume first (map (either (Left . show) Right)) res `shouldBe` ( [ Right "this is good" , Left $ show $ CT.NewDecodeException "UTF-8" 12 "\128\128\0t" ] , ["\128\128\0that was bad"] ) prop "lenient UTF8 decoding" $ \good1 good2 -> do let bss = [TE.encodeUtf8 $ T.pack good1, "\128\129\130", TE.encodeUtf8 $ T.pack good2] bs = S.concat bss expected = TE.decodeUtf8With TEE.lenientDecode bs actual = runIdentity $ mapM_ C.yield bss C.$$ CT.decodeUtf8Lenient C.=$ CL.consume T.concat actual `shouldBe` expected describe "text lines" $ do it "yields nothing given nothing" $ (CL.sourceList [] C.$= CT.lines C.$$ CL.consume) == [[]] it "yields nothing given only empty text" $ (CL.sourceList [""] C.$= CT.lines C.$$ CL.consume) == [[]] it "works across split lines" $ (CL.sourceList ["abc", "d\nef"] C.$= CT.lines C.$$ CL.consume) == [["abcd", "ef"]] it "works with multiple lines in an item" $ (CL.sourceList ["ab\ncd\ne"] C.$= CT.lines C.$$ CL.consume) == [["ab", "cd", "e"]] it "works with ending on a newline" $ (CL.sourceList ["ab\n"] C.$= CT.lines C.$$ CL.consume) == [["ab"]] it "works with ending a middle item on a newline" $ (CL.sourceList ["ab\n", "cd\ne"] C.$= CT.lines C.$$ CL.consume) == [["ab", "cd", "e"]] it "works with empty text" $ (CL.sourceList ["ab", "", "cd"] C.$= CT.lines C.$$ CL.consume) == [["abcd"]] it "works with empty lines" $ (CL.sourceList ["\n\n"] C.$= CT.lines C.$$ CL.consume) == [["", ""]] describe "text lines bounded" $ do it "yields nothing given nothing" $ (CL.sourceList [] C.$= CT.linesBounded 80 C.$$ CL.consume) == [[]] it "yields nothing given only empty text" $ (CL.sourceList [""] C.$= CT.linesBounded 80 C.$$ CL.consume) == [[]] it "works across split lines" $ (CL.sourceList ["abc", "d\nef"] C.$= CT.linesBounded 80 C.$$ CL.consume) == [["abcd", "ef"]] it "works with multiple lines in an item" $ (CL.sourceList ["ab\ncd\ne"] C.$= CT.linesBounded 80 C.$$ CL.consume) == [["ab", "cd", "e"]] it "works with ending on a newline" $ (CL.sourceList ["ab\n"] C.$= CT.linesBounded 80 C.$$ CL.consume) == [["ab"]] it "works with ending a middle item on a newline" $ (CL.sourceList ["ab\n", "cd\ne"] C.$= CT.linesBounded 80 C.$$ CL.consume) == [["ab", "cd", "e"]] it "works with empty text" $ (CL.sourceList ["ab", "", "cd"] C.$= CT.linesBounded 80 C.$$ CL.consume) == [["abcd"]] it "works with empty lines" $ (CL.sourceList ["\n\n"] C.$= CT.linesBounded 80 C.$$ CL.consume) == [["", ""]] it "throws an exception when lines are too long" $ do x <- runExceptionT $ CL.sourceList ["hello\nworld"] C.$$ CT.linesBounded 4 C.=$ CL.consume show x `shouldBe` show (Left $ CT.LengthExceeded 4 :: Either CT.TextException ()) it "works with infinite input" $ do x <- runExceptionT $ CL.sourceList (cycle ["hello"]) C.$$ CT.linesBounded 256 C.=$ CL.consume show x `shouldBe` show (Left $ CT.LengthExceeded 256 :: Either CT.TextException ()) describe "text decode" $ do it' "doesn't throw runtime exceptions" $ do let x = runIdentity $ runExceptionT $ C.yield "\x89\x243" C.$$ CT.decode CT.utf8 C.=$ CL.consume case x of Left _ -> return () Right t -> error $ "This should have failed: " ++ show t it "is not too eager" $ do x <- CL.sourceList ["foobarbaz", error "ignore me"] C.$$ CT.decode CT.utf8 C.=$ CL.head x `shouldBe` Just "foobarbaz" it' :: String -> IO () -> Spec it' = it