{- Copyright 2008-2010 Mario Blazevic This file is part of the Streaming Component Combinators (SCC) project. The SCC project is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. SCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SCC. If not, see . -} -- | Module "Primitives" defines primitive components of 'Producer', 'Consumer', 'Transducer' and 'Splitter' types, -- defined in the "Types" module. {-# LANGUAGE ScopedTypeVariables, Rank2Types #-} {-# OPTIONS_HADDOCK hide #-} module Control.Concurrent.SCC.Primitives ( -- * I/O components -- ** I/O producers fromFile, fromHandle, fromStdIn, fromBinaryHandle, -- ** I/O consumers appendFile, toFile, toHandle, toStdOut, toBinaryHandle, -- * Generic components fromList, -- ** Generic consumers suppress, erroneous, toList, -- ** Generic transducers parse, unparse, parseSubstring, OccurenceTag, count, toString, -- *** List stream transducers -- | The following laws hold: -- -- * 'group' '>>>' 'concatenate' == 'id' -- -- * 'concatenate' == 'concatSeparate' [] group, concatenate, concatSeparate, -- ** Generic splitters everything, nothing, marked, markedContent, markedWith, contentMarkedWith, one, substring, -- * Character stream components lowercase, uppercase, whitespace, letters, digits, line, nonEmptyLine, ) where import Prelude hiding (appendFile) import Control.Category ((>>>)) import Control.Exception (assert) import Control.Monad (liftM, when, unless) import Control.Monad.Trans.Class (lift) import qualified Control.Monad as Monad import Data.ByteString (ByteString) import Data.Char (isAlpha, isDigit, isPrint, isSpace, toLower, toUpper) import Data.List (delete, isPrefixOf, stripPrefix) import Data.Maybe (fromJust) import qualified Data.ByteString as ByteString import qualified Data.Foldable as Foldable import qualified Data.Sequence as Seq import Data.Sequence (Seq, (|>), (><), ViewL (EmptyL, (:<)), ViewR (EmptyR, (:>))) import Debug.Trace (trace) import System.IO (Handle, IOMode (ReadMode, WriteMode, AppendMode), openFile, hClose, getLine, hGetLine, hPutStr, hFlush, hIsEOF, hClose, putStr, isEOF, stdout) import Control.Cofunctor.Ticker (tickPrefixOf) import Control.Monad.Coroutine import Control.Monad.Coroutine.SuspensionFunctors import Control.Monad.Coroutine.Nested import Control.Concurrent.SCC.Streams import Control.Concurrent.SCC.Types -- | Collects the entire input source into a list. toList :: forall m x. Monad m => Consumer m x [x] toList = Consumer getList -- | Produces the contents of the given list argument. fromList :: forall m x. Monad m => [x] -> Producer m x () fromList l = Producer ((>> return ()) . putList l) -- | Consumer 'toStdOut' copies the given source into the standard output. toStdOut :: Consumer IO Char () toStdOut = Consumer (mapMStreamChunks_ (lift . putStr)) -- | Producer 'fromStdIn' feeds the given sink from the standard input. fromStdIn :: Producer IO Char () fromStdIn = Producer (unmapMStreamChunks_ (lift $ isEOF >>= cond (return []) (liftM (++ "\n") getLine))) -- | Reads the named file and feeds the given sink from its contents. fromFile :: String -> Producer IO Char () fromFile path = Producer $ \sink-> do handle <- lift (openFile path ReadMode) produce (fromHandle handle) sink lift (hClose handle) -- | Feeds the given sink from the open text file /handle/. fromHandle :: Handle -> Producer IO Char () fromHandle handle = Producer (unmapMStreamChunks_ (lift $ hIsEOF handle >>= cond (return []) (liftM (++ "\n") $ hGetLine handle))) -- | Feeds the given sink from the open binary file /handle/. The argument /chunkSize/ determines the size of the chunks -- read from the handle. fromBinaryHandle :: Handle -> Int -> Producer IO ByteString () fromBinaryHandle handle chunkSize = Producer produce where produce sink = lift (ByteString.hGet handle chunkSize) >>= \chunk-> unless (ByteString.null chunk) (tryPut sink chunk >>= flip when (produce sink)) -- | Creates the named text file and writes the entire given source to it. toFile :: String -> Consumer IO Char () toFile path = Consumer $ \source-> do handle <- lift (openFile path WriteMode) consume (toHandle handle) source lift (hClose handle) -- | Appends the given source to the named text file. appendFile :: String -> Consumer IO Char () appendFile path = Consumer $ \source-> do handle <- lift (openFile path AppendMode) consume (toHandle handle) source lift (hClose handle) -- | Copies the given source into the open text file /handle/. toHandle :: Handle -> Consumer IO Char () toHandle handle = Consumer (mapMStreamChunks_ (lift . hPutStr handle)) -- | Copies the given source into the open binary file /handle/. toBinaryHandle :: Handle -> Consumer IO ByteString () toBinaryHandle handle = Consumer (mapMStream_ (lift . ByteString.hPut handle)) -- | Transducer 'unparse' removes all markup from its input and passes the content through. unparse :: forall m x b. Monad m => Transducer m (Markup b x) x unparse = statelessTransducer removeTag where removeTag (Content x) = [x] removeTag _ = [] -- | Transducer 'parse' prepares input content for subsequent parsing. parse :: forall m x y. Monad m => Transducer m x (Markup y x) parse = oneToOneTransducer Content -- | The 'suppress' consumer suppresses all input it receives. It is equivalent to 'substitute' [] suppress :: forall m x y. Monad m => Consumer m x () suppress = Consumer (\(src :: Source m a x)-> pour src (nullSink :: Sink m a x)) -- | The 'erroneous' consumer reports an error if any input reaches it. erroneous :: forall m x. Monad m => String -> Consumer m x () erroneous message = Consumer (getWith (const (error message))) -- | The 'lowercase' transforms all uppercase letters in the input to lowercase, leaving the rest unchanged. lowercase :: forall m. Monad m => Transducer m Char Char lowercase = oneToOneTransducer toLower -- | The 'uppercase' transforms all lowercase letters in the input to uppercase, leaving the rest unchanged. uppercase :: forall m. Monad m => Transducer m Char Char uppercase = oneToOneTransducer toUpper -- | The 'count' transducer counts all its input values and outputs the final tally. count :: forall m x. Monad m => Transducer m x Integer count = Transducer (\source sink-> foldStream (\count _-> succ count) 0 source >>= put sink) -- | Converts each input value @x@ to @show x@. toString :: forall m x. (Monad m, Show x) => Transducer m x String toString = oneToOneTransducer show -- | Transducer 'group' collects all its input values into a single list. group :: forall m x. Monad m => Transducer m x [x] group = Transducer (\source sink-> getList source >>= put sink) -- | Transducer 'concatenate' flattens the input stream of lists of values into the output stream of values. concatenate :: forall m x. Monad m => Transducer m [x] x concatenate = statelessTransducer id -- | Same as 'concatenate' except it inserts the given separator list between every two input lists. concatSeparate :: forall m x. Monad m => [x] -> Transducer m [x] x concatSeparate separator = statefulTransducer (\seen list-> (True, if seen then separator ++ list else list)) False -- | Splitter 'whitespace' feeds all white-space characters into its /true/ sink, all others into /false/. whitespace :: forall m. Monad m => Splitter m Char () whitespace = statelessSplitter isSpace -- | Splitter 'letters' feeds all alphabetical characters into its /true/ sink, all other characters into -- | /false/. letters :: forall m. Monad m => Splitter m Char () letters = statelessSplitter isAlpha -- | Splitter 'digits' feeds all digits into its /true/ sink, all other characters into /false/. digits :: forall m. Monad m => Splitter m Char () digits = statelessSplitter isDigit -- | Splitter 'nonEmptyLine' feeds line-ends into its /false/ sink, and all other characters into /true/. nonEmptyLine :: forall m. Monad m => Splitter m Char () nonEmptyLine = statelessSplitter (\ch-> ch /= '\n' && ch /= '\r') -- | The sectioning splitter 'line' feeds line-ends into its /false/ sink, and line contents into /true/. A single -- line-end can be formed by any of the character sequences \"\\n\", \"\\r\", \"\\r\\n\", or \"\\n\\r\". line :: forall m. Monad m => Splitter m Char () line = Splitter $ \source true false boundaries-> let loop = peek source >>= maybe (return ()) (( >> loop) . line) line c = put boundaries () >> if c == '\r' || c == '\n' then lineEnd c else pourUntil (\x-> x == '\n' || x == '\r') source true >>= maybe (return ()) lineEnd lineEnd '\n' = pourTicked (tickPrefixOf "\n\r") source false lineEnd '\r' = pourTicked (tickPrefixOf "\r\n") source false in loop -- | Splitter 'everything' feeds its entire input into its /true/ sink. everything :: forall m x. Monad m => Splitter m x () everything = Splitter (\source true false edge-> put edge () >> pour source true) -- | Splitter 'nothing' feeds its entire input into its /false/ sink. nothing :: forall m x. Monad m => Splitter m x () nothing = Splitter (\source true false edge-> pour source false) -- | Splitter 'one' feeds all input values to its /true/ sink, treating every value as a separate section. one :: forall m x. Monad m => Splitter m x () one = Splitter (\source true false edge-> mapMStream_ (\x-> put edge () >> put true x) source) -- | Splitter 'marked' passes all marked-up input sections to its /true/ sink, and all unmarked input to its -- /false/ sink. marked :: forall m x y. (Monad m, Eq y) => Splitter m (Markup y x) () marked = markedWith (const True) -- | Splitter 'markedContent' passes the content of all marked-up input sections to its /true/ sink, while the -- outermost tags and all unmarked input go to its /false/ sink. markedContent :: forall m x y. (Monad m, Eq y) => Splitter m (Markup y x) () markedContent = contentMarkedWith (const True) -- | Splitter 'markedWith' passes input sections marked-up with the appropriate tag to its /true/ sink, and the -- rest of the input to its /false/ sink. The argument /select/ determines if the tag is appropriate. markedWith :: forall m x y. (Monad m, Eq y) => (y -> Bool) -> Splitter m (Markup y x) () markedWith select = statefulSplitter transition ([], False) where transition s@([], _) Content{} = (s, False) transition s@(_, truth) Content{} = (s, truth) transition s@([], _) (Markup (Point y)) = (s, select y) transition s@(_, truth) (Markup (Point y)) = (s, truth) transition ([], _) (Markup (Start y)) = (([y], select y), select y) transition (open, truth) (Markup (Start y)) = ((y:open, truth), truth) transition (open, truth) (Markup (End y)) = assert (elem y open) ((delete y open, truth), truth) -- | Splitter 'contentMarkedWith' passes the content of input sections marked-up with the appropriate tag to -- its /true/ sink, and the rest of the input to its /false/ sink. The argument /select/ determines if the tag is -- appropriate. contentMarkedWith :: forall m x y. (Monad m, Eq y) => (y -> Bool) -> Splitter m (Markup y x) () contentMarkedWith select = statefulSplitter transition ([], False) where transition s@(_, truth) Content{} = (s, truth) transition s@(_, truth) (Markup Point{}) = (s, truth) transition ([], _) (Markup (Start y)) = (([y], select y), False) transition (open, truth) (Markup (Start y)) = ((y:open, truth), truth) transition (open, truth) (Markup (End y)) = assert (elem y open) (let open' = delete y open truth' = not (null open') && truth in ((open', truth'), truth')) -- | Used by 'parseSubstring' to distinguish between overlapping substrings. data OccurenceTag = Occurence Int deriving (Eq, Show) instance Enum OccurenceTag where succ (Occurence n) = Occurence (succ n) pred (Occurence n) = Occurence (pred n) toEnum = Occurence fromEnum (Occurence n) = n -- | Performs the same task as the 'substring' splitter, but instead of splitting it outputs the input as @'Markup' x -- 'OccurenceTag'@ in order to distinguish overlapping strings. parseSubstring :: forall m x y. (Monad m, Eq x) => [x] -> Parser m x OccurenceTag parseSubstring [] = Transducer $ \ source sink -> put sink marker >> mapMStream_ (\x-> put sink (Content x) >> put sink marker) source where marker = Markup (Point (toEnum 1)) parseSubstring list = Transducer $ \ source sink -> let findFirst = pourUntil (== head list) source (mapSink Content sink) >>= maybe (return ()) (const test) test = getTicked (tickPrefixOf list) source >>= \prefix-> let Just rest = stripPrefix prefix list head:tail = map Content list in if null rest then put sink (Markup (Start (toEnum 0))) >> put sink head >> fallback 0 (Seq.fromList tail |> Markup (End (toEnum 0))) else getNext 0 rest (Seq.fromList $ map Content prefix) getNext id rest q = get source >>= maybe (flush q) (advance id rest q) advance id rest@(head:tail) q x = let q' = q |> Content x view@(qh@Content{} :< qt) = Seq.viewl q' id' = succ id in if x == head then if null tail then put sink (Markup (Start (toEnum id'))) >> put sink qh >> (fallback id' (qt |> Markup (End (toEnum id')))) else getNext id tail q' else fallback id q' fallback id q = case Seq.viewl q of EmptyL -> findFirst head@(Markup (End id')) :< tail -> put sink head >> fallback (if id == fromEnum id' then 0 else id) tail view@(head@Content{} :< tail) -> case stripPrefix (remainingContent q) list of Just rest -> getNext id rest q Nothing -> put sink head >> fallback id tail flush q = putQueue q sink >> return () remainingContent :: Seq (Markup OccurenceTag x) -> [x] remainingContent q = extractContent (Seq.viewl q) extractContent :: Foldable.Foldable f => f (Markup b x) -> [x] extractContent = Foldable.concatMap (\e-> case e of {Content x -> [x]; _ -> []}) in findFirst -- | Splitter 'substring' feeds to its /true/ sink all input parts that match the contents of the given list -- argument. If two overlapping parts of the input both match the argument, both are sent to /true/ and each is preceded -- by an edge. substring :: forall m x. (Monad m, Eq x) => [x] -> Splitter m x () substring [] = Splitter $ \ source true false edge -> split one source false true edge >> put edge () substring list = Splitter $ \ source true false edge -> let findFirst = pourUntil (== head list) source false >>= maybe (return ()) (const test) test = getTicked (tickPrefixOf list) source >>= \prefix-> let Just rest = stripPrefix prefix list head:tail = list in if null rest then put edge () >> put true head >> fallback (Seq.fromList tail) Seq.empty else getNext rest Seq.empty (Seq.fromList prefix) getNext rest qt qf = get source >>= maybe (putQueue qt true >> putQueue qf false >> return ()) (advance rest qt qf) advance rest@(head:tail) qt qf x = let qf' = qf |> x view@(qqh :< qqt) = Seq.viewl (qt >< qf') in if x == head then if null tail then put edge () >> put true qqh >> fallback qqt Seq.empty else getNext tail qt qf' else fallback qt qf' fallback qt qf = case Seq.viewl (qt >< qf) of EmptyL -> findFirst view@(head :< tail) -> case stripPrefix (Foldable.toList view) list of Just rest -> getNext rest qt qf Nothing -> if Seq.null qt then put false head >> fallback Seq.empty tail else put true head >> fallback (Seq.drop 1 qt) qf in findFirst -- | A utility function wrapping if-then-else, useful for handling monadic truth values cond :: a -> a -> Bool -> a cond x y test = if test then x else y