{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, FlexibleContexts, GeneralizedNewtypeDeriving, ParallelListComp, TypeFamilies, OverloadedStrings, UndecidableInstances #-} ----------------------------------------------------------------------------- -- | -- Module : Data.Monoid.Lexical.Words -- Copyright : (c) Edward Kmett 2009 -- License : BSD-style -- Maintainer : ekmett@gmail.com -- Stability : experimental -- Portability : non-portable (MPTCs, OverloadedStrings) -- -- A simple demonstration of tokenizing a 'Generator' into distinct words -- and/or lines using a word-parsing 'Monoid' that accumulates partial -- information about words and then builds up a token stream. -- ----------------------------------------------------------------------------- module Data.Monoid.Lexical.Words ( module Data.Monoid.Reducer.Char -- * Words , Words , runWords , Unspaced(runUnspaced) , wordsFrom -- * Lines , Lines , runLines , Unlined(runUnlined) , linesFrom ) where import Data.String import Data.Char (isSpace) import Data.Maybe (maybeToList) import Data.Monoid.Reducer.Char import Data.Monoid.Generator import Control.Functor.Pointed -- | A 'CharReducer' transformer that breaks a 'Char' 'Generator' into distinct words, feeding a 'Char' 'Reducer' each line in turn data Words m = Chunk (Maybe m) | Segment (Maybe m) [m] (Maybe m) deriving (Show,Read) -- | Extract the matched words from the 'Words' 'Monoid' runWords :: Words m -> [m] runWords (Chunk m) = maybeToList m runWords (Segment l m r) = maybeToList l ++ m ++ maybeToList r instance Monoid m => Monoid (Words m) where mempty = Chunk mempty Chunk l `mappend` Chunk r = Chunk (l `mappend` r) Chunk l `mappend` Segment l' m r = Segment (l `mappend` l') m r Segment l m r `mappend` Chunk r' = Segment l m (r `mappend` r') Segment l m r `mappend` Segment l' m' r' = Segment l (m ++ maybeToList (r `mappend` l') ++ m') r' instance Reducer Char m => Reducer Char (Words m) where unit c | isSpace c = Segment (Just (unit c)) [] mempty | otherwise = Chunk (Just (unit c)) instance Functor Words where fmap f (Chunk m) = Chunk (fmap f m) fmap f (Segment m ms m') = Segment (fmap f m) (fmap f ms) (fmap f m') instance (CharReducer m) => CharReducer (Words m) where invalidChar xs = Segment (Just (invalidChar xs)) [] mempty instance Reducer Char m => IsString (Words m) where fromString = reduce -- | A 'CharReducer' transformer that breaks a 'Char' 'Generator' into distinct lines, feeding a 'Char' 'Reducer' each line in turn. newtype Lines m = Lines (Words m) deriving (Show,Read,Monoid,Functor) instance Reducer Char m => Reducer Char (Lines m) where unit '\n' = Lines $ Segment (Just (unit '\n')) [] mempty unit c = Lines $ Chunk (Just (unit c)) instance (CharReducer m) => CharReducer (Lines m) where invalidChar xs = Lines $ Segment (Just (invalidChar xs)) [] mempty instance Reducer Char m => IsString (Lines m) where fromString = reduce -- | Extract the matched lines from the 'Lines' 'Monoid' runLines :: Lines m -> [m] runLines (Lines x) = runWords x -- | A 'CharReducer' transformer that strips out any character matched by `isSpace` newtype Unspaced m = Unspaced { runUnspaced :: m } deriving (Eq,Ord,Show,Read,Monoid) instance Reducer Char m => Reducer Char (Unspaced m) where unit c | isSpace c = mempty | otherwise = Unspaced (unit c) instance CharReducer m => CharReducer (Unspaced m) where invalidChar = Unspaced . invalidChar instance Functor Unspaced where fmap f (Unspaced x) = Unspaced (f x) instance Pointed Unspaced where point = Unspaced instance Copointed Unspaced where extract = runUnspaced instance Reducer Char m => IsString (Unspaced m) where fromString = reduce -- | A 'CharReducer' transformer that strips out newlines newtype Unlined m = Unlined { runUnlined :: m } deriving (Eq,Ord,Show,Read,Monoid) instance Reducer Char m => Reducer Char (Unlined m) where unit '\n' = mempty unit c = Unlined (unit c) instance CharReducer m => CharReducer (Unlined m) where invalidChar = Unlined . invalidChar instance Functor Unlined where fmap f (Unlined x) = Unlined (f x) instance Pointed Unlined where point = Unlined instance Copointed Unlined where extract = runUnlined instance Reducer Char m => IsString (Unlined m) where fromString = reduce -- | Utility function to extract words using accumulator, inside-word, and until-next-word monoids wordsFrom :: (Generator c, Elem c ~ Char, Char `Reducer` m, Char `Reducer` n, Char `Reducer` o) => m -> c -> [(m,n,o)] wordsFrom s c = [(x,runUnlined y,z) | x <- scanl mappend s ls | (y,z) <- rs ] where (ls,rs) = unzip (runWords (mapReduce id c)) -- | Utility function to extract lines using accumulator, inside-line, and until-next-line monoids linesFrom :: (Generator c, Elem c ~ Char, Char `Reducer` m, Char `Reducer` n, Char `Reducer` o) => m -> c -> [(m,n,o)] linesFrom s c = [(x,runUnlined y,z) | x <- scanl mappend s ls | (y,z) <- rs ] where (ls,rs) = unzip (runLines (mapReduce id c))