{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, FlexibleContexts, GeneralizedNewtypeDeriving, ParallelListComp, TypeFamilies, OverloadedStrings, UndecidableInstances #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Monoid.Lexical.Words
-- Copyright : (c) Edward Kmett 2009
-- License : BSD-style
-- Maintainer : ekmett@gmail.com
-- Stability : experimental
-- Portability : non-portable (MPTCs, OverloadedStrings)
--
-- A simple demonstration of tokenizing a 'Generator' into distinct words
-- and/or lines using a word-parsing 'Monoid' that accumulates partial
-- information about words and then builds up a token stream.
--
-----------------------------------------------------------------------------
module Data.Monoid.Lexical.Words
( module Data.Monoid.Reducer.Char
-- * Words
, Words
, runWords
, Unspaced(runUnspaced)
, wordsFrom
-- * Lines
, Lines
, runLines
, Unlined(runUnlined)
, linesFrom
) where
import Data.String
import Data.Char (isSpace)
import Data.Maybe (maybeToList)
import Data.Monoid.Reducer.Char
import Data.Generator
import Control.Functor.Pointed
-- | A 'CharReducer' transformer that breaks a 'Char' 'Generator' into distinct words, feeding a 'Char' 'Reducer' each line in turn
data Words m = Chunk (Maybe m)
| Segment (Maybe m) [m] (Maybe m)
deriving (Show,Read)
-- | Extract the matched words from the 'Words' 'Monoid'
runWords :: Words m -> [m]
runWords (Chunk m) = maybeToList m
runWords (Segment l m r) = maybeToList l ++ m ++ maybeToList r
instance Monoid m => Monoid (Words m) where
mempty = Chunk mempty
Chunk l `mappend` Chunk r = Chunk (l `mappend` r)
Chunk l `mappend` Segment l' m r = Segment (l `mappend` l') m r
Segment l m r `mappend` Chunk r' = Segment l m (r `mappend` r')
Segment l m r `mappend` Segment l' m' r' = Segment l (m ++ maybeToList (r `mappend` l') ++ m') r'
instance Reducer Char m => Reducer Char (Words m) where
unit c | isSpace c = Segment (Just (unit c)) [] mempty
| otherwise = Chunk (Just (unit c))
instance Functor Words where
fmap f (Chunk m) = Chunk (fmap f m)
fmap f (Segment m ms m') = Segment (fmap f m) (fmap f ms) (fmap f m')
instance (CharReducer m) => CharReducer (Words m) where
invalidChar xs = Segment (Just (invalidChar xs)) [] mempty
instance Reducer Char m => IsString (Words m) where
fromString = reduce
-- | A 'CharReducer' transformer that breaks a 'Char' 'Generator' into distinct lines, feeding a 'Char' 'Reducer' each line in turn.
newtype Lines m = Lines (Words m) deriving (Show,Read,Monoid,Functor)
instance Reducer Char m => Reducer Char (Lines m) where
unit '\n' = Lines $ Segment (Just (unit '\n')) [] mempty
unit c = Lines $ Chunk (Just (unit c))
instance (CharReducer m) => CharReducer (Lines m) where
invalidChar xs = Lines $ Segment (Just (invalidChar xs)) [] mempty
instance Reducer Char m => IsString (Lines m) where
fromString = reduce
-- | Extract the matched lines from the 'Lines' 'Monoid'
runLines :: Lines m -> [m]
runLines (Lines x) = runWords x
-- | A 'CharReducer' transformer that strips out any character matched by `isSpace`
newtype Unspaced m = Unspaced { runUnspaced :: m } deriving (Eq,Ord,Show,Read,Monoid)
instance Reducer Char m => Reducer Char (Unspaced m) where
unit c | isSpace c = mempty
| otherwise = Unspaced (unit c)
instance CharReducer m => CharReducer (Unspaced m) where
invalidChar = Unspaced . invalidChar
instance Functor Unspaced where
fmap f (Unspaced x) = Unspaced (f x)
instance Pointed Unspaced where
point = Unspaced
instance Copointed Unspaced where
extract = runUnspaced
instance Reducer Char m => IsString (Unspaced m) where
fromString = reduce
-- | A 'CharReducer' transformer that strips out newlines
newtype Unlined m = Unlined { runUnlined :: m } deriving (Eq,Ord,Show,Read,Monoid)
instance Reducer Char m => Reducer Char (Unlined m) where
unit '\n' = mempty
unit c = Unlined (unit c)
instance CharReducer m => CharReducer (Unlined m) where
invalidChar = Unlined . invalidChar
instance Functor Unlined where
fmap f (Unlined x) = Unlined (f x)
instance Pointed Unlined where
point = Unlined
instance Copointed Unlined where
extract = runUnlined
instance Reducer Char m => IsString (Unlined m) where
fromString = reduce
-- | Utility function to extract words using accumulator, inside-word, and until-next-word monoids
wordsFrom :: (Generator c, Elem c ~ Char, Char `Reducer` m, Char `Reducer` n, Char `Reducer` o) => m -> c -> [(m,n,o)]
wordsFrom s c = [(x,runUnlined y,z) | x <- scanl mappend s ls | (y,z) <- rs ] where
(ls,rs) = unzip (runWords (mapReduce id c))
-- | Utility function to extract lines using accumulator, inside-line, and until-next-line monoids
linesFrom :: (Generator c, Elem c ~ Char, Char `Reducer` m, Char `Reducer` n, Char `Reducer` o) => m -> c -> [(m,n,o)]
linesFrom s c = [(x,runUnlined y,z) | x <- scanl mappend s ls | (y,z) <- rs ] where
(ls,rs) = unzip (runLines (mapReduce id c))