module Data.Vector.Split.Internal ( split , Splitter , splitOn , splitOneOf , splitWhen , endBy , oneOf , endByOneOf , wordsBy , linesBy , onSublist , whenElt , dropDelims , keepDelimsL , keepDelimsR , condense , dropInitBlank , dropInnerBlanks , dropFinalBlank , dropBlanks , startsWith , startsWithOneOf , endsWith , endsWithOneOf ) where import Data.Vector.Generic (Vector) import qualified Data.Vector.Generic as V import qualified Data.Vector as BV -- | A delimiter is a list of predicates on elements, matched by some -- contiguous subsequence of a list. newtype Delimiter a = Delimiter (BV.Vector (a -> Bool)) -- | Try to match a delimiter at the start of a list, either failing -- or decomposing the list into the portion which matched the delimiter -- and the remainder. matchDelim :: Vector v a => Delimiter a -> v a -> Maybe (v a, v a) matchDelim (Delimiter ds) xs = if match && lengthOk then Just (V.splitAt (V.length ds) xs) else Nothing where match = V.and $ V.zipWith ($) ds (V.convert xs) lengthOk = V.length ds <= V.length xs data Chunk v a = Delim (v a) | Text (v a) deriving (Show, Eq) type SplitList v a = [Chunk v a] type Splitter v a = v a -> SplitList v a toSplitList :: Vector v a => Delimiter a -> v a -> SplitList v a toSplitList d v = go 0 where go i | i < V.length v = case matchDelim d (V.drop i v) of Just (l,r) -> Text (V.take i v) : Delim l : toSplitList d r Nothing -> go (i+1) | otherwise = [Text v] split :: Vector v a => Splitter v a -> v a -> [v a] split s v = map go $ s v where go (Text t) = t go (Delim d) = d -- Convenience functions -- | Split on the given sublist. Equivalent to split . dropDelims . onSublist. For example: -- -- >>> splitOn (BV.fromList "..") (BV.fromList "a..b...c....d..") -- ["a","b",".c","","d",""] -- -- In some parsing combinator frameworks this is also known as sepBy. -- -- Note that this is the right inverse of the intercalate function from Data.List, that is, -- -- >> \xs -> (intercalate xs . splitOn xs) === id -- -- splitOn x . intercalate x is the identity on certain lists, but it is -- tricky to state the precise conditions under which this holds. (For -- example, it is not enough to say that x does not occur in any elements -- of the input list. Working out why is left as an exercise for the -- reader.) splitOn :: (Vector v a, Eq a) => v a -> v a -> [v a] splitOn xs = split (dropDelims . onSublist xs) -- | Split on any of the given elements. Equivalent to split . dropDelims . oneOf. For example: -- -- >>> splitOneOf (BV.fromList ";.,") (BV.fromList "foo,bar;baz.glurk") -- ["foo","bar","baz","glurk"] splitOneOf :: (Vector v a, Eq a) => v a -> v a -> [v a] splitOneOf xs = split (dropDelims . oneOf xs) -- | Split on elements satisfying the given predicate. Equivalent to split . dropDelims . whenElt. For example: -- -- >>> splitWhen (<0) (BV.fromList [1,3,-4,5,7,-9,0,2]) -- [[1,3],[5,7],[0,2]] splitWhen :: (Vector v a) => (a -> Bool) -> v a -> [v a] splitWhen p = split (dropDelims . whenElt p) -- | Split into chunks terminated by the given subsequence. Equivalent to split . dropFinalBlank . dropDelims . onSublist. For example: -- -- >>> endBy (BV.fromList ";") (BV.fromList "foo;bar;baz;") -- ["foo","bar","baz"] -- -- Note also that the lines function from Data.List is equivalent to endBy "\n". endBy :: (Vector v a, Eq a) => v a -> v a -> [v a] endBy xs = split (dropFinalBlank . dropDelims . onSublist xs) -- Basic Strategies -- | A splitting strategy that splits on any one of the given elements. For example: -- >>> split (oneOf (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aa","z","b","x","","y","","z","c","x","d"] oneOf :: (Vector v a, Eq a) => v a -> Splitter v a oneOf xs = toSplitList delim where delim = Delimiter (BV.singleton (`V.elem` xs)) -- | Split into chunks terminated by one of the given elements. Equivalent to split . dropFinalBlank . dropDelims . oneOf. For example: -- -- >>> endByOneOf (BV.fromList ";,") (BV.fromList "foo;bar,baz;") -- ["foo","bar","baz"] endByOneOf :: (Vector v a, Eq a) => v a -> v a -> [v a] endByOneOf xs = split (dropFinalBlank . dropDelims . oneOf xs) -- | Split into "words", with word boundaries indicated by the given predicate. Satisfies words === wordsBy isSpace; equivalent to split . dropBlanks . dropDelims . whenElt. For example: -- -- >>> wordsBy (=='x') (BV.fromList "dogxxxcatxbirdxx") -- ["dog","cat","bird"] wordsBy :: Vector v a => (a -> Bool) -> v a -> [v a] wordsBy p = split (dropBlanks . dropDelims . whenElt p) -- | Split into "lines", with line boundaries indicated by the given predicate. Satisfies lines === linesBy (=='\n'); equivalent to split . dropFinalBlank . dropDelims . whenElt. For example: -- -- >>> linesBy (=='x') (BV.fromList "dogxxxcatxbirdxx") -- ["dog","","","cat","bird",""] linesBy :: Vector v a => (a -> Bool) -> v a -> [v a] linesBy p = split (dropFinalBlank . dropDelims . whenElt p) -- | A splitting strategy that splits on the given list, when it is -- encountered as an exact subsequence. For example: -- -- >>> split (onSublist (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aazb","xyz","cxd"] -- -- Note that splitting on the empty list is not allowed in `vector-split`. -- This is a major difference between `split` and `vector-split`. In any -- case nobody should use `vector-split` to do this anyway. onSublist :: (Vector v a, Eq a) => v a -> Splitter v a onSublist xs = toSplitList delim where delim = Delimiter (V.map (==) . V.convert $ xs) -- | A splitting strategy that splits on any elements that satisfy the given predicate. For example: -- -- >>> split (whenElt (<0)) (BV.fromList [2,4,-3,6,-9,1]) -- [[2,4],[-3],[6],[-9],[1]] whenElt :: (Vector v a) => (a -> Bool) -> Splitter v a whenElt p = toSplitList delim where delim = Delimiter (V.singleton p) -- Strategy Transformers -- | Drop delimiters from the output (the default is to keep them). For example, -- -- >>> split (oneOf (BV.fromList ":")) (BV.fromList "a:b:c") -- ["a",":","b",":","c"] -- >>> split (dropDelims . oneOf (BV.fromList ":")) (BV.fromList "a:b:c") -- ["a","b","c"] dropDelims :: Vector v a => SplitList v a -> SplitList v a dropDelims = filter go where go (Delim _) = False go (Text _ ) = True -- | Keep delimiters in the output by prepending them to adjacent chunks. For example: -- -- >>> split (keepDelimsL . oneOf (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aa","zb","x","y","zc","xd"] keepDelimsL :: Vector v a => SplitList v a -> SplitList v a keepDelimsL (Delim d : Text t : rst) = Text (d V.++ t) : keepDelimsL rst keepDelimsL (rs : rst) = rs : keepDelimsL rst keepDelimsL [] = [] -- | Keep delimiters in the output by appending them to adjacent chunks. For example: -- -- >>> split (keepDelimsR . oneOf (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aaz","bx","y","z","cx","d"] keepDelimsR :: Vector v a => SplitList v a -> SplitList v a keepDelimsR (Text t : Delim d : rst) = Text (t V.++ d) : keepDelimsR rst keepDelimsR (rs : rst) = rs : keepDelimsR rst keepDelimsR [] = [] -- FIXME really not sure about the delim : text : delim case ... -- this was inserted to be compatible with the split package -- | Condense multiple consecutive delimiters into one. For example: -- -- >>> split (condense . oneOf (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aa","z","b","xyz","c","x","d"] -- >>> split (dropDelims . oneOf (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aa","b","","","c","d"] -- >>> split (condense . dropDelims . oneOf (BV.fromList "xyz")) (BV.fromList "aazbxyzcxd") -- ["aa","b","c","d"] -- -- FIXME this function is not fully compatible with the Data.List.Split version. condense :: Vector v a => SplitList v a -> SplitList v a condense (Delim a : Delim b : rst) = condense (Delim (a V.++ b) : rst) condense (Delim a : Text t : Delim b : rst) | V.null t = condense (Delim (a V.++ b) : rst) condense (t : rst) = t : condense rst condense [] = [] -- | Don't generate a blank chunk if there is a delimiter at the beginning. For example: -- -- >>> split (oneOf (BV.fromList ":")) (BV.fromList ":a:b") -- ["",":","a",":","b"] -- >>> split (dropInitBlank . oneOf (BV.fromList ":")) (BV.fromList ":a:b") -- [":","a",":","b"] dropInitBlank :: Vector v a => SplitList v a -> SplitList v a dropInitBlank (Text t : rst) | V.null t = rst dropInitBlank rst = rst -- | Don't generate a blank chunk if there is a delimiter at the end. For example: -- -- split (oneOf (BV.fromList ":")) (BV.fromList "a:b:") -- ["a",":","b",":",""] -- split (dropFinalBlank . oneOf (BV.fromList ":")) (BV.fromList "a:b:") -- ["a",":","b",":"] dropFinalBlank :: Vector v a => SplitList v a -> SplitList v a dropFinalBlank [Text t] | V.null t = [] dropFinalBlank (rs : rst) = rs : dropFinalBlank rst dropFinalBlank [] = [] -- | Don't generate blank chunks between consecutive delimiters. For example: -- -- >>> split (oneOf (BV.fromList ":")) (BV.fromList "::b:::a") -- ["",":","",":","b",":","",":","",":","a"] -- >>> split (dropInnerBlanks . oneOf (BV.fromList ":")) (BV.fromList "::b:::a") -- ["",":",":","b",":",":",":","a"] dropInnerBlanks :: Vector v a => SplitList v a -> SplitList v a dropInnerBlanks (Text a : rst) = Text a : dropInnerBlanksGo rst dropInnerBlanks rst = dropInnerBlanksGo rst dropInnerBlanksGo :: Vector v a => [Chunk v a] -> [Chunk v a] dropInnerBlanksGo [Text a] = [Text a] dropInnerBlanksGo (Text a : rst) | V.null a = dropInnerBlanksGo rst dropInnerBlanksGo (chunk : rst) = chunk : dropInnerBlanksGo rst dropInnerBlanksGo [] = [] -- Derived combinators -- | Drop all blank chunks from the output, and condense consecutive -- delimiters into one. -- Equivalent to dropInitBlank . dropFinalBlank . condense. For example: -- -- >>> split (oneOf (BV.fromList ":")) (BV.fromList "::b:::a") -- ["",":","",":","b",":","",":","",":","a"] -- >>> split (dropBlanks . oneOf (BV.fromList ":")) (BV.fromList "::b:::a") -- ["::","b",":::","a"] dropBlanks :: Vector v a => SplitList v a -> SplitList v a dropBlanks = condense . filter go where go (Text t) | V.null t = False go _ = True -- | Make a strategy that splits a list into chunks that all start with the -- given subsequence (except possibly the first). Equivalent to -- dropInitBlank . keepDelimsL . onSublist. For example: -- -- >>> split (startsWith (BV.fromList "app")) (BV.fromList "applyapplicativeapplaudapproachapple") -- ["apply","applicative","applaud","approach","apple"] startsWith :: (Vector v a, Eq a) => v a -> Splitter v a startsWith xs = dropInitBlank . keepDelimsL . onSublist xs -- | Make a strategy that splits a list into chunks that all start with one -- of the given elements (except possibly the first). Equivalent to -- dropInitBlank . keepDelimsL . oneOf. For example: -- -- >>> split (startsWithOneOf (BV.fromList ['A'..'Z'])) (BV.fromList "ACamelCaseIdentifier") -- ["A","Camel","Case","Identifier"] startsWithOneOf :: (Vector v a, Eq a) => v a -> Splitter v a startsWithOneOf xs = dropInitBlank . keepDelimsL . oneOf xs -- | Make a strategy that splits a list into chunks that all end with the -- given subsequence, except possibly the last. Equivalent to -- dropFinalBlank . keepDelimsR . onSublist. For example: -- -- >>> split (endsWith (BV.fromList "ly")) (BV.fromList "happilyslowlygnarlylily") -- ["happily","slowly","gnarly","lily"] endsWith :: (Vector v a, Eq a) => v a -> Splitter v a endsWith xs = dropFinalBlank . keepDelimsR . onSublist xs -- Note: this function is not consistent with the Data.List.Split version -- | Make a strategy that splits a list into chunks that all end with one of -- the given elements, except possibly the last. Equivalent to -- dropFinalBlank . keepDelimsR . oneOf. For example: -- -- >>> split (condense . endsWithOneOf (BV.fromList ".,?! ")) (BV.fromList "Hi, there! How are you?") -- ["Hi, ","there! ","How ","are ","you?"] endsWithOneOf :: (Vector v a, Eq a) => v a -> Splitter v a endsWithOneOf xs = dropFinalBlank . keepDelimsR . oneOf xs