turtle-1.2.8: Shell programming, Haskell-style

Safe HaskellNone
LanguageHaskell2010

Turtle.Pattern

Contents

Description

Use this module to either:

  • match Text with light-weight backtracking patterns, or:
  • parse structured values from Text.

Example usage:

>>> :set -XOverloadedStrings
>>> match ("can" <|> "cat") "cat"
["cat"]
>>> match ("can" <|> "cat") "dog"
[]
>>> match (decimal `sepBy` ",") "1,2,3"
[[1,2,3]]

This pattern has unlimited backtracking, and will return as many solutions as possible:

>>> match (prefix (star anyChar)) "123"
["123","12","1",""]

Use do notation to structure more complex patterns:

>>> :{
let bit = ("0" *> pure False) <|> ("1" *> pure True) :: Pattern Bool;
    portableBitMap = do
        { "P1"
        ; width  <- spaces1 *> decimal
        ; height <- spaces1 *> decimal
        ; count width (count height (spaces1 *> bit))
        };
in  match (prefix portableBitMap) "P1\n2 2\n0 0\n1 0\n"
:}
[[[False,False],[True,False]]]

Synopsis

Pattern

data Pattern a Source

A fully backtracking pattern that parses an 'a' from some Text

Instances

Monad Pattern Source 
Functor Pattern Source 
Applicative Pattern Source 
Alternative Pattern Source 
MonadPlus Pattern Source 
Monoid a => Num (Pattern a) Source

Pattern forms a semiring, this is the closest approximation

(~) * a Text => IsString (Pattern a) Source 
Monoid a => Monoid (Pattern a) Source 

match :: Pattern a -> Text -> [a] Source

Match a Pattern against a Text input, returning all possible solutions

The Pattern must match the entire Text

Primitive patterns

anyChar :: Pattern Char Source

Match any character

>>> match anyChar "1"
"1"
>>> match anyChar ""
""

eof :: Pattern () Source

Matches the end of input

>>> match eof "1"
[]
>>> match eof ""
[()]

Character patterns

dot :: Pattern Char Source

Synonym for anyChar

satisfy :: (Char -> Bool) -> Pattern Char Source

Match any character that satisfies the given predicate

>>> match (satisfy (== '1')) "1"
"1"
>>> match (satisfy (== '2')) "1"
""

char :: Char -> Pattern Char Source

Match a specific character

>>> match (char '1') "1"
"1"
>>> match (char '2') "1"
""

notChar :: Char -> Pattern Char Source

Match any character except the given one

>>> match (notChar '2') "1"
"1"
>>> match (notChar '1') "1"
""

text :: Text -> Pattern Text Source

Match a specific string

>>> match (text "123") "123"
["123"]

You can also omit the text function if you enable the OverloadedStrings extension:

>>> match "123" "123"
["123"]

asciiCI :: Text -> Pattern Text Source

Match a specific string in a case-insensitive way

This only handles ASCII strings

>>> match (asciiCI "abc") "ABC"
["ABC"]

oneOf :: [Char] -> Pattern Char Source

Match any one of the given characters

>>> match (oneOf "1a") "1"
"1"
>>> match (oneOf "2a") "1"
""

noneOf :: [Char] -> Pattern Char Source

Match anything other than the given characters

>>> match (noneOf "2a") "1"
"1"
>>> match (noneOf "1a") "1"
""

space :: Pattern Char Source

Match a whitespace character

>>> match space " "
" "
>>> match space "1"
""

spaces :: Pattern Text Source

Match zero or more whitespace characters

>>> match spaces "  "
["  "]
>>> match spaces ""
[""]

spaces1 :: Pattern Text Source

Match one or more whitespace characters

>>> match spaces1 "  "
["  "]
>>> match spaces1 ""
[]

tab :: Pattern Char Source

Match the tab character ('t')

>>> match tab "\t"
"\t"
>>> match tab " "
""

newline :: Pattern Char Source

Match the newline character ('n')

>>> match newline "\n"
"\n"
>>> match newline " "
""

crlf :: Pattern Text Source

Matches a carriage return ('r') followed by a newline ('n')

>>> match crlf "\r\n"
["\r\n"]
>>> match crlf "\n\r"
[]

upper :: Pattern Char Source

Match an uppercase letter

>>> match upper "A"
"A"
>>> match upper "a"
""

lower :: Pattern Char Source

Match a lowercase letter

>>> match lower "a"
"a"
>>> match lower "A"
""

alphaNum :: Pattern Char Source

Match a letter or digit

>>> match alphaNum "1"
"1"
>>> match alphaNum "a"
"a"
>>> match alphaNum "A"
"A"
>>> match alphaNum "."
""

letter :: Pattern Char Source

Match a letter

>>> match letter "A"
"A"
>>> match letter "a"
"a"
>>> match letter "1"
""

digit :: Pattern Char Source

Match a digit

>>> match digit "1"
"1"
>>> match digit "a"
""

hexDigit :: Pattern Char Source

Match a hexadecimal digit

>>> match hexDigit "1"
"1"
>>> match hexDigit "A"
"A"
>>> match hexDigit "a"
"a"
>>> match hexDigit "g"
""

octDigit :: Pattern Char Source

Match an octal digit

>>> match octDigit "1"
"1"
>>> match octDigit "9"
""

Numbers

decimal :: Num n => Pattern n Source

Match an unsigned decimal number

>>> match decimal  "123"
[123]
>>> match decimal "-123"
[]

signed :: Num a => Pattern a -> Pattern a Source

Transform a numeric parser to accept an optional leading '+' or '-' sign

>>> match (signed decimal) "+123"
[123]
>>> match (signed decimal) "-123"
[-123]
>>> match (signed decimal)  "123"
[123]

Combinators

prefix :: Pattern a -> Pattern a Source

Use this to match the prefix of a string

>>> match         "A"  "ABC"
[]
>>> match (prefix "A") "ABC"
["A"]

suffix :: Pattern a -> Pattern a Source

Use this to match the suffix of a string

>>> match         "C"  "ABC"
[]
>>> match (suffix "C") "ABC"
["C"]

has :: Pattern a -> Pattern a Source

Use this to match the interior of a string

>>> match      "B"  "ABC"
[]
>>> match (has "B") "ABC"
["B"]

begins :: Pattern Text -> Pattern Text Source

Match the entire string if it begins with the given pattern

This returns the entire string, not just the matched prefix

>>> match (begins  "A"             ) "ABC"
["ABC"]
>>> match (begins ("A" *> pure "1")) "ABC"
["1BC"]

ends :: Pattern Text -> Pattern Text Source

Match the entire string if it ends with the given pattern

This returns the entire string, not just the matched prefix

>>> match (ends  "C"             ) "ABC"
["ABC"]
>>> match (ends ("C" *> pure "1")) "ABC"
["AB1"]

contains :: Pattern Text -> Pattern Text Source

Match the entire string if it contains the given pattern

This returns the entire string, not just the interior pattern

>>> match (contains  "B"             ) "ABC"
["ABC"]
>>> match (contains ("B" *> pure "1")) "ABC"
["A1C"]

invert :: Pattern a -> Pattern () Source

(invert p) succeeds if p fails and fails if p succeeds

>>> match (invert "A") "A"
[]
>>> match (invert "A") "B"
[()]

once :: Pattern Char -> Pattern Text Source

Match a Char, but return Text

>>> match (once (char '1')) "1"
["1"]
>>> match (once (char '1')) ""
[]

star :: Pattern Char -> Pattern Text Source

Parse 0 or more occurrences of the given character

>>> match (star anyChar) "123"
["123"]
>>> match (star anyChar) ""
[""]

See also: chars

plus :: Pattern Char -> Pattern Text Source

Parse 1 or more occurrences of the given character

>>> match (plus digit) "123"
["123"]
>>> match (plus digit) ""
[]

See also: chars1

selfless :: Pattern a -> Pattern a Source

Patterns that match multiple times are greedy by default, meaning that they try to match as many times as possible. The selfless combinator makes a pattern match as few times as possible

This only changes the order in which solutions are returned, by prioritizing less greedy solutions

>>> match (prefix (selfless (some anyChar))) "123"
["1","12","123"]
>>> match (prefix           (some anyChar) ) "123"
["123","12","1"]

choice :: [Pattern a] -> Pattern a Source

Apply the patterns in the list in order, until one of them succeeds

>>> match (choice ["cat", "dog", "egg"]) "egg"
["egg"]
>>> match (choice ["cat", "dog", "egg"]) "cat"
["cat"]
>>> match (choice ["cat", "dog", "egg"]) "fan"
[]

count :: Int -> Pattern a -> Pattern [a] Source

Apply the given pattern a fixed number of times, collecting the results

>>> match (count 3 anyChar) "123"
["123"]
>>> match (count 4 anyChar) "123"
[]

lowerBounded :: Int -> Pattern a -> Pattern [a] Source

Apply the given pattern at least the given number of times, collecting the results

>>> match (lowerBounded 5 dot) "123"
[]
>>> match (lowerBounded 2 dot) "123"
["123"]

upperBounded :: Int -> Pattern a -> Pattern [a] Source

Apply the given pattern 0 or more times, up to a given bound, collecting the results

>>> match (upperBounded 5 dot) "123"
["123"]
>>> match (upperBounded 2 dot) "123"
[]
>>> match ((,) <$> upperBounded 2 dot <*> chars) "123"
[("12","3"),("1","23")]

bounded :: Int -> Int -> Pattern a -> Pattern [a] Source

Apply the given pattern a number of times restricted by given lower and upper bounds, collecting the results

>>> match (bounded 2 5 "cat") "catcatcat"
[["cat","cat","cat"]]
>>> match (bounded 2 5 "cat") "cat"
[]
>>> match (bounded 2 5 "cat") "catcatcatcatcatcat"
[]

bounded could be implemented naively as follows:

bounded m n p = do
  x <- choice (map pure [m..n])
  count x p

option :: Monoid a => Pattern a -> Pattern a Source

Transform a parser to a succeed with an empty value instead of failing

See also: optional

>>> match (option "1" <> "2") "12"
["12"]
>>> match (option "1" <> "2") "2"
["2"]

between :: Pattern a -> Pattern b -> Pattern c -> Pattern c Source

(between open close p) matches 'p' in between 'open' and 'close'

>>> match (between (char '(') (char ')') (star anyChar)) "(123)"
["123"]
>>> match (between (char '(') (char ')') (star anyChar)) "(123"
[]

skip :: Pattern a -> Pattern () Source

Discard the pattern's result

>>> match (skip anyChar) "1"
[()]
>>> match (skip anyChar) ""
[]

within :: Int -> Pattern a -> Pattern a Source

Restrict the pattern to consume no more than the given number of characters

>>> match (within 2 decimal) "12"
[12]
>>> match (within 2 decimal) "1"
[1]
>>> match (within 2 decimal) "123"
[]

fixed :: Int -> Pattern a -> Pattern a Source

Require the pattern to consume exactly the given number of characters

>>> match (fixed 2 decimal) "12"
[12]
>>> match (fixed 2 decimal) "1"
[]

sepBy :: Pattern a -> Pattern b -> Pattern [a] Source

p sepBy sep matches zero or more occurrences of p separated by sep

>>> match (decimal `sepBy` char ',') "1,2,3"
[[1,2,3]]
>>> match (decimal `sepBy` char ',') ""
[[]]

sepBy1 :: Pattern a -> Pattern b -> Pattern [a] Source

p sepBy1 sep matches one or more occurrences of p separated by sep

>>> match (decimal `sepBy1` ",") "1,2,3"
[[1,2,3]]
>>> match (decimal `sepBy1` ",") ""
[]

High-efficiency primitives

chars :: Pattern Text Source

Like star dot or star anyChar, except more efficient

chars1 :: Pattern Text Source

Like plus dot or plus anyChar, except more efficient