-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/

-- | Tools for processing unstructured text data
--
--   Lightweight and easy-to-use functions for text tokenizing and parsing.
--   The package is aimed at parsing mostly unstructured data, but
--   structured formats may be parsed as well. It may be used in different
--   situations, for DSLs, text markups or even for parsing simple grammars
--   more easily and sometimes faster than with mainstream parser
--   combinators or parser generators. See the README.markdown, the examples
--   and the module documentation for more.
@package fuzzy-parse
@version 0.1.2.0

module Data.Text.Fuzzy.Attoparsec.Day
dayDMY :: Parser Day
dayYMD :: Parser Day
dayYYYYMMDD :: Parser Day
dayDMonY :: Parser Day
day :: Parser Day

module Data.Text.Fuzzy.Attoparsec.Month
fuzzyMonth :: Parser Int
fuzzyMonthFromText :: Text -> Maybe Int

-- | Fuzzy date parsing. Supports a number of date formats and tries to
--   recover incomplete dates from text, using some reasonable assumptions.
--   Does not support locales, i.e. it assumes English-only dates for now.
--
--   Examples
--
--   parseMaybeDay "01.01.1979"
--   Just 1979-01-01
--   parseMaybeDay "01.01.01"
--   Just 2001-01-01
--   parseMaybeDay "13/01/2019"
--   Just 2019-01-13
--   parseMaybeDay "2019-12-1"
--   Just 2019-12-01
--   parseMaybeDay "21-feb-79"
--   Just 1979-02-21
--   parseMaybeDay "21-feb-01"
--   Just 2001-02-21
--   parseMaybeDay "29feb04"
--   Just 2004-02-29
--   parseMaybeDay "21feb28"
--   Just 2028-02-21
--   
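--   A minimal, self-contained usage sketch (added for illustration, not
--   part of the original package docs). It assumes OverloadedStrings and
--   that the parsers in Data.Text.Fuzzy.Attoparsec.Day are
--   Data.Attoparsec.Text parsers:
--   
--   {-# LANGUAGE OverloadedStrings #-}
--   import Data.Attoparsec.Text (parseOnly)
--   import Data.Text.Fuzzy.Dates (parseMaybeDay)
--   import Data.Text.Fuzzy.Attoparsec.Day (day)
--   
--   main :: IO ()
--   main = do
--     -- documented result: Just 1979-02-21
--     print (parseMaybeDay "21-feb-79")
--     -- running the raw attoparsec parser yields Either String Day,
--     -- which keeps the error message on failure
--     print (parseOnly day "13/01/2019")
--   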
module Data.Text.Fuzzy.Dates

-- | Tries to parse a date from the text.
parseMaybeDay :: Text -> Maybe Day

module Data.Text.Fuzzy.Section
cutSectionBy :: (Text -> Bool) -> (Text -> Bool) -> [Text] -> [Text]
cutSectionOn :: Text -> Text -> [Text] -> [Text]

-- | A lightweight and multi-functional text tokenizer allowing different
--   types of text tokenization depending on its settings.
--
--   It may be used in different situations, for DSLs, text markups or even
--   for parsing simple grammars more easily and sometimes faster than with
--   mainstream parser combinators or parser generators.
--
--   The primary goal of this package is to parse unstructured text data,
--   however it may be used for parsing such data formats as CSV with ease.
--
--   Currently it supports the following types of entities: atoms, string
--   literals (currently with a minimal set of escaped characters),
--   punctuation characters and delimiters.
--
--   Examples
--
--   Simple CSV-like tokenization
--
--   >>> tokenize (delims ":") "aaa : bebeb : qqq ::::" :: [Text]
--   ["aaa "," bebeb "," qqq "]
--   
--
--   >>> tokenize (delims ":"<>sq<>emptyFields ) "aaa : bebeb : qqq ::::" :: [Text]
--   ["aaa "," bebeb "," qqq ","","","",""]
--   
--
--   >>> tokenize (delims ":"<>sq<>emptyFields ) "aaa : bebeb : qqq ::::" :: [Maybe Text]
--   [Just "aaa ",Just " bebeb ",Just " qqq ",Nothing,Nothing,Nothing,Nothing]
--   
--
--   >>> tokenize (delims ":"<>sq<>emptyFields ) "aaa : 'bebeb:colon inside' : qqq ::::" :: [Maybe Text]
--   [Just "aaa ",Just " ",Just "bebeb:colon inside",Just " ",Just " qqq ",Nothing,Nothing,Nothing,Nothing]
--   
--
--   >>> let spec = sl<>delims ":"<>sq<>emptyFields<>noslits
--   
--   >>> tokenize spec "   aaa :   'bebeb:colon inside' : qqq ::::" :: [Maybe Text]
--   [Just "aaa ",Just "bebeb:colon inside ",Just "qqq ",Nothing,Nothing,Nothing,Nothing]
--   
--
--   >>> let spec = delims ":"<>sq<>emptyFields<>uw<>noslits
--   
--   >>> tokenize spec "  a  b  c  : 'bebeb:colon inside' : qqq ::::"  :: [Maybe Text]
--   [Just "a b c",Just "bebeb:colon inside",Just "qqq",Nothing,Nothing,Nothing,Nothing]
--   
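--   The CSV-style specs above compose into a small reusable helper. A
--   hedged sketch (added for illustration, not from the package docs;
--   fields and parseAll are hypothetical names):
--   
--   {-# LANGUAGE OverloadedStrings #-}
--   import Data.Text (Text)
--   import qualified Data.Text as T
--   import Data.Text.Fuzzy.Tokenize (tokenize, delims, sq, emptyFields, uw, noslits)
--   
--   -- split one colon-separated line into trimmed fields;
--   -- Nothing marks an empty column (courtesy of emptyFields)
--   fields :: Text -> [Maybe Text]
--   fields = tokenize (delims ":" <> sq <> emptyFields <> uw <> noslits)
--   
--   -- split the input into lines first (see the note under Other below)
--   parseAll :: Text -> [[Maybe Text]]
--   parseAll = map fields . T.lines
--   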
--
--   Notes
--
--   About the delimiter tokens
--
--   This type of token appears while processing "delimited" formats and
--   disappears from the results. Currently you will never see it unless
--   normalization is turned off by the nn option.
--
--   The delimiter tokens make sense when processing CSV-like formats, but
--   in that case you probably need only the values in the results.
--
--   This behavior may be changed later, but right now delimiter tokens seem
--   pointless in results. If you process some sort of grammar where the
--   delimiter character is important, you may use punctuation instead, i.e.:
--
--   >>> let spec = delims " \t"<>punct ",;()" <>emptyFields<>sq
--   
--   >>> tokenize spec "( delimeters , are , important, 'spaces are not');" :: [Text]
--   ["(","delimeters",",","are",",","important",",","spaces are not",")",";"]
--   
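--   The same idea as a reusable function. A hedged sketch (added for
--   illustration, not from the package docs; lexCall is a hypothetical
--   name):
--   
--   {-# LANGUAGE OverloadedStrings #-}
--   import Data.Text (Text)
--   import Data.Text.Fuzzy.Tokenize (tokenize, delims, punct, emptyFields, sq)
--   
--   -- spaces and tabs separate tokens; ',', ';', '(' and ')' come back as
--   -- their own tokens; single-quoted literals keep their inner spaces
--   lexCall :: Text -> [Text]
--   lexCall = tokenize (delims " \t" <> punct ",;()" <> emptyFields <> sq)
--   
--   -- applied to the input shown above, this yields
--   -- ["(","delimeters",",","are",",","important",",","spaces are not",")",";"]
--   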
--
--   Other
--
--   For CSV-like formats it makes sense to split the text into lines first,
--   otherwise newline characters may lead to weird results.
module Data.Text.Fuzzy.Tokenize

-- | Tokenization settings. Use mempty for an empty value and the
--   construction functions below for changing the settings.
data TokenizeSpec

-- | Typeclass for token values. Note that some tokens appear in the results
--   only when the nn option is set; otherwise sequences of characters are
--   turned into text tokens or string literals, and delimiter tokens are
--   simply removed from the results.
class IsToken a

-- | Create a character token
mkChar :: IsToken a => Char -> a

-- | Create a string literal character token
mkSChar :: IsToken a => Char -> a

-- | Create a punctuation token
mkPunct :: IsToken a => Char -> a

-- | Create a text chunk token
mkText :: IsToken a => Text -> a

-- | Create a string literal token
mkStrLit :: IsToken a => Text -> a

-- | Create a keyword token
mkKeyword :: IsToken a => Text -> a

-- | Create an empty field token
mkEmpty :: IsToken a => a

-- | Create a delimiter token
mkDelim :: IsToken a => a

-- | Create an indent token
mkIndent :: IsToken a => Int -> a

-- | Create an EOL token
mkEol :: IsToken a => a

-- | Tokenize a text
tokenize :: IsToken a => TokenizeSpec -> Text -> [a]

-- | Turn on character escaping inside string literals. Currently the
--   following escaped characters are supported:
--   [ \" \' \t \n \r \a \b \f \v ]
esc :: TokenizeSpec

-- | Raise empty field tokens (see the mkEmpty method) when no tokens are
--   found before a delimiter. Useful for processing CSV-like data in order
--   to distinguish empty columns.
addEmptyFields :: TokenizeSpec

-- | Same as addEmptyFields
emptyFields :: TokenizeSpec

-- | Turns off token normalization. Makes the tokenizer generate a character
--   stream. Useful for debugging.
nn :: TokenizeSpec

-- | Turns on single-quoted string literals. The character stream after a
--   '\'' character will be processed as a single-quoted stream, treating all
--   delimiter, comment and other special characters as part of the string
--   literal until the next unescaped single quote character.
sq :: TokenizeSpec

-- | Enable double-quoted string literal support, just as sq does for
--   single-quoted strings.
sqq :: TokenizeSpec

-- | Disable separate string literals.
--
--   Useful when processing delimited data (CSV-like formats). Normally,
--   sequential text chunks are concatenated together, but a consecutive text
--   chunk and string literal produce two different tokens, and this may
--   cause weird results if the data is in a CSV-like format, i.e.:
--
--   >>> tokenize (delims ":"<>emptyFields<>sq ) "aaa:bebe:'qq' aaa:next::" :: [Maybe Text]
--   [Just "aaa",Just "bebe",Just "qq",Just " aaa",Just "next",Nothing,Nothing]
--   
--
--   Look: "qq" and " aaa" are turned into two separate tokens, which makes
--   the result of CSV processing look improper, as if it had an extra
--   column. This behavior may be avoided by using this option, if you don't
--   need to distinguish text chunks from string literals:
--
--   >>> tokenize (delims ":"<>emptyFields<>sq<>noslits) "aaa:bebe:'qq:foo' aaa:next::" :: [Maybe Text]
--   [Just "aaa",Just "bebe",Just "qq:foo aaa",Just "next",Nothing,Nothing]
--   
noslits :: TokenizeSpec

-- | Strip spaces on the left side of a token. Does not affect string
--   literals, i.e. strings are processed normally. Useful mostly for
--   processing CSV-like formats, otherwise delims may be used to skip
--   unwanted spaces.
sl :: TokenizeSpec

-- | Strip spaces on the right side of a token. Does not affect string
--   literals, i.e. strings are processed normally. Useful mostly for
--   processing CSV-like formats, otherwise delims may be used to skip
--   unwanted spaces.
sr :: TokenizeSpec

-- | Strips spaces on the right and left sides and collapses multiple spaces
--   into one. The name originates from unwords . words
--
--   Does not affect string literals, i.e. strings are processed normally.
--   Useful mostly for processing CSV-like formats, otherwise delims may be
--   used to skip unwanted spaces.
uw :: TokenizeSpec

-- | Specify the list of delimiters (characters) used to split the character
--   stream into fields. Useful for CSV-like separated formats. Support for
--   empty fields in the token stream may be enabled by the addEmptyFields
--   function.
delims :: String -> TokenizeSpec

-- | Specify the line comment prefix. All text after the line comment prefix
--   will be ignored until a newline character appears. Multiple line
--   comments are supported.
comment :: Text -> TokenizeSpec

-- | Specify the punctuation characters. Any punctuation character is
--   handled as a separate token. Any token will be broken on a punctuation
--   character.
--
--   Useful for handling ... er ... punctuation, like
--
--   > function(a,b)
--   
--
--   or
--
--   > (apply function 1 2 3)
--   
--
--   >>> tokenize spec "(apply function 1 2 3)" :: [Text]
--   ["(","apply","function","1","2","3",")"]
--   
punct :: Text -> TokenizeSpec

-- | Enable indentation support
indent :: TokenizeSpec

-- | Set the tab expansion multiplier, i.e. each tab expands into n spaces
--   before processing. It also turns on indentation. Only the tabs at the
--   beginning of the string are expanded, i.e. before the first non-space
--   character appears.
itabstops :: Int -> TokenizeSpec

-- | Specify the keywords list. Each keyword will be treated as a separate
--   token.
keywords :: [Text] -> TokenizeSpec

-- | Turns on EOL token generation
eol :: TokenizeSpec

instance GHC.Show.Show Data.Text.Fuzzy.Tokenize.Token
instance GHC.Classes.Ord Data.Text.Fuzzy.Tokenize.Token
instance GHC.Classes.Eq Data.Text.Fuzzy.Tokenize.Token
instance GHC.Base.Monoid w => GHC.Base.Monad (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => Control.Monad.State.Class.MonadState () (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => Control.Monad.Writer.Class.MonadWriter w (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => Control.Monad.Reader.Class.MonadReader Data.Text.Fuzzy.Tokenize.TokenizeSpec (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Functor (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => GHC.Base.Applicative (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Show.Show Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance GHC.Classes.Ord Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance GHC.Classes.Eq Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance Data.Text.Fuzzy.Tokenize.IsToken (GHC.Maybe.Maybe Data.Text.Internal.Text)
instance Data.Text.Fuzzy.Tokenize.IsToken Data.Text.Internal.Text
instance GHC.Base.Semigroup Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance GHC.Base.Monoid Data.Text.Fuzzy.Tokenize.TokenizeSpec
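
-- A hedged sketch of a custom token type (added for illustration, not part
-- of the original package docs). It assumes that every mk* constructor
-- listed above is a class method of IsToken, as the signatures suggest, so
-- a richer token type can keep keyword, punctuation and indentation
-- information instead of collapsing everything into Text:
--
--   {-# LANGUAGE OverloadedStrings #-}
--   import Data.Text (Text)
--   import Data.Text.Fuzzy.Tokenize
--
--   data Tok = TChar Char | TSChar Char | TPunct Char
--            | TText Text | TStrLit Text | TKeyword Text
--            | TEmpty | TDelim | TIndent Int | TEol
--            deriving (Eq, Ord, Show)
--
--   instance IsToken Tok where
--     mkChar    = TChar
--     mkSChar   = TSChar
--     mkPunct   = TPunct
--     mkText    = TText
--     mkStrLit  = TStrLit
--     mkKeyword = TKeyword
--     mkEmpty   = TEmpty
--     mkDelim   = TDelim
--     mkIndent  = TIndent
--     mkEol     = TEol
--
--   -- usage: tokenize (delims " " <> punct "()" <> keywords ["let"]) input :: [Tok]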