-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/

-- | Tools for processing unstructured text data
--
-- Lightweight and easy-to-use functions for text tokenizing and
-- parsing. The package is aimed at parsing mostly unstructured data,
-- but structured formats may be parsed as well. It may be used in
-- different situations: for DSLs, text markups or even for parsing
-- simple grammars more easily, and sometimes faster, than with
-- mainstream parsing combinators or parser generators. See
-- README.markdown, the examples and the module documentation for more.
@package fuzzy-parse
@version 0.1.2.0

module Data.Text.Fuzzy.Attoparsec.Day
dayDMY :: Parser Day
dayYMD :: Parser Day
dayYYYYMMDD :: Parser Day
dayDMonY :: Parser Day
day :: Parser Day

module Data.Text.Fuzzy.Attoparsec.Month
fuzzyMonth :: Parser Int
fuzzyMonthFromText :: Text -> Maybe Int
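
-- The day and month parsers above are ordinary attoparsec parsers. A
-- minimal usage sketch (the expected results are assumptions inferred
-- from the parseMaybeDay examples below, not taken from the module
-- documentation):
--
-- > import Data.Attoparsec.Text (parseOnly)
-- > import Data.Text.Fuzzy.Attoparsec.Day (day)
-- > import Data.Text.Fuzzy.Attoparsec.Month (fuzzyMonthFromText)
-- >
-- > -- parseOnly day "21-feb-79" is expected to yield Right 1979-02-21,
-- > -- and fuzzyMonthFromText "feb" is expected to yield Just 2
-- > -- (English month names only).
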
-- parseMaybeDay "01.01.1979" -- Just 1979-01-01 -- parseMaybeDay "01.01.01" -- Just 2001-01-01 -- parseMaybeDay "13/01/2019" -- Just 2019-01-13 -- parseMaybeDay "2019-12-1" -- Just 2019-12-01 -- parseMaybeDay "21-feb-79" -- Just 1979-02-21 -- parseMaybeDay "21-feb-01" -- Just 2001-02-21 -- parseMaybeDay "29feb04" -- Just 2004-02-29 -- parseMaybeDay "21feb28" -- Just 2028-02-21 --module Data.Text.Fuzzy.Dates -- | Tries to parse a date from the text. parseMaybeDay :: Text -> Maybe Day module Data.Text.Fuzzy.Section cutSectionBy :: (Text -> Bool) -> (Text -> Bool) -> [Text] -> [Text] cutSectionOn :: Text -> Text -> [Text] -> [Text] -- | The lightweight and multi-functional text tokenizer allowing different -- types of text tokenization depending on it's settings. -- -- It may be used in different sutiations, for DSL, text markups or even -- for parsing simple grammars easier and sometimes faster than in case -- of usage mainstream parsing combinators or parser generators. -- -- The primary goal of this package is to parse unstructured text data, -- however it may be used for parsing such data formats as CSV with ease. -- -- Currently it supports the following types of entities: atoms, string -- literals (currently with the minimal set of escaped characters), -- punctuation characters and delimeters. -- --
-- >>> tokenize (delims ":") "aaa : bebeb : qqq ::::" :: [Text] -- ["aaa "," bebeb "," qqq "] ---- --
-- >>> tokenize (delims ":"<>sq<>emptyFields ) "aaa : bebeb : qqq ::::" :: [Text] -- ["aaa "," bebeb "," qqq ","","","",""] ---- --
-- >>> > tokenize (delims ":"<>sq<>emptyFields ) "aaa : bebeb : qqq ::::" :: [Maybe Text] -- [Just "aaa ",Just " bebeb ",Just " qqq ",Nothing,Nothing,Nothing,Nothing] ---- --
-- >>> tokenize (delims ":"<>sq<>emptyFields ) "aaa : 'bebeb:colon inside' : qqq ::::" :: [Maybe Text] -- [Just "aaa ",Just " ",Just "bebeb:colon inside",Just " ",Just " qqq ",Nothing,Nothing,Nothing,Nothing] ---- --
-- >>> let spec = sl<>delims ":"<>sq<>emptyFields<>noslits -- -- >>> tokenize spec " aaa : 'bebeb:colon inside' : qqq ::::" :: [Maybe Text] -- [Just "aaa ",Just "bebeb:colon inside ",Just "qqq ",Nothing,Nothing,Nothing,Nothing] ---- --
-- >>> let spec = delims ":"<>sq<>emptyFields<>uw<>noslits -- -- >>> tokenize spec " a b c : 'bebeb:colon inside' : qqq ::::" :: [Maybe Text] -- [Just "a b c",Just "bebeb:colon inside",Just "qqq",Nothing,Nothing,Nothing,Nothing] ---- --
-- >>> let spec = delims " \t"<>punct ",;()" <>emptyFields<>sq
--
-- >>> tokenize spec "( delimeters , are , important, 'spaces are not');" :: [Text]
-- ["(","delimeters",",","are",",","important",",","spaces are not",")",";"]
--
--
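-- A further sketch of the CSV-like use case mentioned above. The expected
-- output here is an assumption composed from the behaviours shown in the
-- examples above, not an output taken from the package documentation:
--
-- >>> let csv = delims ","<>emptyFields<>sl<>sr
--
-- >>> tokenize csv "name, value,,  last  " :: [Maybe Text]
-- [Just "name",Just "value",Nothing,Just "last"]
--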
module Data.Text.Fuzzy.Tokenize

-- | Disables treating string literals as separate tokens. Useful when you
-- don't need to distinguish text chunks from string literals:
--
-- >>> tokenize (delims ":"<>emptyFields<>sq) "aaa:bebe:'qq' aaa:next::" :: [Maybe Text]
-- [Just "aaa",Just "bebe",Just "qq",Just " aaa",Just "next",Nothing,Nothing]
--
-- Note that "qq" and " aaa" are turned into two separate tokens, which
-- makes the result of CSV processing look as if it had an extra column.
-- This behaviour may be avoided with this option:
--
-- >>> tokenize (delims ":"<>emptyFields<>sq<>noslits) "aaa:bebe:'qq:foo' aaa:next::" :: [Maybe Text]
-- [Just "aaa",Just "bebe",Just "qq:foo aaa",Just "next",Nothing,Nothing]
noslits :: TokenizeSpec

-- | Strip spaces on the left side of a token. Does not affect string
-- literals, i.e. string literals are processed normally. Useful mostly
-- for processing CSV-like formats; otherwise delims may be used to skip
-- unwanted spaces.
sl :: TokenizeSpec

-- | Strip spaces on the right side of a token. Does not affect string
-- literals, i.e. string literals are processed normally. Useful mostly
-- for processing CSV-like formats; otherwise delims may be used to skip
-- unwanted spaces.
sr :: TokenizeSpec

-- | Strips spaces on both the right and left sides and collapses
-- multiple spaces into one. The name comes from unwords . words.
--
-- Does not affect string literals, i.e. string literals are processed
-- normally. Useful mostly for processing CSV-like formats; otherwise
-- delims may be used to skip unwanted spaces.
uw :: TokenizeSpec

-- | Specify the list of delimiter characters used to split the character
-- stream into fields. Useful for CSV-like separated formats. Support for
-- empty fields in the token stream may be enabled with the
-- addEmptyFields function.
delims :: String -> TokenizeSpec

-- | Specify the line comment prefix. All text after the line comment
-- prefix is ignored until a newline character appears. Multiple line
-- comments are supported.
comment :: Text -> TokenizeSpec

-- | Specify the punctuation characters. Any punctuation character is
-- handled as a separate token, and any token will be broken on a
-- punctuation character.
--
-- Useful for handling ... er ... punctuation, like
--
-- > function(a,b)
--
-- or
--
-- > (apply function 1 2 3)
--
-- >>> tokenize spec "(apply function 1 2 3)" :: [Text]
-- ["(","apply","function","1","2","3",")"]
--
punct :: Text -> TokenizeSpec
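
-- A hedged usage sketch for the comment combinator. The expected output
-- is an assumption based on the description above (text after the prefix
-- is ignored up to the end of line), not an output taken from the
-- package documentation:
--
-- >>> tokenize (delims " \n"<>comment "--") "foo bar -- ignored\nbaz" :: [Text]
-- ["foo","bar","baz"]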
-- | Enable indentation support
indent :: TokenizeSpec
-- | Set the tab expansion multiplier, i.e. each tab expands into n
-- spaces before processing. It also turns on indentation support. Only
-- tabs at the beginning of the string are expanded, i.e. those before
-- the first non-space character.
itabstops :: Int -> TokenizeSpec
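
-- An illustration of the tab expansion described above; the hypothetical
-- spec and the equivalence stated in the comments are assumptions drawn
-- from that description, not taken from the package documentation:
--
-- > let spec = itabstops 8 <> delims " "
-- > -- under this spec, "\tfoo bar" is tokenized as if it were written
-- > -- "        foo bar": only leading tabs are expanded, tabs after the
-- > -- first non-space character are left as-is.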
-- | Specify the keywords list. Each keyword will be treated as a
-- separate token.
keywords :: [Text] -> TokenizeSpec
-- | Turns on EOL token generation
eol :: TokenizeSpec
instance GHC.Show.Show Data.Text.Fuzzy.Tokenize.Token
instance GHC.Classes.Ord Data.Text.Fuzzy.Tokenize.Token
instance GHC.Classes.Eq Data.Text.Fuzzy.Tokenize.Token
instance GHC.Base.Monoid w => GHC.Base.Monad (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => Control.Monad.State.Class.MonadState () (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => Control.Monad.Writer.Class.MonadWriter w (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => Control.Monad.Reader.Class.MonadReader Data.Text.Fuzzy.Tokenize.TokenizeSpec (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Functor (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Base.Monoid w => GHC.Base.Applicative (Data.Text.Fuzzy.Tokenize.TokenizeM w)
instance GHC.Show.Show Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance GHC.Classes.Ord Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance GHC.Classes.Eq Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance Data.Text.Fuzzy.Tokenize.IsToken (GHC.Maybe.Maybe Data.Text.Internal.Text)
instance Data.Text.Fuzzy.Tokenize.IsToken Data.Text.Internal.Text
instance GHC.Base.Semigroup Data.Text.Fuzzy.Tokenize.TokenizeSpec
instance GHC.Base.Monoid Data.Text.Fuzzy.Tokenize.TokenizeSpec