-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Handling positions in text and position-tagging it. -- @package text-position @version 0.1.0.0 module Data.Position -- | Represents a position in a text. The intended usage is holding the -- next available position in a file. In other words: If a character -- would be appended to the file, what its position would be. data Position Position :: Int -> Int -> Int -> Position -- | Line number, start counting from 1 line :: Position -> Int -- | Column number, start counting from 1 column :: Position -> Int -- | Character index (count of characters in the file so far), start -- counting from 1 char :: Position -> Int -- | Represents an advancement of the next available position marker -- due to reading a character. For example, the letter A moves forward by -- one column, while linefeed ('\n') moves to the beginning of -- the next line. -- -- The character type is a type parameter. -- -- An advance includes a pattern and a change. The pattern determines to -- which characters, or character sequences, this advance applies. The -- change determines how to advance the position in the pattern is -- matched. It can also choose different advances depending on the match, -- e.g. "move 1 column if matched "a" and move 4 columns if matched "t". type Advance s = RE s (Position -> Position) -- | A value with a position attached. data Positioned a Positioned :: a -> Position -> Positioned a -- | Applicative regex (Text.Regex.Applicative) which takes -- position-tagged symbols and returns a position-tagged result. type PosRE s a = RE (Positioned s) (Positioned a) -- | The position before the first character in a file, to be used as an -- initial value before reading actual characters. zeroPosition :: Position -- | The position of the first character in a file. firstPosition :: Position -- | The zero advance. It doesn't match any input and doesn't consume any -- characters. Applying it doesn't change the position. emptyAdvance :: Advance s -- | The default advance when reading a character, e.g. a letter or a -- digit. The new character would have column number higher by 1, and -- character index higher by once (advances by 1 for each character -- read). The pattern accepts any single character. defaultAdvance :: Advance s -- | Create an advance for a single character based on a predicate. psymAdvance :: (s -> Bool) -> (Position -> Position) -> Advance s -- | Create an advance for the given character. symAdvance :: Eq s => s -> (Position -> Position) -> Advance s -- | Create an advance for a line character with the specified width. This -- is mainly useful for tabs and perhaps the various space characters in -- Unicode. Example for tab: -- --
-- tabAdv = linecharAdvance '\t' 8 --linecharAdvance :: Eq s => s -> Int -> Advance s -- | Create an advance for the given character sequence. stringAdvance :: Eq s => [s] -> (Position -> Position) -> Advance s -- | Create an advance for a character or sequence of characters expressing -- a newline, i.e. starting a new line. As the advance expresses the -- position after the character, applying the advance results with -- a position at column 1. newlineAdvance :: Eq s => [s] -> Advance s -- | Create a set of common advances supporting tabs and newlines. More -- advances can easily be added by |ing them to the -- result. The result doesn't include the default advance. commonAdvance :: Int -> Bool -> Bool -> Bool -> Bool -> Advance Char -- | Concatenate two advances into a single advance accepting their -- patterns in order, and applying the advances on top of each other. For -- example, concatenating an advance for a and an -- advance for b results with an advance accepting -- "ab" and moving the position 2 columns forward. (<++>) :: Advance s -> Advance s -> Advance s -- | Given a list of remaining characters to read, the next position in the -- file and a set of advance rules, try to consume characters once and -- determine what is the next position after reading them. Example: -- --
-- >>> tryAdvance defaultAdvance (Position 1 1 1) "abc" -- (Position 1 2 2,"bc") ---- -- If there is no match, it returns the input position and the input -- list, i.e. no characters will be consumed. tryAdvance :: Advance s -> Position -> [s] -> (Position, [s]) -- | Like tryAdvance, but reads one character at most. In the -- general case you'll want to use tryAdvance, because -- tryAdvanceC breaks chains. For example, while tryAdvance -- can recognize "rn" as a single newline, tryAdvanceC -- will consume only the '\r', splitting the string into 2 -- newlines. -- -- If there is no match, the input position is returned. tryAdvanceC :: Advance s -> Position -> s -> Position -- | Given a list of remaining characters to read, the next position in the -- file and a set of advance rules, consume characters once and determine -- what is the next position after reading them. -- -- The defaultAdvance is appended (using <|>) to the -- given advance. Therefore, if the given list isn't empty, at leat -- character will be consumed. The intended use is to encode all the -- special cases (tab, newlines, non-spacing marks, etc.) in the given -- advance, and let the defaultAdvance catch the rest. advance :: Advance s -> Position -> [s] -> (Position, [s]) -- | Like advance, but reads exactly one character. Patterns which -- require more than one character fail to match. Like -- tryAdvanceC, but has the defaultAdvance appended, which -- means is always consumes given a non-empty list. advanceC :: Advance s -> Position -> s -> Position -- | Given the next position and a list matched there, annotate the symbols -- with position information. For a single character, it is simply the -- given position. For a sequence, this annotation assigns all the -- symbols the same line and column, incrementing only the character -- index. -- --
-- >>> defaultAnnotate (Position 1 1 1) "a" -- [Positioned 'a' (Position 1 1 1)] ---- --
-- >>> defaultAnnotate (Position 1 1 1) "\r\n" -- [Positioned '\r' (Position 1 1 1), Positioned '\n' (Position 1 1 2)] ---- -- The last example would give the same positions to any list of the same -- length, e.g. "ab" instead of "rn". defaultAnnotate :: Position -> [s] -> [Positioned s] -- | Given an advance rule, the next available position and a symbol list, -- consume symbols once. Return a list of them, annotated with position -- information, as well as the next position and the rest of the input. -- On empty input, return [], the given position and the input -- list. -- -- If more than one character is matched, the sequence is annotated with -- consecutive character indices, but with the same line and column. -- --
-- >>> enrichOnce (newlineAdvance "\r\n") (Position 1 1 1) "\r\nhello" -- ( [ Positioned '\r' (Position 1 1 1) -- , Positioned '\n' (Position 1 1 2) -- ] -- , Position 2 1 3 -- , "hello" -- ) --enrichOnce :: Advance s -> Position -> [s] -> ([Positioned s], Position, [s]) -- | Given an advance rule, the next available position and a symbol list, -- try to consume symbols once. If consumed, return a list of them, -- annotated with position information, as well as the next position and -- the rest of the input. Otherwise, return [], the given -- position and the input list. -- -- If more than one character is matched, the sequence is annotated using -- the function passed as the first parameter. -- --
-- >>> let ann = defaultAnnotate; adv = empty -- -- >>> enrichOnceD ann adv (newlineAdvance "\r\n") (Position 1 1 1) "\r\nhello" -- ( [ Positioned '\r' (Position 1 1 1) -- , Positioned '\n' (Position 1 1 2) -- ] -- , Position 2 1 3 -- , "hello" -- ) --enrichOnceD :: (Position -> [s] -> [Positioned s]) -> Advance s -> Advance s -> Position -> [s] -> ([Positioned s], Position, [s]) -- | Given a list of symbols, annotate it with position based on advance -- rules. Each symbol is annotated with its position in the text. In -- addition to the annotated list, the next available position is -- returned (i.e. the position of the next symbol, if another symbol were -- appended to the list). -- --
-- >>> enrich defaultAdvance "abc" -- ( [ Positioned 'a' (Position 1 1 1)) -- , Positioned 'b' (Position 1 2 2)) -- ] -- , Position 1 3 3 -- ) ---- -- It is implemented using the defaultAdvance as a default, i.e. -- the entire list is always consumed. enrich :: Advance s -> [s] -> ([Positioned s], Position) -- | Like enrich, but takes an annotation function as the first -- parameter, and a default advance as the second parameter. The rest of -- the parameters are the same ones enrich takes. It allows using -- custom defaults. To have no default advance, pass empty. -- -- Since a match of the whole list isn't guaranteed, there is an -- additional list in the return type, containing the rest of the input. -- If the entire input is matched, that list will be []. If no -- input is matched at all, the annotated list is [], the -- position is firstPosition and the additional list (rest of -- input) is the input list. enrichD :: (Position -> [s] -> [Positioned s]) -> Advance s -> Advance s -> [s] -> ([Positioned s], Position, [s]) -- | Given a regex, create an equivalent position-aware regex. The -- resulting regex reads position-tagged symbols, and returns a -- position-tagged result. bless :: RE s a -> PosRE s a -- | Tokenize an input list and get list of tokens. If there was an error -- (no regex match), get the text position at which it happened. tokens :: Advance s -> RE s a -> [s] -> ([Positioned a], Maybe (Positioned s)) -- | Get some numbers describing the given text (list of symbols): -- --
-- >>> let adv = commonAdvance 4 True True True True -- -- >>> textInfo adv "Hello world!\nHow are you?\nWonderful!" -- (3,11,36) --textInfo :: Advance s -> [s] -> (Int, Int, Int)