-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Handling positions in text and position-tagging it.
--   
@package text-position
@version 0.1.0.0

module Data.Position

-- | Represents a position in a text. The intended usage is holding the
--   next available position in a file. In other words: if a character
--   were appended to the file, this is the position it would have.
data Position
Position :: Int -> Int -> Int -> Position

-- | Line number, start counting from 1
line :: Position -> Int

-- | Column number, start counting from 1
column :: Position -> Int

-- | Character index (count of characters in the file so far), start
--   counting from 1
char :: Position -> Int

-- | Represents an advancement of the <i>next available position</i> marker
--   due to reading a character. For example, the letter A moves forward by
--   one column, while linefeed (<tt>'\n'</tt>) moves to the beginning of
--   the next line.
--   
--   The character type is a type parameter.
--   
--   An advance includes a pattern and a change. The pattern determines to
--   which characters, or character sequences, this advance applies. The
--   change determines how to advance the position if the pattern is
--   matched. It can also choose different advances depending on the match,
--   e.g. move 1 column if matched <tt>"a"</tt> and move 4 columns if
--   matched <tt>"\t"</tt>.
type Advance s = RE s (Position -> Position)

-- | A value with a position attached.
data Positioned a
Positioned :: a -> Position -> Positioned a

-- | Applicative regex (<a>Text.Regex.Applicative</a>) which takes
--   position-tagged symbols and returns a position-tagged result.
type PosRE s a = RE (Positioned s) (Positioned a)

-- | The position before the first character in a file, to be used as an
--   initial value before reading actual characters.
zeroPosition :: Position

-- | The position of the first character in a file.
firstPosition :: Position

-- | The zero advance. It doesn't match any input and doesn't consume any
--   characters. Applying it doesn't change the position.
emptyAdvance :: Advance s

-- | The default advance when reading a character, e.g. a letter or a
--   digit. The new character would have column number higher by 1, and
--   character index higher by 1 (advances by 1 for each character
--   read). The pattern accepts any single character.
defaultAdvance :: Advance s

-- | Create an advance for a single character based on a predicate.
psymAdvance :: (s -> Bool) -> (Position -> Position) -> Advance s

-- | Create an advance for the given character.
symAdvance :: Eq s => s -> (Position -> Position) -> Advance s

-- | Create an advance for a line character with the specified width. This
--   is mainly useful for tabs and perhaps the various space characters in
--   Unicode. Example for tab:
--   
--   <pre>
--   tabAdv = linecharAdvance '\t' 8
--   </pre>
linecharAdvance :: Eq s => s -> Int -> Advance s

-- | Create an advance for the given character sequence.
stringAdvance :: Eq s => [s] -> (Position -> Position) -> Advance s

-- | Create an advance for a character or sequence of characters expressing
--   a newline, i.e. starting a new line. As the advance expresses the
--   position <i>after</i> the character, applying the advance results in
--   a position at column 1.
newlineAdvance :: Eq s => [s] -> Advance s

-- | Create a set of common advances supporting tabs and newlines. More
--   advances can easily be added by combining them with the result using
--   <tt><a>&lt;|&gt;</a></tt>. The result doesn't include the default
--   advance.
commonAdvance :: Int -> Bool -> Bool -> Bool -> Bool -> Advance Char

-- | Concatenate two advances into a single advance accepting their
--   patterns in order, and applying the advances on top of each other. For
--   example, concatenating an advance for <tt>a</tt> and an advance for
--   <tt>b</tt> results in an advance accepting <tt>"ab"</tt> and moving
--   the position 2 columns forward.
(<++>) :: Advance s -> Advance s -> Advance s

-- | Given a list of remaining characters to read, the next position in the
--   file and a set of advance rules, try to consume characters once and
--   determine what is the next position after reading them. Example:
--   
--   <pre>
--   &gt;&gt;&gt; tryAdvance defaultAdvance (Position 1 1 1) "abc"
--   (Position 1 2 2,"bc")
--   </pre>
--   
--   If there is no match, it returns the input position and the input
--   list, i.e. no characters will be consumed.
tryAdvance :: Advance s -> Position -> [s] -> (Position, [s])

-- | Like <a>tryAdvance</a>, but reads one character at most. In the
--   general case you'll want to use <a>tryAdvance</a>, because
--   <a>tryAdvanceC</a> breaks chains. For example, while <a>tryAdvance</a>
--   can recognize <tt>"\r\n"</tt> as a single newline, <a>tryAdvanceC</a>
--   will consume only the <tt>'\r'</tt>, splitting the string into 2
--   newlines.
--   
--   If there is no match, the input position is returned.
tryAdvanceC :: Advance s -> Position -> s -> Position

-- | Given a list of remaining characters to read, the next position in the
--   file and a set of advance rules, consume characters once and determine
--   what is the next position after reading them.
--   
--   The <a>defaultAdvance</a> is appended (using <a>&lt;|&gt;</a>) to the
--   given advance. Therefore, if the given list isn't empty, at least one
--   character will be consumed. The intended use is to encode all the
--   special cases (tab, newlines, non-spacing marks, etc.) in the given
--   advance, and let the <a>defaultAdvance</a> catch the rest.
advance :: Advance s -> Position -> [s] -> (Position, [s])

-- | Like <a>advance</a>, but reads exactly one character. Patterns which
--   require more than one character fail to match. Like
--   <a>tryAdvanceC</a>, but has the <a>defaultAdvance</a> appended, which
--   means it always consumes the given character.
advanceC :: Advance s -> Position -> s -> Position

-- | Given the next position and a list matched there, annotate the symbols
--   with position information. For a single character, it is simply the
--   given position. For a sequence, this annotation assigns all the
--   symbols the same line and column, incrementing only the character
--   index.
--   
--   <pre>
--   &gt;&gt;&gt; defaultAnnotate (Position 1 1 1) "a"
--   [Positioned 'a' (Position 1 1 1)]
--   </pre>
--   
--   <pre>
--   &gt;&gt;&gt; defaultAnnotate (Position 1 1 1) "\r\n"
--   [Positioned '\r' (Position 1 1 1), Positioned '\n' (Position 1 1 2)]
--   </pre>
--   
--   The last example would give the same positions to any list of the same
--   length, e.g. <tt>"ab"</tt> instead of <tt>"\r\n"</tt>.
defaultAnnotate :: Position -> [s] -> [Positioned s]

-- | Given an advance rule, the next available position and a symbol list,
--   consume symbols once. Return a list of them, annotated with position
--   information, as well as the next position and the rest of the input.
--   On empty input, return <tt>[]</tt>, the given position and the input
--   list.
--   
--   If more than one character is matched, the sequence is annotated with
--   consecutive character indices, but with the same line and column.
--   
--   <pre>
--   &gt;&gt;&gt; enrichOnce (newlineAdvance "\r\n") (Position 1 1 1) "\r\nhello"
--   ( [ Positioned '\r' (Position 1 1 1)
--     , Positioned '\n' (Position 1 1 2)
--     ]
--   , Position 2 1 3
--   , "hello"
--   )
--   </pre>
enrichOnce :: Advance s -> Position -> [s] -> ([Positioned s], Position, [s])

-- | Given an advance rule, the next available position and a symbol list,
--   try to consume symbols once. If consumed, return a list of them,
--   annotated with position information, as well as the next position and
--   the rest of the input. Otherwise, return <tt>[]</tt>, the given
--   position and the input list.
--   
--   If more than one character is matched, the sequence is annotated using
--   the function passed as the first parameter.
--   
--   <pre>
--   &gt;&gt;&gt; let ann = defaultAnnotate; adv = empty
--   
--   &gt;&gt;&gt; enrichOnceD ann adv (newlineAdvance "\r\n") (Position 1 1 1) "\r\nhello"
--   ( [ Positioned '\r' (Position 1 1 1)
--     , Positioned '\n' (Position 1 1 2)
--     ]
--   , Position 2 1 3
--   , "hello"
--   )
--   </pre>
enrichOnceD :: (Position -> [s] -> [Positioned s]) -> Advance s -> Advance s -> Position -> [s] -> ([Positioned s], Position, [s])

-- | Given a list of symbols, annotate it with position based on advance
--   rules. Each symbol is annotated with its position in the text. In
--   addition to the annotated list, the next available position is
--   returned (i.e. the position of the next symbol, if another symbol were
--   appended to the list).
--   
--   <pre>
--   &gt;&gt;&gt; enrich defaultAdvance "ab"
--   ( [ Positioned 'a' (Position 1 1 1)
--     , Positioned 'b' (Position 1 2 2)
--     ]
--   , Position 1 3 3
--   )
--   </pre>
--   
--   It is implemented using the <a>defaultAdvance</a> as a default, i.e.
--   the entire list is always consumed.
enrich :: Advance s -> [s] -> ([Positioned s], Position)

-- | Like <a>enrich</a>, but takes an annotation function as the first
--   parameter, and a default advance as the second parameter. The rest of
--   the parameters are the same ones <a>enrich</a> takes. It allows using
--   custom defaults. To have no default advance, pass <a>empty</a>.
--   
--   Since a match of the whole list isn't guaranteed, there is an
--   additional list in the return type, containing the rest of the input.
--   If the entire input is matched, that list will be <tt>[]</tt>. If no
--   input is matched at all, the annotated list is <tt>[]</tt>, the
--   position is <a>firstPosition</a> and the additional list (rest of
--   input) is the input list.
enrichD :: (Position -> [s] -> [Positioned s]) -> Advance s -> Advance s -> [s] -> ([Positioned s], Position, [s])

-- | Given a regex, create an equivalent position-aware regex. The
--   resulting regex reads position-tagged symbols, and returns a
--   position-tagged result.
bless :: RE s a -> PosRE s a

-- | Tokenize an input list and get a list of tokens. If there was an error
--   (no regex match), get the text position at which it happened.
tokens :: Advance s -> RE s a -> [s] -> ([Positioned a], Maybe (Positioned s))

-- | Get some numbers describing the given text (list of symbols):
--   
--   <ul>
--   <li>The total number of lines</li>
--   <li>The length (number of columns) of the last line</li>
--   <li>The total number of characters</li>
--   </ul>
--   
--   Note that this probably isn't the fastest implementation. It's
--   possible to compute directly by counting the lines and the characters.
--   This function is here anyway, as a demonstration of using this
--   library.
--   
--   <pre>
--   &gt;&gt;&gt; let adv = commonAdvance 4 True True True True
--   
--   &gt;&gt;&gt; textInfo adv "Hello world!\nHow are you?\nWonderful!"
--   (3,10,36)
--   </pre>
textInfo :: Advance s -> [s] -> (Int, Int, Int)