module Conllu.Parse
( Parser
, parseConlluWith
, parseConllu
, ParserC(ParserC)
, parserC
, rawSents
, sentence
, comment
, word
, emptyField
, idW
, form
, lemma
, upos
, xpos
, feats
, deprel
, deps
, misc
, commentPair
, listPair
, stringNot
, stringWOSpaces
, stringWSpaces
, keyValue
, maybeEmpty
, orEmpty
, listP )
where
import Conllu.Type
import qualified Conllu.DeprelTagset as D
import qualified Conllu.UposTagset as U
import Control.Monad (void, liftM2)
import Data.Either
import Data.Maybe
import Data.Void (Void)
import qualified Text.Megaparsec as TM
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
type Parser = TM.Parsec Void String
type RawData t e = [Either (TM.ParseError t e) Sent]
type DEPREL = Maybe (D.EP, Maybe String)
rawSents :: Parser (RawData String Void)
rawSents :: Parser (RawData String Void)
rawSents = Parser Sent -> Parser (RawData String Void)
rawSentsC Parser Sent
sentence
rawSentsC :: Parser Sent -> Parser (RawData String Void)
rawSentsC :: Parser Sent -> Parser (RawData String Void)
rawSentsC Parser Sent
sent = ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
-> Parser (RawData String Void)
-> Parser (RawData String Void)
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
TM.between ParsecT Void String Identity ()
ws ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
TM.eof (ParsecT Void String Identity (Either (ParseError String Void) Sent)
-> ParsecT Void String Identity () -> Parser (RawData String Void)
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
TM.endBy1 ParsecT Void String Identity (Either (ParseError String Void) Sent)
e ParsecT Void String Identity ()
lineFeed)
where
e :: ParsecT Void String Identity (Either (ParseError String Void) Sent)
e = (ParseError String Void
-> ParsecT
Void String Identity (Either (ParseError String Void) Sent))
-> ParsecT
Void String Identity (Either (ParseError String Void) Sent)
-> ParsecT
Void String Identity (Either (ParseError String Void) Sent)
forall e s (m :: * -> *) a.
MonadParsec e s m =>
(ParseError s e -> m a) -> m a -> m a
TM.withRecovery ParseError String Void
-> ParsecT
Void String Identity (Either (ParseError String Void) Sent)
forall a b. a -> ParsecT Void String Identity (Either a b)
recover (Sent -> Either (ParseError String Void) Sent
forall a b. b -> Either a b
Right (Sent -> Either (ParseError String Void) Sent)
-> Parser Sent
-> ParsecT
Void String Identity (Either (ParseError String Void) Sent)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Sent
sent)
recover :: a -> ParsecT Void String Identity (Either a b)
recover a
err =
a -> Either a b
forall a b. a -> Either a b
Left a
err Either a b
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity (Either a b)
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$
ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m end
TM.skipManyTill ParsecT Void String Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
TM.anySingle
(ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
TM.try (ParsecT Void String Identity ()
-> ParsecT Void String Identity ())
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity ()
lineFeed ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
TM.lookAhead ParsecT Void String Identity ()
lineFeed)
lineFeed :: Parser ()
lineFeed :: ParsecT Void String Identity ()
lineFeed = ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall a. Parser a -> Parser a
lexeme (ParsecT Void String Identity ()
-> ParsecT Void String Identity ())
-> (ParsecT Void String Identity Char
-> ParsecT Void String Identity ())
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void String Identity Char
-> ParsecT Void String Identity ())
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
newline
sentence :: Parser Sent
sentence :: Parser Sent
sentence = Parser Comment -> Parser (CW AW) -> Parser Sent
sentenceC Parser Comment
comment Parser (CW AW)
word
sentenceC :: Parser Comment -> Parser (CW AW) -> Parser Sent
sentenceC :: Parser Comment -> Parser (CW AW) -> Parser Sent
sentenceC Parser Comment
c Parser (CW AW)
t = ([Comment] -> [CW AW] -> Sent)
-> ParsecT Void String Identity [Comment]
-> ParsecT Void String Identity [CW AW]
-> Parser Sent
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 [Comment] -> [CW AW] -> Sent
Sent (Parser Comment -> ParsecT Void String Identity [Comment]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.many Parser Comment
c) (Parser (CW AW) -> ParsecT Void String Identity [CW AW]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.some Parser (CW AW)
t)
comment :: Parser Comment
=
(String -> Parser String
symbol String
"#" Parser String -> String -> Parser String
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"comment starter") Parser String -> Parser Comment -> Parser Comment
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Comment
commentPair Parser Comment -> ParsecT Void String Identity () -> Parser Comment
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
ParsecT Void String Identity ()
lineFeed Parser Comment -> String -> Parser Comment
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"comment content"
word :: Parser (CW AW)
word :: Parser (CW AW)
word =
Parser ID
-> Parser FORM
-> Parser FORM
-> Parser UPOS
-> Parser FORM
-> Parser FEATS
-> Parser DEPREL
-> Parser DEPS
-> Parser FORM
-> Parser (CW AW)
wordC Parser ID
idW Parser FORM
form Parser FORM
lemma Parser UPOS
upos Parser FORM
xpos Parser FEATS
feats Parser DEPREL
deprel Parser DEPS
deps Parser FORM
misc
wordC ::
Parser ID
-> Parser FORM
-> Parser LEMMA
-> Parser UPOS
-> Parser XPOS
-> Parser FEATS
-> Parser DEPREL
-> Parser DEPS
-> Parser MISC
-> Parser (CW AW)
wordC :: Parser ID
-> Parser FORM
-> Parser FORM
-> Parser UPOS
-> Parser FORM
-> Parser FEATS
-> Parser DEPREL
-> Parser DEPS
-> Parser FORM
-> Parser (CW AW)
wordC Parser ID
ixp Parser FORM
fop Parser FORM
lp Parser UPOS
upp Parser FORM
xpp Parser FEATS
fsp Parser DEPREL
drp Parser DEPS
dsp Parser FORM
mp = do
ID
i <- Parser ID
ixp Parser ID -> ParsecT Void String Identity Char -> Parser ID
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
FORM
mf <- Parser FORM
fop Parser FORM -> ParsecT Void String Identity Char -> Parser FORM
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
FORM
ml <- Parser FORM
lp Parser FORM -> ParsecT Void String Identity Char -> Parser FORM
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
UPOS
mup <- Parser UPOS
upp Parser UPOS -> ParsecT Void String Identity Char -> Parser UPOS
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
FORM
mxp <- Parser FORM
xpp Parser FORM -> ParsecT Void String Identity Char -> Parser FORM
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
FEATS
mfs <- Parser FEATS
fsp Parser FEATS -> ParsecT Void String Identity Char -> Parser FEATS
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
Maybe ID
mdh <- ParsecT Void String Identity (Maybe ID)
dhp ParsecT Void String Identity (Maybe ID)
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity (Maybe ID)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
DEPREL
mdr <- Parser DEPREL
drp Parser DEPREL -> ParsecT Void String Identity Char -> Parser DEPREL
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
DEPS
ds <- Parser DEPS
dsp Parser DEPS -> ParsecT Void String Identity Char -> Parser DEPS
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab
FORM
mm <- Parser FORM
mp Parser FORM -> ParsecT Void String Identity () -> Parser FORM
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity ()
lineFeed
CW AW -> Parser (CW AW)
forall (m :: * -> *) a. Monad m => a -> m a
return (CW AW -> Parser (CW AW)) -> CW AW -> Parser (CW AW)
forall a b. (a -> b) -> a -> b
$ ID
-> FORM
-> FORM
-> UPOS
-> FORM
-> FEATS
-> Maybe Rel
-> DEPS
-> FORM
-> CW AW
mkAW ID
i FORM
mf FORM
ml UPOS
mup FORM
mxp FEATS
mfs (Maybe ID -> DEPREL -> Maybe Rel
rel Maybe ID
mdh DEPREL
mdr) DEPS
ds FORM
mm
where
dhp :: ParsecT Void String Identity (Maybe ID)
dhp = Parser ID -> ParsecT Void String Identity (Maybe ID)
forall a. Parser a -> Parser (Maybe a)
maybeEmpty Parser ID
ixp ParsecT Void String Identity (Maybe ID)
-> String -> ParsecT Void String Identity (Maybe ID)
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"HEAD"
rel :: Maybe ID -> DEPREL -> Maybe Rel
rel :: Maybe ID -> DEPREL -> Maybe Rel
rel Maybe ID
mdh DEPREL
mdr = do
ID
dh <- Maybe ID
mdh
(EP
dr, FORM
sdr) <- DEPREL
mdr
Rel -> Maybe Rel
forall (m :: * -> *) a. Monad m => a -> m a
return (Rel -> Maybe Rel) -> Rel -> Maybe Rel
forall a b. (a -> b) -> a -> b
$ ID -> EP -> FORM -> Maybe [String] -> Rel
Rel ID
dh EP
dr FORM
sdr Maybe [String]
forall a. Maybe a
Nothing
emptyField :: Parser (Maybe a)
emptyField :: Parser (Maybe a)
emptyField = String -> Parser String
symbol String
"_" Parser String -> Parser (Maybe a) -> Parser (Maybe a)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Maybe a -> Parser (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing Parser (Maybe a) -> String -> Parser (Maybe a)
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"empty field (_)"
idW :: Parser ID
idW :: Parser ID
idW = do
Index
ix <- Parser Index
index
Maybe (Either Char Char, Index)
mix <- ParsecT Void String Identity (Either Char Char, Index)
-> ParsecT Void String Identity (Maybe (Either Char Char, Index))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
TM.optional ParsecT Void String Identity (Either Char Char, Index)
metaIndex ParsecT Void String Identity (Maybe (Either Char Char, Index))
-> String
-> ParsecT Void String Identity (Maybe (Either Char Char, Index))
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"meta token ID"
ID -> Parser ID
forall (m :: * -> *) a. Monad m => a -> m a
return (ID -> Parser ID) -> ID -> Parser ID
forall a b. (a -> b) -> a -> b
$
case Maybe (Either Char Char, Index)
mix of
Maybe (Either Char Char, Index)
Nothing -> Index -> ID
SID Index
ix
Just (Left Char
_, Index
eix) -> Index -> Index -> ID
MID Index
ix Index
eix
Just (Right Char
_, Index
eix) -> Index -> Index -> ID
EID Index
ix Index
eix
where
index :: Parser Index
index :: Parser Index
index = do
String
ix <- ParsecT Void String Identity Char -> Parser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.some ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
digitChar Parser String -> String -> Parser String
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"ID"
Index -> Parser Index
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Index
forall a. Read a => String -> a
read String
ix :: Int)
indexSep :: Parser (Either IxSep IxSep)
indexSep :: Parser (Either Char Char)
indexSep = ParsecT Void String Identity Char
-> ParsecT Void String Identity Char -> Parser (Either Char Char)
forall (m :: * -> *) a b.
Alternative m =>
m a -> m b -> m (Either a b)
TM.eitherP (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token String
'-') (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token String
'.') Parser (Either Char Char) -> String -> Parser (Either Char Char)
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"meta separator"
metaIndex :: Parser (Either IxSep IxSep, Index)
metaIndex :: ParsecT Void String Identity (Either Char Char, Index)
metaIndex = do
Either Char Char
sep <- Parser (Either Char Char)
indexSep
Index
ix <- Parser Index
index
(Either Char Char, Index)
-> ParsecT Void String Identity (Either Char Char, Index)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Char Char
sep, Index
ix)
form :: Parser FORM
form :: Parser FORM
form = Parser String -> Parser FORM
orEmpty Parser String
stringWSpaces Parser FORM -> String -> Parser FORM
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"FORM"
lemma :: Parser LEMMA
lemma :: Parser FORM
lemma = Parser String -> Parser FORM
orEmpty Parser String
stringWSpaces Parser FORM -> String -> Parser FORM
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"LEMMA"
upos :: Parser UPOS
upos :: Parser UPOS
upos = Parser POS -> Parser UPOS
forall a. Parser a -> Parser (Maybe a)
maybeEmpty Parser POS
upos'
where
upos' :: Parser U.POS
upos' :: Parser POS
upos' = (String -> POS) -> Parser String -> Parser POS
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> POS
mkUPOS (Parser String -> Parser POS) -> Parser String -> Parser POS
forall a b. (a -> b) -> a -> b
$ [Parser String] -> Parser String
forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
TM.choice ([Parser String] -> Parser String)
-> [Parser String] -> Parser String
forall a b. (a -> b) -> a -> b
$ (POS -> Parser String) -> [POS] -> [Parser String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String -> Parser String
forall e s (m :: * -> *).
(MonadParsec e s m, FoldCase (Tokens s)) =>
Tokens s -> m (Tokens s)
string' (String -> Parser String)
-> (POS -> String) -> POS -> Parser String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. POS -> String
forall a. Show a => a -> String
show) [POS
U.ADJ .. POS
U.X]
xpos :: Parser XPOS
xpos :: Parser FORM
xpos = Parser String -> Parser FORM
forall a. Parser a -> Parser (Maybe a)
maybeEmpty Parser String
stringWOSpaces Parser FORM -> String -> Parser FORM
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"XPOS"
feats :: Parser FEATS
feats :: Parser FEATS
feats = Parser FEATS -> Parser FEATS
forall a. Parser [a] -> Parser [a]
listP (ParsecT Void String Identity Feat
feat ParsecT Void String Identity Feat -> Parser String -> Parser FEATS
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`TM.sepBy` String -> Parser String
symbol String
"|" Parser FEATS -> String -> Parser FEATS
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"FEATS")
where
feat :: ParsecT Void String Identity Feat
feat = do
String
k <- Parser String -> Parser String
forall a. Parser a -> Parser a
lexeme (ParsecT Void String Identity Char -> Parser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.some ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
alphaNumChar Parser String -> String -> Parser String
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"feature key")
FORM
ft <-
Parser String -> Parser FORM
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
TM.optional (Parser String -> Parser FORM) -> Parser String -> Parser FORM
forall a b. (a -> b) -> a -> b
$
Parser String -> Parser String -> Parser String -> Parser String
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
TM.between (String -> Parser String
symbol String
"[") (String -> Parser String
symbol String
"]") (ParsecT Void String Identity Char -> Parser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.some ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
alphaNumChar)
String
_ <- String -> Parser String
symbol String
"="
[String]
vs <- Parser String
fvalue Parser String
-> Parser String -> ParsecT Void String Identity [String]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`TM.sepBy1` String -> Parser String
symbol String
","
Feat -> ParsecT Void String Identity Feat
forall (m :: * -> *) a. Monad m => a -> m a
return (Feat -> ParsecT Void String Identity Feat)
-> Feat -> ParsecT Void String Identity Feat
forall a b. (a -> b) -> a -> b
$ String -> [String] -> FORM -> Feat
Feat String
k [String]
vs FORM
ft
fvalue :: Parser String
fvalue = Parser String -> Parser String
forall a. Parser a -> Parser a
lexeme (ParsecT Void String Identity Char -> Parser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.some ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
alphaNumChar Parser String -> String -> Parser String
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"feature value")
deprel :: Parser DEPREL
deprel :: Parser DEPREL
deprel = Parser (EP, FORM) -> Parser DEPREL
forall a. Parser a -> Parser (Maybe a)
maybeEmpty Parser (EP, FORM)
deprel'
dep :: Parser D.EP
dep :: Parser EP
dep = (String -> EP) -> Parser String -> Parser EP
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> EP
mkDEP (Parser String -> Parser EP) -> Parser String -> Parser EP
forall a b. (a -> b) -> a -> b
$ [Parser String] -> Parser String
forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
TM.choice ([Parser String] -> Parser String)
-> [Parser String] -> Parser String
forall a b. (a -> b) -> a -> b
$ (EP -> Parser String) -> [EP] -> [Parser String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String -> Parser String
forall e s (m :: * -> *).
(MonadParsec e s m, FoldCase (Tokens s)) =>
Tokens s -> m (Tokens s)
string' (String -> Parser String) -> (EP -> String) -> EP -> Parser String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. EP -> String
forall a. Show a => a -> String
show) [EP
D.ACL .. EP
D.XCOMP]
deprel' :: Parser (D.EP, Maybe String)
deprel' :: Parser (EP, FORM)
deprel' = (EP -> FORM -> (EP, FORM))
-> Parser EP -> Parser FORM -> Parser (EP, FORM)
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 (,) Parser EP
dep Parser FORM
subdeprel
where
subdeprel :: Parser (Maybe String)
subdeprel :: Parser FORM
subdeprel = Parser String -> Parser FORM
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
TM.optional (String -> Parser String
symbol String
":" Parser String -> Parser String -> Parser String
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser String
letters Parser String -> String -> Parser String
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"DEPREL subtype")
deps :: Parser DEPS
deps :: Parser DEPS
deps = Parser DEPS -> Parser DEPS
forall a. Parser [a] -> Parser [a]
listP (ParsecT Void String Identity Rel
eDep ParsecT Void String Identity Rel -> Parser String -> Parser DEPS
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`TM.sepBy` String -> Parser String
symbol String
"|" Parser DEPS -> String -> Parser DEPS
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"DEPS")
where
eDep :: ParsecT Void String Identity Rel
eDep = do
ID
h <- Parser ID
idW Parser ID -> String -> Parser ID
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"enhanced dependency HEAD"
String
_ <- Parser String
sep
EP
d <- Parser EP
dep Parser EP -> String -> Parser EP
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"enhanced dependency DEPREL"
Maybe [String]
restI <-
ParsecT Void String Identity [String]
-> ParsecT Void String Identity (Maybe [String])
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
TM.optional
(Parser String
sep Parser String
-> ParsecT Void String Identity [String]
-> ParsecT Void String Identity [String]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
String -> Parser String
stringNot String
"\t| :" Parser String
-> Parser String -> ParsecT Void String Identity [String]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`TM.sepBy` Parser String
sep ParsecT Void String Identity [String]
-> String -> ParsecT Void String Identity [String]
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?>
String
"enhanced dependency information")
Rel -> ParsecT Void String Identity Rel
forall (m :: * -> *) a. Monad m => a -> m a
return (Rel -> ParsecT Void String Identity Rel)
-> Rel -> ParsecT Void String Identity Rel
forall a b. (a -> b) -> a -> b
$ ID -> EP -> FORM -> Maybe [String] -> Rel
Rel ID
h EP
d FORM
forall a. Maybe a
Nothing Maybe [String]
restI
sep :: Parser String
sep = String -> Parser String
symbol String
":"
misc :: Parser MISC
misc :: Parser FORM
misc = Parser String -> Parser FORM
orEmpty Parser String
stringWSpaces Parser FORM -> String -> Parser FORM
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
TM.<?> String
"MISC"
commentPair :: Parser Comment
=
String -> Parser String -> Parser String -> Parser Comment
forall a b. String -> Parser a -> Parser b -> Parser (a, b)
keyValue String
"=" (String -> Parser String
stringNot String
"=\n\t") (String -> Parser String -> Parser String
forall (m :: * -> *) a. Alternative m => a -> m a -> m a
TM.option String
"" Parser String
stringWSpaces)
listPair :: String -> Parser a -> Parser b -> Parser [(a, b)]
listPair :: String -> Parser a -> Parser b -> Parser [(a, b)]
listPair String
sep Parser a
p Parser b
q = String -> Parser a -> Parser b -> Parser (a, b)
forall a b. String -> Parser a -> Parser b -> Parser (a, b)
keyValue String
sep Parser a
p Parser b
q Parser (a, b) -> Parser String -> Parser [(a, b)]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`TM.sepBy1` String -> Parser String
symbol String
"|"
stringNot :: String -> Parser String
stringNot :: String -> Parser String
stringNot String
s = Parser String -> Parser String
forall a. Parser a -> Parser a
lexeme (Parser String -> Parser String) -> Parser String -> Parser String
forall a b. (a -> b) -> a -> b
$ FORM
-> (Token String -> Bool)
-> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
FORM -> (Token s -> Bool) -> m (Tokens s)
TM.takeWhile1P FORM
forall a. Maybe a
Nothing (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
s)
stringWOSpaces :: Parser String
stringWOSpaces :: Parser String
stringWOSpaces = String -> Parser String
stringNot String
" \t\n"
stringWSpaces :: Parser String
stringWSpaces :: Parser String
stringWSpaces = String -> Parser String
stringNot String
"\t\n"
letters :: Parser String
letters :: Parser String
letters = Parser String -> Parser String
forall a. Parser a -> Parser a
lexeme (Parser String -> Parser String) -> Parser String -> Parser String
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity Char -> Parser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
TM.some ParsecT Void String Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
letterChar
keyValue :: String -> Parser a -> Parser b -> Parser (a, b)
keyValue :: String -> Parser a -> Parser b -> Parser (a, b)
keyValue String
sep Parser a
p Parser b
q = do
a
key <- Parser a
p
FORM
_ <- Parser String -> Parser FORM
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
TM.optional (Parser String -> Parser FORM) -> Parser String -> Parser FORM
forall a b. (a -> b) -> a -> b
$ String -> Parser String
symbol String
sep
b
value <- Parser b
q
(a, b) -> Parser (a, b)
forall (m :: * -> *) a. Monad m => a -> m a
return (a
key, b
value)
maybeEmpty :: Parser a -> Parser (Maybe a)
maybeEmpty :: Parser a -> Parser (Maybe a)
maybeEmpty Parser a
p = Parser (Maybe a)
forall a. Parser (Maybe a)
emptyField Parser (Maybe a) -> Parser (Maybe a) -> Parser (Maybe a)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
TM.<|> (a -> Maybe a) -> Parser a -> Parser (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> Maybe a
forall a. a -> Maybe a
Just Parser a
p
orEmpty :: Parser String -> Parser (Maybe String)
orEmpty :: Parser String -> Parser FORM
orEmpty Parser String
p = do
String
r <- Parser String
p
case String
r of
String
"_" -> FORM -> Parser FORM
forall (m :: * -> *) a. Monad m => a -> m a
return FORM
forall a. Maybe a
Nothing
String
_ -> FORM -> Parser FORM
forall (m :: * -> *) a. Monad m => a -> m a
return (FORM -> Parser FORM) -> FORM -> Parser FORM
forall a b. (a -> b) -> a -> b
$ String -> FORM
forall a. a -> Maybe a
Just String
r
listP :: Parser [a] -> Parser [a]
listP :: Parser [a] -> Parser [a]
listP Parser [a]
p = [a] -> Maybe [a] -> [a]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [a] -> [a])
-> ParsecT Void String Identity (Maybe [a]) -> Parser [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser [a] -> ParsecT Void String Identity (Maybe [a])
forall a. Parser a -> Parser (Maybe a)
maybeEmpty Parser [a]
p
symbol :: String -> Parser String
symbol :: String -> Parser String
symbol = ParsecT Void String Identity ()
-> Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> Tokens s -> m (Tokens s)
L.symbol ParsecT Void String Identity ()
ws
lexeme :: Parser a -> Parser a
lexeme :: Parser a -> Parser a
lexeme = ParsecT Void String Identity () -> Parser a -> Parser a
forall e s (m :: * -> *) a. MonadParsec e s m => m () -> m a -> m a
L.lexeme ParsecT Void String Identity ()
ws
ws :: Parser ()
ws :: ParsecT Void String Identity ()
ws = Parser String -> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser String -> ParsecT Void String Identity ())
-> Parser String -> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ FORM
-> (Token String -> Bool)
-> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
FORM -> (Token s -> Bool) -> m (Tokens s)
TM.takeWhileP (String -> FORM
forall a. a -> Maybe a
Just String
"space") (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ')
data ParserC = ParserC
{ :: Parser Comment
, ParserC -> Parser ID
_idP :: Parser ID
, ParserC -> Parser FORM
_formP :: Parser FORM
, ParserC -> Parser FORM
_lemmaP :: Parser LEMMA
, ParserC -> Parser UPOS
_upostagP :: Parser UPOS
, ParserC -> Parser FORM
_xpostagP :: Parser XPOS
, ParserC -> Parser FEATS
_featsP :: Parser FEATS
, ParserC -> Parser DEPREL
_deprelP :: Parser DEPREL
, ParserC -> Parser DEPS
_depsP :: Parser DEPS
, ParserC -> Parser FORM
_miscP :: Parser MISC
} deriving ()
customC :: ParserC
customC :: ParserC
customC = ParserC :: Parser Comment
-> Parser ID
-> Parser FORM
-> Parser FORM
-> Parser UPOS
-> Parser FORM
-> Parser FEATS
-> Parser DEPREL
-> Parser DEPS
-> Parser FORM
-> ParserC
ParserC
{ $sel:_commentP:ParserC :: Parser Comment
_commentP = Parser Comment
comment
, $sel:_idP:ParserC :: Parser ID
_idP = Parser ID
idW
, $sel:_formP:ParserC :: Parser FORM
_formP = Parser FORM
form
, $sel:_lemmaP:ParserC :: Parser FORM
_lemmaP = Parser FORM
lemma
, $sel:_upostagP:ParserC :: Parser UPOS
_upostagP = Parser UPOS
upos
, $sel:_xpostagP:ParserC :: Parser FORM
_xpostagP = Parser FORM
xpos
, $sel:_featsP:ParserC :: Parser FEATS
_featsP = Parser FEATS
feats
, $sel:_deprelP:ParserC :: Parser DEPREL
_deprelP = Parser DEPREL
deprel
, $sel:_depsP:ParserC :: Parser DEPS
_depsP = Parser DEPS
deps
, $sel:_miscP:ParserC :: Parser FORM
_miscP = Parser FORM
misc
}
parserC :: ParserC -> Parser Sent
parserC :: ParserC -> Parser Sent
parserC ParserC
p =
let i :: Parser ID
i = ParserC -> Parser ID
_idP ParserC
p
f :: Parser FORM
f = ParserC -> Parser FORM
_formP ParserC
p
l :: Parser FORM
l = ParserC -> Parser FORM
_lemmaP ParserC
p
up :: Parser UPOS
up = ParserC -> Parser UPOS
_upostagP ParserC
p
xp :: Parser FORM
xp = ParserC -> Parser FORM
_xpostagP ParserC
p
fs :: Parser FEATS
fs = ParserC -> Parser FEATS
_featsP ParserC
p
dr :: Parser DEPREL
dr = ParserC -> Parser DEPREL
_deprelP ParserC
p
ds :: Parser DEPS
ds = ParserC -> Parser DEPS
_depsP ParserC
p
m :: Parser FORM
m = ParserC -> Parser FORM
_miscP ParserC
p
c :: Parser Comment
c = ParserC -> Parser Comment
_commentP ParserC
p
w :: Parser (CW AW)
w = Parser ID
-> Parser FORM
-> Parser FORM
-> Parser UPOS
-> Parser FORM
-> Parser FEATS
-> Parser DEPREL
-> Parser DEPS
-> Parser FORM
-> Parser (CW AW)
wordC Parser ID
i Parser FORM
f Parser FORM
l Parser UPOS
up Parser FORM
xp Parser FEATS
fs Parser DEPREL
dr Parser DEPS
ds Parser FORM
m
s :: Parser Sent
s = Parser Comment -> Parser (CW AW) -> Parser Sent
sentenceC Parser Comment
c Parser (CW AW)
w
in Parser Sent
s
parseConlluWith
:: Parser Sent
-> FilePath
-> String
-> Either String Doc
parseConlluWith :: Parser Sent -> String -> String -> Either String Doc
parseConlluWith Parser Sent
p String
fp String
s =
case Parser (RawData String Void)
-> String
-> String
-> Either (ParseErrorBundle String Void) (RawData String Void)
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
TM.parse Parser (RawData String Void)
doc String
fp String
s of
Left ParseErrorBundle String Void
err -> String -> Either String Doc
forall a b. a -> Either a b
Left (String -> Either String Doc) -> String -> Either String Doc
forall a b. (a -> b) -> a -> b
$ ParseErrorBundle String Void -> String
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> String
TM.errorBundlePretty ParseErrorBundle String Void
err
Right RawData String Void
d ->
let ([ParseError String Void]
ls, Doc
rs) = RawData String Void -> ([ParseError String Void], Doc)
forall a b. [Either a b] -> ([a], [b])
partitionEithers RawData String Void
d
in if [ParseError String Void] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ParseError String Void]
ls
then Doc -> Either String Doc
forall a b. b -> Either a b
Right Doc
rs
else String -> Either String Doc
forall a b. a -> Either a b
Left (String -> Either String Doc) -> String -> Either String Doc
forall a b. (a -> b) -> a -> b
$ (ParseError String Void -> String)
-> [ParseError String Void] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ParseError String Void -> String
forall s e.
(VisualStream s, ShowErrorComponent e) =>
ParseError s e -> String
TM.parseErrorPretty [ParseError String Void]
ls
where
doc :: Parser (RawData String Void)
doc = Parser Sent -> Parser (RawData String Void)
rawSentsC Parser Sent
p
parseConllu :: FilePath -> String -> Either String Doc
parseConllu :: String -> String -> Either String Doc
parseConllu = Parser Sent -> String -> String -> Either String Doc
parseConlluWith Parser Sent
sentence