module Language.Gender.Dictionary
  ( Gender(..)
  , parseDictionary
  ) where
import Language.Gender.Types
import Control.Applicative
import Data.Attoparsec.Text
import Data.Char                    (chr)
import Data.Text             hiding (filter,take)
import Prelude               hiding (concat,take)
data DictLine = Comment  Text
              | Mapping (Char,[Text])
              | Name     Text Gender (Maybe Column30)
              | EquivName Text Text
              deriving (Show)
data Column30 = CompressedUmlaut
              | ExpandedUmlaut
              | ArabicPseudoVowel
              deriving (Show)
parseDictionary text =
  case parseOnly dict text of
    Left   _ -> []
    Right xs -> do
      let mappings = mapValToKeys $ filter isMapping xs
          rawnames = filter isName xs
      Prelude.map (nameToTuple mappings) rawnames
mapValToKeys = Prelude.concatMap valToKeys 
valToKeys (Mapping (k,vs)) = Prelude.map (\v -> (v,k)) vs
valToKeys  _               = []
isName Name{} = True
isName _      = False
nameToTuple m (Name n g c30) = (expandUmlaut m n,g)
nameToTuple _ _              = error "Language.Gender.Dictionary: nameToTuple only for Name type."
expandUmlaut m text = do
  let result = parseOnly (many umlautName) text
      umlautName = umlautPart <|> namePart
      umlautPart = do char '<'
                      umlaut <- takeWhile1 (not . isBracket)
                      char '>'
                      case lookup umlaut m of
                        Just x  -> return $ singleton x
                        Nothing -> return umlaut
      namePart = takeWhile1 (not . isBracket)
      isBracket = flip elem "<>"
  case result of
    Left   _ -> text
    Right xs -> concat xs
isMapping (Mapping _) = True
isMapping  _          = False
dict = many dictLine <* endOfInput
dictLine = dictMapping
       <|> dictComment
       <|> dictName
       <|> dictEquivName
dictMapping = do
  xs <- (,) <$> key <*> some value <* takeTill isEndOfLine <* endOfLine
  return $ Mapping xs
key = do char '#'
         skipWhile isHorizontalSpace
         c <- decimal
         skipSpace
         char '='
         return $ chr c
value = valOr <|> val
  where val   = skipSpace *> char '<' *> takeTill (== '>') <* char '>'
        valOr = val <* string " or"
dictComment = Comment <$> (char '#' *> skipWhile isHorizontalSpace *> takeTill isEndOfLine <* endOfLine)
dictName = do
  gender <- genderParser
  skipSpace
  name <- fmap stripEnd $ take 26
  c30 <- c30Parser 
  skipWhile $ not . isEndOfLine
  endOfLine
  return $ Name name gender c30
dictEquivName = EquivName <$> (char '=' *> skipWhile isHorizontalSpace *> takeTill isHorizontalSpace)
                          <*> (skipWhile isHorizontalSpace *> takeTill isEndOfLine <* endOfLine)
c30Parser = char '-' *> pure (Just CompressedUmlaut)
        <|> char '+' *> pure (Just ExpandedUmlaut)
        <|> char '_' *> pure (Just ArabicPseudoVowel)
        <|> char ' ' *> pure  Nothing
genderParser = string "1F" *> pure Female
           <|> string "F " *> pure Female
           <|> string "1M" *> pure Male
           <|> string "M " *> pure Male
           <|> string "? " *> pure Unisex
           <|> string "?F" *> pure MostlyFemale
           <|> string "?M" *> pure MostlyMale