{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Brok.Parser.Links
( links
, url
) where
import ClassyPrelude
import Data.Attoparsec.Text
import Data.List (nub)
import Brok.Parser.Attoparsec
import Brok.Types.Link (URL)
type Token = Maybe URL
preQueryChars :: String
preQueryChars = "-._~:/#%@"
queryBodyChars :: String
queryBodyChars = "-._~:/#%@!$&*+,;="
chars :: String -> Parser Char
chars chrs = digit <|> letter <|> choice (char <$> chrs)
parens :: Parser Text -> Parser Text
parens parser = surround '(' ')' parser <|> surround '[' ']' parser
part :: String -> Parser Text
part str = concat <$> many' (parens (part str) <|> manyChars (chars str))
query :: Parser Text
query = (++) <$> string "?" <*> part queryBodyChars
url :: Parser Text
url =
concat5 <$> string "http" <*> chopt 's' <*> string "://" <*> part preQueryChars <*>
option "" query
noise :: Parser Token
noise = anyChar >> return Nothing
urls :: Parser [URL]
urls = nub . catMaybes <$> many1 ((Just <$> url) <|> noise)
links :: Text -> Either Text [URL]
links "" = Right []
links content =
case parseOnly urls content of
Right c -> Right c
Left e -> Left $ tshow e