import Data.List.Replace (replace, substAllM) import Network.Curl.Easy (curl_global_init, initialize) import Network.LongURL (CurlInstance(..), domains, longURL, supportedSites) import Control.Applicative ((<$>), (<*>)) import Data.ByteString.Lazy.Char8 (ByteString, pack, splitWith, unpack) import qualified Data.ByteString.Lazy.Char8 as B import Data.List (intercalate) import Text.Regex.Base.Context () import Text.Regex.Base.RegexLike (getAllMatches, makeRegexM, match) import Text.Regex.Posix.ByteString.Lazy (Regex) import Text.Regex.Posix.String () -- FIXME: Make this lazy again bsLines :: ByteString -> [ByteString] bsLines = postProcess . splitWith (== '\n') where postProcess [] = [] postProcess result | B.null (last result) = init result | otherwise = result -- | Split input into lines, in order to do incremental processing, and to try to limit memory usage mapMLines_ :: Monad m => (ByteString -> m ()) -> ByteString -> m () mapMLines_ f = mapM_ f . bsLines main :: IO () main = do curl_global_init 2 -- Initialise just win32sock (if applicable) curl' <- initialize let ci = CurlInstance { curl = curl', userAgent = "hlongurl 0.9.3; Haskell" } supportedDomains <- (domains =<<) <$> supportedSites ci urlRegex <- (makeRegexM $ "http://(" ++ intercalate "|" (replace "." "\\." <$> supportedDomains) ++ ")/([-a-zA-Z0-9_\\'/\\\\\\+&%\\$#\\=~])*") :: IO Regex let substURLs :: ByteString -> IO () substURLs = (B.putStrLn =<<) . (substAllM (((pack . show) <$>) . longURL ci . unpack) <*> getAllMatches . match urlRegex) mapMLines_ substURLs =<< B.getContents