module Language.Lojban.CLL
(cll)
where
import Control.Arrow
import Data.Char
import Data.List
import Data.Maybe
import Data.Ord
import Network.Curl
import Network.HTTP
import Text.HTML.TagSoup
-- | Run a Google search restricted to @site:jbotcan.org/cllc@ for the given
-- query and scrape the result page.
--
-- Returns 'Nothing' when the curl request does not finish with 'CurlOK'.
-- Otherwise returns 'Just' a list of @(link, description)@ pairs, ordered so
-- that entries whose description contains the query (compared
-- case-insensitively) come before those that do not.
cll :: String -> IO (Maybe [(String, String)])
cll query = do
  (status, page) <- curlGetString searchUrl [CurlUserAgent "Mozilla"]
  case status of
    CurlOK -> return (Just (ranked (hits page)))
    _      -> return Nothing
  where
    searchUrl =
      "http://www.google.com/search?q="
        ++ urlEncode ("site:jbotcan.org/cllc " ++ query)

    -- Tag that opens every individual search result in the page.
    resultTag = TagOpen "li" [("class", "g")]

    -- Skip everything before the first result, cut the tag stream into one
    -- chunk per result, and keep the chunks that look like a result entry.
    hits page =
      mapMaybe hit (split (== resultTag) (dropWhile (/= resultTag) (parseTags page)))

    -- 'flip' makes descriptions that mention the query (True) sort first.
    ranked = sortBy (flip (comparing (mentionsQuery . snd)))

    mentionsQuery desc = lower query `isInfixOf` lower desc

    -- A result chunk is an <h3> followed by an <a> whose first attribute is
    -- the href; the description is the text up to the next bare <br>.
    hit (TagOpen "h3" _ : TagOpen "a" (("href", href) : _) : rest) =
      Just (href, innerText (takeWhile (/= TagOpen "br" []) rest))
    hit _ = Nothing
-- | Split a list on every element that satisfies the predicate.
--
-- The separator elements themselves are discarded, and empty chunks are
-- never returned, whether they arise from leading, consecutive or trailing
-- separators or from an empty input. The remaining chunks keep their
-- original order.
split :: (a -> Bool) -> [a] -> [[a]]
split isSep = filter (not . null) . chunks
  where
    -- Cut at each separator, keeping empty chunks; they are filtered out
    -- in one place above so every position is treated the same way.
    chunks xs = case break isSep xs of
      (chunk, [])       -> [chunk]
      (chunk, _ : rest) -> chunk : chunks rest
-- | Convert every character of a string to lower case using 'toLower'.
lower :: String -> String
lower str = [toLower c | c <- str]