module Language.Lojban.CLL
    (cll)
    where

import Control.Arrow
import Data.Char
import Data.List
import Data.Maybe
import Data.Ord
import Network.Curl
import Network.HTTP
import Text.HTML.TagSoup

-- | Search the CLL pages on jbotcan.org for @query@ by running a Google
--   @site:@ search over HTTP.
--
--   Yields 'Nothing' when the fetch does not finish with 'CurlOK';
--   otherwise 'Just' the @(url, description)@ pairs that 'parse' finds in
--   the response body.
cll :: String -> IO (Maybe [(String,String)])
cll query = do
  (status, page) <- curlGetString searchUrl [CurlUserAgent "Mozilla"]
  case status of
    CurlOK -> return (Just (parse query page))
    _      -> return Nothing
  where
    -- Restrict the search to the CLL section of the site.
    searchTerms = "site:jbotcan.org/cllc " ++ query
    searchUrl   = "http://www.google.com/search?q=" ++ urlEncode searchTerms

-- | Turn a search results page into @(url, description)@ pairs.
--
--   The page is tokenised with 'parseTags', everything before the first
--   'sepTag' is discarded, the rest is cut into chunks at each 'sepTag',
--   and every chunk that 'extract' recognises becomes one result.  Results
--   whose description mentions the search term @t@ (see 'contains') are
--   sorted ahead of those that do not.
parse :: String -> String -> [(String, String)]
parse t page = sortBy (comparing (not . mentionsTerm)) results
  where
    resultTags = dropWhile (/= sepTag) (parseTags page)
    results    = mapMaybe extract (split (== sepTag) resultTags)
    -- 'False' sorts before 'True', hence the 'not' above: hits come first.
    mentionsTerm (_, desc) = contains t desc

-- | Case-insensitive substring test: is @a@ found inside @b@ once both
--   have been passed through 'lower'?
contains :: String -> String -> Bool
contains a b = isInfixOf needle haystack
  where
    needle   = lower a
    haystack = lower b

-- | Pull one search result out of a chunk of tags.
--
--   A chunk is recognised when it begins with an @h3@ opening tag directly
--   followed by an @a@ opening tag that has an @href@ attribute.  The
--   result pairs that @href@ value with the inner text of the tags that
--   follow, up to (not including) the first attribute-less @br@ opening
--   tag.  Any other chunk gives 'Nothing'.
extract (TagOpen "h3" _ : TagOpen "a" attrs : rest) =
    -- Find @href@ by name: attribute order carries no meaning in HTML, so
    -- the link must not be lost just because another attribute comes first.
    fmap (\url -> (url, desc)) (lookup "href" attrs)
  where
    desc = innerText (takeWhile (/= TagOpen "br" []) rest)
extract _ = Nothing

-- | The tag 'parse' uses to delimit individual results: an @li@ opening
--   tag whose only attribute is @class="g"@.
sepTag = TagOpen "li" resultAttrs
  where
    resultAttrs = [("class", "g")]

-- | Cut a list into the runs of elements lying between delimiters.
--
--   Elements satisfying @p@ are delimiters: they are removed, and the
--   non-empty runs between them are returned in their original order.
--   Empty runs (leading, trailing, or between adjacent delimiters) are
--   always dropped, so the result never contains @[]@ and @split p []@
--   is @[]@.
--
--   > split (== ',') ",a,,b," == ["a","b"]
split :: (a -> Bool) -> [a] -> [[a]]
split p = filter (not . null) . pieces
  where
    -- Every run between delimiters, empty ones included.  Produced front to
    -- back, so no accumulator or final 'reverse' is needed.
    pieces xs = case break p xs of
      (run, [])       -> [run]
      (run, _ : rest) -> run : pieces rest

-- | Lower-case every character of a string with 'toLower'.
lower :: String -> String
lower str = [toLower ch | ch <- str]