{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} module Waldo.Stalk ( OS(..), Browser(..), NetSpeed(..) , PersonalData(..) , StalkRequest, wai2stalk , StalkDB, loadStalkDB , stalk ) where import Data.Word import Data.Bits import Data.Maybe import Data.Bifunctor import Control.Monad import Data.List (intercalate) import Data.Geolocation.GeoIP import Data.ByteString (ByteString) import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Aeson (ToJSON(toJSON), FromJSON(parseJSON), (.=), (.:), (.:?)) import Network.Socket (SockAddr(SockAddrInet, SockAddrInet6)) import Data.Either (rights) import System.FilePath (()) import qualified Text.Regex.TDFA as R import qualified Text.Regex.TDFA.ByteString as RB import qualified Data.Aeson as JS import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS8 import qualified Network.Wai as WAI import qualified Network.HTTP.Types as HTTP import qualified Data.HashMap.Strict as Map import qualified Data.CaseInsensitive as CI import Network.URI import Data.Geo.Coord import Safe import Data.BrowsCap import Data.BrowsCap.Aquire data Browser = Chrome | Safari | FireFox | InternetExplorer | Opera | Netscape deriving (Eq, Ord, Show) data OS = BSD | Linux | Windows | Mac deriving (Eq, Ord, Show) data NetSpeed = Dialup | Cellular | CableDSL | Corporate deriving (Eq, Ord, Show) data PersonalData = PersonalData { pdLocal :: [ByteString] -- Order of decreasing precision. , pdOrg :: Maybe ByteString -- Who owns the IP. , pdISP :: Maybe ByteString -- Who provides internet to the IP. , pdNetSpeed :: Maybe NetSpeed , pdReferer :: Maybe ByteString , pdRefURI :: Maybe URI , pdBrowser :: Maybe Browser , pdOS :: Maybe OS , pdLatLon :: Maybe Coord , pdScreen :: (Int, Int) , pdBrowserEntry :: Maybe BrowserEntry , pdStalk :: StalkRequest } deriving (Eq, Show) data StalkRequest = StalkRequest { srParams :: HTTP.Query , srHeaders :: HTTP.RequestHeaders , srFromIP :: Maybe ByteString , srTrustForward :: Bool } deriving (Eq, Ord, Show) instance ToJSON StalkRequest where toJSON (StalkRequest {srParams=p, srHeaders=h, srFromIP=ip, srTrustForward=t}) = -- FIXME hasty conversion from aeson 0.6, partial format kept for compatability JS.object [ "params" .= map (bimap TE.decodeUtf8 (fmap TE.decodeUtf8)) p , "headers" .= map (\(k, v) -> (TE.decodeUtf8 $ CI.original k, TE.decodeUtf8 v)) h , "ip" .= fmap TE.decodeUtf8 ip , "trust_forward" .= t ] instance FromJSON StalkRequest where parseJSON (JS.Object o) = do ip <- (fmap TE.encodeUtf8) <$> (o .:? "ip") p <- (map (bimap TE.encodeUtf8 (fmap (TE.encodeUtf8)))) <$> (o .: "params") t <- o .: "trust_forward" h <- (map (bimap (CI.mk . TE.encodeUtf8) TE.encodeUtf8)) <$> (o .: "headers") return $ StalkRequest { srParams=p , srHeaders=h , srFromIP=ip , srTrustForward=t } parseJSON _ = mzero data StalkDB = StalkDB { sdbBrowserCap :: BrowsCap , sdbMaxMindCity :: GeoDB , sdbMaxMindOrg :: GeoDB , sdbMaxMindISP :: GeoDB , sdbMaxMindNet :: GeoDB } loadStalkDB :: FilePath -> IO StalkDB loadStalkDB dd = do bc <- browsCapFromFile $ dd "full_php_browscap.ini" cdb <- openGeoDB mmap_cache $ dd "GeoIPCity.dat" odb <- openGeoDB mmap_cache $ dd "GeoIPOrg.dat" idb <- openGeoDB mmap_cache $ dd "GeoIPISP.dat" ndb <- openGeoDB mmap_cache $ dd "GeoIPNet.dat" return $ StalkDB { sdbBrowserCap = bc , sdbMaxMindCity = cdb , sdbMaxMindOrg = odb , sdbMaxMindISP = idb , sdbMaxMindNet = ndb } wai2stalk :: WAI.Request -> StalkRequest wai2stalk req = StalkRequest { srParams = WAI.queryString req , srHeaders = WAI.requestHeaders req , srFromIP = ip , srTrustForward = True } where ip = case WAI.remoteHost req of SockAddrInet _ addr4 -> let (x0, x1, x2, x3) = w32to8 addr4 in Just $ TE.encodeUtf8 . T.pack $ concat [show x3, ".", show x2, ".", show x1, ".", show x0] SockAddrInet6 _ _ _ _ -> Nothing _ -> Nothing stalk :: StalkDB -> StalkRequest -> IO PersonalData stalk sdb req = do bc <- lookupBrowser (sdbBrowserCap sdb) $ fromMaybe "" agnt let mips = if srTrustForward req then (maybeToList $ lookup "X-Forwarded-For" (srHeaders req)) ++ (map snd $ filter (\h -> (fst h) `elem` ["X-Forward-For"]) (srHeaders req)) else maybeToList $ srFromIP req let ips = mapMaybe validIP mips let browser = str2browser $ fromMaybe "" $ fmap beBrowser bc let os = str2os $ fromMaybe "" $ fmap bePlatform bc geos <- forM (ips) $ \ip -> do gipCityM <- geoLocateByIPAddress (sdbMaxMindCity sdb) ip gipOrgM <- geoStringByIPAddress (sdbMaxMindOrg sdb) ip gipISPM <- geoStringByIPAddress (sdbMaxMindISP sdb) ip gipNetM <- geoStringByIPAddress (sdbMaxMindNet sdb) ip return $ if not $ or [isJust gipCityM, isJust gipOrgM, isJust gipISPM, isJust gipNetM] then Nothing else Just $ PersonalData { pdLocal = (fromMaybe [] $ fmap city2locals gipCityM) ++ ["Earth"] , pdOrg = fmap cleanOrg gipOrgM , pdISP = gipISPM , pdNetSpeed = join $ fmap str2speed gipNetM , pdReferer = referer , pdRefURI = refUri , pdBrowser = browser , pdOS = os , pdLatLon = parseLatLon gipCityM , pdScreen = scrn , pdBrowserEntry = bc , pdStalk = req } return $ fromMaybe (noGeoResult bc browser os) $ listToMaybe $ catMaybes geos where parseLatLon gipc = do c <- gipc return ((geoLatitude c) !.! (geoLongitude c)) city2locals :: GeoIPRecord -> [ByteString] city2locals g = [geoCity g, geoRegion g, geoCountryCode3 g, geoCountryName g, geoContinentCode g] noGeoResult bc browser os = PersonalData { pdLocal = ["Earth"] , pdOrg = Nothing , pdISP = Nothing , pdNetSpeed = Nothing , pdReferer = referer , pdRefURI = refUri , pdBrowser = browser , pdOS = os , pdLatLon = Nothing , pdScreen = scrn , pdBrowserEntry = bc , pdStalk = req } parms = srParams req hdrs = srHeaders req scrn = let x = case BS8.readInt (fromMaybe "" $ join $ lookup "w" parms) of Nothing -> 0 Just (xi, _) -> xi y = case BS8.readInt (fromMaybe "" $ join $ lookup "h" parms) of Nothing -> 0 Just (yi, _) -> yi in (x, y) agnt = lookup "User-Agent" hdrs referer = join $ lookup "r" parms refUri = join $ fmap (parseURI . T.unpack . TE.decodeUtf8) referer validIP :: ByteString -> Maybe ByteString validIP fips0 = do (x0, fips1) <- BS8.readInt fips0 (x1, fips2) <- BS8.readInt $ BS.drop 1 fips1 (x2, fips3) <- BS8.readInt $ BS.drop 1 fips2 (x3, _) <- BS8.readInt $ BS.drop 1 fips3 return $ TE.encodeUtf8 . T.pack $ intercalate "." [show x0, show x1, show x2, show x3] rComp = R.CompOption {R.multiline=False,R.rightAssoc=True ,R.caseSensitive=False,R.newSyntax=True,R.lastStarGreedy=False} rExec = R.ExecOption { R.captureGroups=False } rCompile (p, r) = case RB.compile rComp rExec p of Left e -> Left e Right c -> Right (c, r) clean :: [(ByteString, ByteString)] -> BS.ByteString -> BS.ByteString clean rules this = fromMaybe this $ fmap snd $ headMay $ filter (\(p, _) -> either (const False) isJust $ RB.regexec p this) $ rights $ map rCompile rules cleanOrg :: BS.ByteString -> BS.ByteString cleanOrg = clean [ ("\\^Google", "Google") ] str2speed = flip Map.lookup (Map.fromList [ ("Dialup", Dialup) , ("Cellular", Cellular) , ("Cable/DSL", CableDSL) , ("Corporate", Corporate) ]) str2browser = flip Map.lookup (Map.fromList [ ("Chrome" , Chrome), ("Chromium", Chrome) , ("Safari", Safari) , ("Firefox", FireFox), ("Iceweasel", FireFox) , ("IE", InternetExplorer) , ("Opera", Opera), ("Opera Mini", Opera) , ("Netscape", Netscape) ]) str2os = flip Map.lookup (Map.fromList [ ("MacOSX", Mac) , ("Linux", Linux), ("Debian", Linux) , ("FreeBSD", BSD), ("NetBSD", BSD), ("OpenBSD", BSD) , ("IRIX", BSD), ("IRIX64", BSD) , ("HP-UX", BSD) , ("SunOS", BSD), ("Solaris", BSD) , ("WinCE", Windows) , ("Win16", Windows), ("Win32", Windows), ("Win64", Windows) , ("Win31", Windows) , ("Win95", Windows), ("Win98", Windows), ("WinME", Windows) , ("WinNT", Windows) , ("Win2000", Windows), ("Win2003", Windows) , ("WinXP", Windows), ("WinVista", Windows) , ("Win7", Windows), ("Win8", Windows) ]) w32to8 :: Word32 -> (Word8, Word8, Word8, Word8) w32to8 w0 = let (w0_h, w0_l) = w32to16 w0 ((x0, x1), (x2, x3)) = (w16to8 w0_h, w16to8 w0_l) in (x0, x1, x2, x3) w32to16 :: Word32 -> (Word16, Word16) w32to16 w0 = let w_h = fromIntegral $ w0 `shiftR` 16 w_l = fromIntegral $ w0 .&. 0xFFFF in (w_h, w_l) w16to8 :: Word16 -> (Word8, Word8) w16to8 w0 = let w_h = fromIntegral $ w0 `shiftR` 8 w_l = fromIntegral $ w0 .&. 0xFF in (w_h, w_l)