{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module NLP.Postal
( AddressParserOptions
, NormalizeOptions
, setup
, setupParser
, setupLanguageClassifier
, getAddressParserDefaultOptions
, getDefaultNormalizeOptions
, parseAddress
, expandAddress
, tearDownParser
, tearDownLanguageClassifier
, tearDown
) where
import Data.Monoid ((<>))
import Foreign.C.Types
import qualified Language.C.Inline as C
import qualified Language.C.Inline.Unsafe as CU
import Language.C.Inline.Context as CX
import Foreign.Ptr
import Data.ByteString.Internal (fromForeignPtr)
import Foreign.ForeignPtr
import Control.Monad (forM, when)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
C.context (CX.baseCtx <> CX.bsCtx)
C.include "<libpostal/libpostal.h>"
C.include "<string.h>"
data AddressParserOptions
data NormalizeOptions
foreign import ccall "stdlib.h &free" p_free :: FunPtr (Ptr a -> IO ())
setup :: IO Int
setup = fromIntegral <$> [CU.exp| int { libpostal_setup() } |]
setupParser :: IO Int
setupParser = fromIntegral <$> [CU.exp| int { libpostal_setup_parser() } |]
setupLanguageClassifier :: IO Int
setupLanguageClassifier = fromIntegral <$> [CU.exp| int { libpostal_setup_language_classifier() } |]
getAddressParserDefaultOptions :: IO (Ptr AddressParserOptions)
getAddressParserDefaultOptions = castPtr <$> [CU.block|
void * {
libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();
void * hoptions = malloc(sizeof(options));
memcpy(hoptions, &options, sizeof(options));
return hoptions;
} |]
getDefaultNormalizeOptions :: IO (Ptr NormalizeOptions)
getDefaultNormalizeOptions = castPtr <$> [CU.block|
void * {
libpostal_normalize_options_t options = libpostal_get_default_options();
void * hoptions = malloc(sizeof(options));
memcpy(hoptions, &options, sizeof(options));
return hoptions;
} |]
parseAddress :: Ptr AddressParserOptions -> T.Text -> IO [(T.Text, T.Text)]
parseAddress options address = do
response <- [CU.exp| void * { libpostal_parse_address($bs-ptr:bsAddress, * (libpostal_address_parser_options_t *) $(void * castedOptions)) } |]
when (response == nullPtr) $ fail "libpostal_parse_address returned NULL a pointer"
numComponents <- [C.exp| int { ((libpostal_address_parser_response_t *) $(void * response))->num_components } |]
result <- forM [0..numComponents-1] $ \i -> do
(labelLen, labelPtr) <- C.withPtr $ \len -> [C.block| char * {
char * srcbuf = ((libpostal_address_parser_response_t *) $(void * response))->labels[$(int i)];
*$(size_t * len) = strlen(srcbuf);
char * dstbuf = malloc(*$(size_t * len));
memcpy(dstbuf, srcbuf, *$(size_t * len));
return dstbuf;
} |]
labelFPtr <- newForeignPtr p_free labelPtr
(compLen, compPtr) <- C.withPtr $ \len -> [C.block| char * {
char * srcbuf = ((libpostal_address_parser_response_t *) $(void * response))->components[$(int i)];
*$(size_t * len) = strlen(srcbuf);
char * dstbuf = malloc(*$(size_t * len));
memcpy(dstbuf, srcbuf, *$(size_t * len));
return dstbuf;
} |]
compFPtr <- newForeignPtr p_free compPtr
return
( TE.decodeUtf8 $ fromForeignPtr (castForeignPtr labelFPtr) 0 (fromIntegral labelLen)
, TE.decodeUtf8 $ fromForeignPtr (castForeignPtr compFPtr) 0 (fromIntegral compLen)
)
[CU.exp| void { libpostal_address_parser_response_destroy((libpostal_address_parser_response_t *) $(void * response)) } |]
return result
where
castedOptions = castPtr options
bsAddress = TE.encodeUtf8 address <> "\0"
expandAddress :: Ptr NormalizeOptions -> T.Text -> IO [T.Text]
expandAddress options address = do
(numExpansions, expansions) <- C.withPtr $ \numExpansions ->
[CU.exp| char * * { libpostal_expand_address($bs-ptr:bsAddress, * (libpostal_normalize_options_t *) $(void * cOptions), $(size_t * numExpansions) ) } |]
result <- forM [0..numExpansions-1] $ \i -> do
(xpLen, xpPtr) <- C.withPtr $ \len -> [C.block| char * {
char * srcbuf = $(char * * expansions)[$(size_t i)];
*$(size_t * len) = strlen(srcbuf);
char * dstbuf = malloc(*$(size_t * len));
memcpy(dstbuf, srcbuf, *$(size_t * len));
return dstbuf;
} |]
xpFPtr <- newForeignPtr p_free xpPtr
return $ TE.decodeUtf8 $ fromForeignPtr (castForeignPtr xpFPtr) 0 (fromIntegral xpLen)
[CU.exp| void { libpostal_expansion_array_destroy($(char * * expansions), $(size_t numExpansions)) } |]
return result
where
bsAddress = TE.encodeUtf8 address <> "\0"
cOptions = castPtr options
tearDownParser :: IO ()
tearDownParser = [CU.exp| void { libpostal_teardown_parser() } |]
tearDownLanguageClassifier :: IO ()
tearDownLanguageClassifier = [CU.exp| void { libpostal_teardown_language_classifier() } |]
tearDown :: IO ()
tearDown = [CU.exp| void { libpostal_teardown() } |]