{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module Speechmatics.Client( transcribeBytes, transcribe, AuthToken, UserID, LoadedFile(..), Error(..), Format(..), Location(..), ByteConfig(..) ) where import Control.Applicative import Control.Concurrent (threadDelay) import Control.Lens import Control.Monad import Control.Monad.Except import Data.Text.Encoding import Network.HTTP.Types.URI import OpenSSL.Session (context) import Data.Aeson (FromJSON (parseJSON), Value, eitherDecode, encode) import Network.HTTP.Client (defaultManagerSettings, managerResponseTimeout, responseTimeoutMicro, HttpException) import Data.Bifunctor import Network.HTTP.Client.OpenSSL import Network.Mime (MimeType) import Network.Wreq import System.Log.Heavy.LoggingT(LoggingT(..)) import qualified System.Log.Heavy.Shortcuts as Logcut (info, debug) import Data.Text.Format.Heavy.Instances (Single(..)) import qualified Data.ByteString.Char8 as C8BS import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy.Char8 as C8LBS import Data.Maybe import Data.Monoid import Data.Text import qualified Data.Text.Lazy as Lazy import qualified Speechmatics.JSON.PeekJob as Peek import qualified Speechmatics.JSON.PostJob as Post import Control.Lens.Prism import Control.Exception import Control.Exception.Lens import Speechmatics.Log import qualified Speechmatics.Request as Sess -- | Authorization token obtained from speechmatics type AuthToken = Maybe Text -- | UserID obtained from speechmatics type UserID = Integer -- | Which langauge model to use type ModelName = String type JobID = Integer data LoadedFile = LoadedFile { content :: LBS.ByteString, filename :: String, mimetype :: MimeType -- to get this Jappie used defaultMimeLookup "f.mp3" } data Error = UnkownResponse | ParseError String deriving(Show) data Format = JsonV1 | JsonV2 -- | Configuration data for transcribe function -- has less configuration then transcribeBytes because the function accepts -- much more generic parts data Location = Location { format :: Format, url :: String, userId :: UserID, authToken :: AuthToken } -- | Configuration data for transcribeBytes function data ByteConfig = ByteConfig { location :: Location, config :: Value } makeOpts :: Options makeOpts = withAuthorization defaults timeout = responseTimeoutMicro 10000 inputName :: Text inputName = pack "data_file" modelName :: Text modelName = pack "model" -- | Transcribe a file that is already loaded in memory -- Does not catch exeptions transcribeBytes :: ByteConfig -> LoadedFile -> LoggingT IO(Either Error Value) transcribeBytes (ByteConfig location config) (LoadedFile content filename mimetype) = do debug $ pack $ show parts transcribe location defaults parts where parts = [ partLBS inputName content & partContentType ?~ mimetype & partFileName ?~ filename, partLBS "config" $ encode config ] withAuthorization :: Options -> Options withAuthorization options = options & manager .~ Left (opensslManagerSettings context) & manager .~ Left (defaultManagerSettings { managerResponseTimeout = timeout} ) formatQuery :: Format -> QueryText formatQuery JsonV1 = [] formatQuery JsonV2 = [("format", Just "json-v2")] authQuery :: AuthToken -> QueryText authQuery auth = maybeToList $ (\x -> (pack "auth_token", Just x)) <$> auth slash = mappend . flip mappend "/" jobsUri :: String -> UserID -> String -> String jobsUri url userID x = url <> show userID `slash` "jobs" `slash` x printQuery :: QueryText -> String printQuery = C8BS.unpack . renderQuery True . queryTextToQuery -- | More general transcription interface, allows custom parts to be inserted -- with whatever the user wants. -- Does not catch exeptions transcribe :: Location -> Options -> [Part] -> LoggingT IO(Either Error Value) transcribe location@Location{format=form, url=uri, userId=userID, authToken=token} options parts = do session <- lift Sess.newSession info $ "begin trancription for " <> pack uri response <- Sess.postWith auth session postUri parts debug $ decodeUtf8 $ LBS.toStrict $ response ^. responseBody debug $ pack uri case Post.parse (response ^. responseBody) of Left message -> return $ Left $ ParseError message Right parsed -> do let jobID = Post.postId parsed pollStatus location session jobID >>= \case Just error -> return $ Left error Nothing -> do let transcriptUri = url userID (show jobID `slash` "transcript") <> printQuery (formatQuery form <> authQuery token) result <- Sess.getWith makeOpts session transcriptUri return $ first ParseError $ eitherDecode (result ^. responseBody) where auth = withAuthorization options postUri = url userID (printQuery $ authQuery token ) url = jobsUri uri pollStatus :: Location -> Sess.Session -> JobID -> LoggingT IO(Maybe Error) pollStatus = pollStatus' Nothing waitFor :: Int -> IO() waitFor x = threadDelay (1000000*x) pollStatus' :: Maybe Integer -> Location -> Sess.Session -> JobID -> LoggingT IO(Maybe Error) pollStatus' (Just wait) location sess jobid = lift (waitFor (fromIntegral wait)) >> pollStatus' Nothing location sess jobid pollStatus' Nothing location@Location{url=uri, userId=userID, authToken=token} session jobID = do let statusUri = jobsUri uri userID (show jobID) <> printQuery (authQuery token) statusResponse <- Sess.getWith makeOpts session statusUri debug $ pack $ show statusResponse let body = statusResponse ^. responseBody case second Peek.jobCheckWait (Peek.parse body) of Left error -> return $ Just $ ParseError error Right Nothing -> return Nothing -- done Right maybe -> pollStatus' maybe location session jobID