{-# LANGUAGE OverloadedStrings, DeriveGeneric, DuplicateRecordFields #-}

module Llama where

import Conduit
import Data.Aeson
import Data.ByteString.Lazy (ByteString)
import Data.Text (Text)
import Data.Word
import GHC.Generics
import Network.HTTP.Conduit
import Network.HTTP.Simple hiding (httpLbs)
import Network.HTTP.Types.Status
import System.IO (hPutStrLn, stderr)

import Llama.Streaming

data Role = System | User | CustomRole Text deriving Show

instance ToJSON Role where
  toJSON System         = "system"
  toJSON User           = "user"
  toJSON (CustomRole t) = String t

data LlamaMessage = LlamaMessage
  { role    :: Role
  , content :: Text
  } deriving (Show, Generic)

instance ToJSON LlamaMessage

newtype LlamaApplyTemplateRequest = LlamaApplyTemplateRequest
  { messages :: [LlamaMessage]
  } deriving (Show, Generic)

instance ToJSON LlamaApplyTemplateRequest

newtype LlamaApplyTemplateResponse = LlamaApplyTemplateResponse
  { prompt :: Text
  } deriving (Show, Generic)

instance FromJSON LlamaApplyTemplateResponse

data LlamaTokenizeRequest = LlamaTokenizeRequest
  { content       :: Text
  , add_special   :: Bool
  , parse_special :: Bool
  } deriving (Show, Generic)

instance ToJSON LlamaTokenizeRequest

newtype LlamaTokenizeResponse = LlamaTokenizeResponse
  { tokens :: [Token]
  } deriving (Show, Generic)

instance FromJSON LlamaTokenizeResponse

newtype LlamaDetokenizeRequest = LlamaDetokenizeRequest
  { tokens :: [Token]
  } deriving (Show, Generic)

instance ToJSON LlamaDetokenizeRequest

newtype LlamaDetokenizeResponse = LlamaDetokenizeResponse
  { content :: Text
  } deriving (Show, Generic)

instance FromJSON LlamaDetokenizeResponse

data Health = HealthOk | HealthNok deriving (Show)

-- Llama completion request and response
data LlamaRequest = LlamaRequest
  { prompt :: Text
  , stream :: Bool
  } deriving (Show, Generic)

instance ToJSON LlamaRequest

newtype LlamaResponse = LlamaResponse
  { content :: Text
  } deriving (Show, Generic)

instance FromJSON LlamaResponse

type Token = Word32
type URL = String

-- |Apply the LLM template to produce a raw LLM prompt from the role-content pairs
applyTemplateSimple :: URL -> LlamaApplyTemplateRequest -> IO (Maybe Text)
applyTemplateSimple = applyTemplateGeneral httpLBS

-- |Like `applyTemplateSimple` but with a user-supplied `Manager`
applyTemplate :: URL -> Manager -> LlamaApplyTemplateRequest -> IO (Maybe Text)
applyTemplate url manager = applyTemplateGeneral (`httpLbs` manager) url

-- |Like `applyTemplateSimple` but with a user-supplied fetcher
applyTemplateGeneral :: (ToJSON p) => (Request -> IO (Response ByteString)) -> URL -> p -> IO (Maybe Text)
applyTemplateGeneral fetch url input = do
  let request = parseRequest_ $ url ++ "/apply-template"
      body = encode input
      req = request
        { method = "POST"
        , requestBody = RequestBodyLBS body
        , requestHeaders = [("Content-Type", "application/json")]
        }
  response <- fetch req
  case decode (responseBody response) of
    Just (LlamaApplyTemplateResponse text) -> return (Just text)
    Nothing -> do
      liftIO $ hPutStrLn stderr "Failed to decode Llama response"
      return Nothing

-- |Send a raw prompt to the /completion endpoint and return the generated text
sendToLlama :: URL -> Manager -> Text -> IO (Maybe Text)
sendToLlama url manager input = do
  let request = parseRequest_ $ url ++ "/completion"
      body = encode (LlamaRequest input False)
      req = request
        { method = "POST"
        , requestBody = RequestBodyLBS body
        , requestHeaders = [("Content-Type", "application/json")]
        }
  response <- httpLbs req manager
  case decode (responseBody response) of
    Just (LlamaResponse text) -> return (Just text)
    Nothing -> do
      liftIO $ hPutStrLn stderr "Failed to decode Llama response"
      return Nothing
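
-- An illustrative usage sketch, not part of the original API: apply the
-- server-side chat template and then run a blocking completion with an
-- explicit Manager. The server address http://localhost:8080 and the function
-- name exampleCompletion are assumed placeholders.
exampleCompletion :: IO ()
exampleCompletion = do
  manager <- newManager tlsManagerSettings
  let msgs = LlamaApplyTemplateRequest
        [ LlamaMessage System "You are a helpful assistant."
        , LlamaMessage User "Summarise what this module does."
        ]
  templated <- applyTemplate "http://localhost:8080" manager msgs
  case templated of
    Nothing      -> hPutStrLn stderr "Template application failed"
    Just prompt' -> do
      reply <- sendToLlama "http://localhost:8080" manager prompt'
      maybe (hPutStrLn stderr "No completion received") print reply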
response" return Nothing sendToLlamaStreaming :: (MonadThrow m, MonadResource m) => URL -> Manager -> Text -> IO (ConduitT () LlamaStreamingResponse m ()) sendToLlamaStreaming url manager input = do let request = setRequestManager manager $ parseRequest_ $ url ++ "/completion" body = encode (LlamaRequest input True) req = request { method = "POST" , requestBody = RequestBodyLBS body , requestHeaders = [("Content-Type", "application/json")] } pure $ httpSource req getResponseBody .| eventConduit tokenize :: URL -> LlamaTokenizeRequest -> IO (Maybe [Token]) tokenize url input = do let request = parseRequest_ $ url ++ "/tokenize" req = request { method = "POST" , requestBody = RequestBodyLBS $ encode input , requestHeaders = [("Content-Type", "application/json")] } response <- httpLBS req case decode (responseBody response) of Just (LlamaTokenizeResponse result) -> return (Just result) Nothing -> do liftIO $ hPutStrLn stderr "Failed to decode Llama response" return Nothing detokenize :: URL -> [Token] -> IO (Maybe Text) detokenize url input = do let request = parseRequest_ $ url ++ "/detokenize" body = encode $ LlamaDetokenizeRequest input req = request { method = "POST" , requestBody = RequestBodyLBS body , requestHeaders = [("Content-Type", "application/json")] } response <- httpLBS req case decode (responseBody response) of Just (LlamaDetokenizeResponse text) -> return (Just text) Nothing -> do liftIO $ hPutStrLn stderr "Failed to decode Llama response" return Nothing llama :: URL -> Text -> IO (Maybe Text) llama url input = do manager <- liftIO $ newManager tlsManagerSettings { managerResponseTimeout = responseTimeoutNone } sendToLlama url manager input llamaTemplated :: URL -> LlamaApplyTemplateRequest -> IO (Maybe Text) llamaTemplated url input = do manager <- liftIO $ newManager tlsManagerSettings { managerResponseTimeout = responseTimeoutNone } res <- applyTemplate url manager input case res of Just text -> sendToLlama url manager text _ -> pure Nothing llamaTemplatedStreaming :: (MonadThrow m, MonadResource m) => URL -> LlamaApplyTemplateRequest -> IO (ConduitT () LlamaStreamingResponse m ()) llamaTemplatedStreaming url input = do manager <- liftIO $ newManager tlsManagerSettings { managerResponseTimeout = responseTimeoutNone } res <- applyTemplate url manager input case res of Just text -> sendToLlamaStreaming url manager text _ -> pure $ yieldMany [] health :: URL -> IO Health health url = do manager <- liftIO $ newManager tlsManagerSettings { managerResponseTimeout = responseTimeoutNone } let request = parseRequest_ $ url ++ "/health" response <- httpLbs request manager pure $ if responseStatus response == ok200 then HealthOk else HealthNok