-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Haskell implementation of tiktoken -- -- This package only implements tokenization. In other words, given an -- existing encoding (e.g. cl100k_base) you can tokenize an input. @package tiktoken @version 1.0.2 -- | You can use this module to convert back and forth between a -- ByteString and its corresponding tokens using an existing -- encoding like cl100k_base or o200k_base -- -- Example usage: -- --
--   {-# LANGUAGE OverloadedStrings #-}
--   
--   import Tiktoken (o200k_base, toTokens, toRanks)
--   
--   main :: IO ()
--   main = do
--       -- Just ["El"," perro"," come"," las"," man","z","anas"]
--       print (toTokens o200k_base "El perro come las manzanas")
--   
--       -- Just [4422,96439,3063,1996,873,89,14457]
--       print (toRanks o200k_base "El perro come las manzanas")
--   
module Tiktoken -- | This is an efficient internal representation of an encoding like -- cl100k_base, p50k_edit, or o200k_base data Encoding -- | Parse an encoding from the .tiktoken file format tiktokenToEncoding :: ByteString -> Text -> Either (ParseErrorBundle Text Void) Encoding -- | Add special tokens to a base Encoding addSpecialTokens :: Map ByteString Int -> Encoding -> Encoding -- | r50k_base Encoding r50k_base :: Encoding -- | p50k_base Encoding p50k_base :: Encoding -- | p50k_edit Encoding p50k_edit :: Encoding -- | cl100k_base Encoding cl100k_base :: Encoding -- | o200k_base Encoding o200k_base :: Encoding -- | Use an Encoding to tokenize a ByteString into smaller -- ByteStrings -- -- This only fails if you provide an Encoding that cannot rank all -- possible 1-byte sequences toTokens :: Encoding -> ByteString -> Maybe [ByteString] -- | Use an Encoding to tokenize a ByteString into ranks -- -- This only fails if you provide an Encoding that cannot rank all -- possible 1-byte sequences toRanks :: Encoding -> ByteString -> Maybe [Int] -- | Use an Encoding to tokenize a ByteString into smaller -- ByteStrings and their associated ranks -- -- This only fails if you provide an Encoding that cannot rank all -- possible 1-byte sequences toTokensAndRanks :: Encoding -> ByteString -> Maybe [(Int, ByteString)] -- | Combine a sequence of ByteString tokens back into a -- ByteString -- -- This is just a synonym for -- Data.ByteString.concat (no Encoding -- necessary), provided solely for consistency/convenience. fromTokens :: [ByteString] -> ByteString -- | Convert a sequence of ranks back into a ByteString -- -- This will fail if you supply any ranks which are not recognized by the -- Encoding. fromRanks :: Encoding -> [Int] -> Maybe ByteString instance Control.DeepSeq.NFData Tiktoken.Encoding instance GHC.Generics.Generic Tiktoken.Encoding instance GHC.Classes.Ord Tiktoken.Ranked instance GHC.Classes.Eq Tiktoken.Ranked