module Silero.Detector (
VoiceDetector (..),
SpeechSegment (..),
detectSegments,
defaultVad,
withVad,
) where
import Control.Applicative (Applicative (liftA2))
import Control.Exception (bracket, finally)
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO (liftIO))
import Data.Int (Int32)
import Data.Vector.Storable (Storable, Vector)
import qualified Data.Vector.Storable as Vector
import Foreign (Ptr, Storable (..), free, malloc, nullPtr, peekArray, with)
import Foreign.C (CFloat (..))
import Foreign.Storable.Generic (GStorable)
import GHC.Generics (Generic)
import Silero.Model (SileroModel (..), sampleRate, withModel)
import UnliftIO (MonadUnliftIO)
data VoiceDetector = VoiceDetector
{ VoiceDetector -> SileroModel
model :: SileroModel
, VoiceDetector -> Float
startThreshold :: Float
, VoiceDetector -> Float
endThreshold :: Float
, VoiceDetector -> Float
minSpeechSamples :: Float
, VoiceDetector -> Float
maxSpeechSamples :: Float
, VoiceDetector -> Float
speechPadSamples :: Float
, VoiceDetector -> Float
minSilenceSamples :: Float
, VoiceDetector -> Float
minSilenceSamplesAtMaxSpeech :: Float
}
deriving ((forall x. VoiceDetector -> Rep VoiceDetector x)
-> (forall x. Rep VoiceDetector x -> VoiceDetector)
-> Generic VoiceDetector
forall x. Rep VoiceDetector x -> VoiceDetector
forall x. VoiceDetector -> Rep VoiceDetector x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. VoiceDetector -> Rep VoiceDetector x
from :: forall x. VoiceDetector -> Rep VoiceDetector x
$cto :: forall x. Rep VoiceDetector x -> VoiceDetector
to :: forall x. Rep VoiceDetector x -> VoiceDetector
Generic, VoiceDetector -> Int
(VoiceDetector -> Int)
-> (VoiceDetector -> Int)
-> (forall b. Ptr b -> Int -> IO VoiceDetector)
-> (forall b. Ptr b -> Int -> VoiceDetector -> IO ())
-> GStorable VoiceDetector
forall b. Ptr b -> Int -> IO VoiceDetector
forall b. Ptr b -> Int -> VoiceDetector -> IO ()
forall a.
(a -> Int)
-> (a -> Int)
-> (forall b. Ptr b -> Int -> IO a)
-> (forall b. Ptr b -> Int -> a -> IO ())
-> GStorable a
$cgsizeOf :: VoiceDetector -> Int
gsizeOf :: VoiceDetector -> Int
$cgalignment :: VoiceDetector -> Int
galignment :: VoiceDetector -> Int
$cgpeekByteOff :: forall b. Ptr b -> Int -> IO VoiceDetector
gpeekByteOff :: forall b. Ptr b -> Int -> IO VoiceDetector
$cgpokeByteOff :: forall b. Ptr b -> Int -> VoiceDetector -> IO ()
gpokeByteOff :: forall b. Ptr b -> Int -> VoiceDetector -> IO ()
GStorable)
defaultVad :: SileroModel -> VoiceDetector
defaultVad :: SileroModel -> VoiceDetector
defaultVad SileroModel
model =
VoiceDetector
{ $sel:model:VoiceDetector :: SileroModel
model = SileroModel
model
, $sel:startThreshold:VoiceDetector :: Float
startThreshold = Float
0.5
, $sel:endThreshold:VoiceDetector :: Float
endThreshold = Float
0.35
, $sel:minSpeechSamples:VoiceDetector :: Float
minSpeechSamples = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sampleRate Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
1000.0 Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
250.0
, $sel:maxSpeechSamples:VoiceDetector :: Float
maxSpeechSamples = Float
1.0 Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
0.0
, $sel:speechPadSamples:VoiceDetector :: Float
speechPadSamples = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sampleRate Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
1000.0 Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
30.0
, $sel:minSilenceSamples:VoiceDetector :: Float
minSilenceSamples = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sampleRate Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
1000.0 Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
100.0
, $sel:minSilenceSamplesAtMaxSpeech:VoiceDetector :: Float
minSilenceSamplesAtMaxSpeech = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sampleRate Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
1000.0 Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
98.0
}
withVad :: (MonadUnliftIO m) => (VoiceDetector -> m a) -> m a
withVad :: forall (m :: * -> *) a.
MonadUnliftIO m =>
(VoiceDetector -> m a) -> m a
withVad VoiceDetector -> m a
runVad = (SileroModel -> m a) -> m a
forall (m :: * -> *) a.
MonadUnliftIO m =>
(SileroModel -> m a) -> m a
withModel (VoiceDetector -> m a
runVad (VoiceDetector -> m a)
-> (SileroModel -> VoiceDetector) -> SileroModel -> m a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SileroModel -> VoiceDetector
defaultVad)
data SpeechSegment = SpeechSegment
{ SpeechSegment -> Int32
startIndex :: Int32
, SpeechSegment -> Int32
endIndex :: Int32
, SpeechSegment -> CFloat
startTime :: CFloat
, SpeechSegment -> CFloat
endTime :: CFloat
}
deriving (Int -> SpeechSegment -> ShowS
[SpeechSegment] -> ShowS
SpeechSegment -> String
(Int -> SpeechSegment -> ShowS)
-> (SpeechSegment -> String)
-> ([SpeechSegment] -> ShowS)
-> Show SpeechSegment
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SpeechSegment -> ShowS
showsPrec :: Int -> SpeechSegment -> ShowS
$cshow :: SpeechSegment -> String
show :: SpeechSegment -> String
$cshowList :: [SpeechSegment] -> ShowS
showList :: [SpeechSegment] -> ShowS
Show, ReadPrec [SpeechSegment]
ReadPrec SpeechSegment
Int -> ReadS SpeechSegment
ReadS [SpeechSegment]
(Int -> ReadS SpeechSegment)
-> ReadS [SpeechSegment]
-> ReadPrec SpeechSegment
-> ReadPrec [SpeechSegment]
-> Read SpeechSegment
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS SpeechSegment
readsPrec :: Int -> ReadS SpeechSegment
$creadList :: ReadS [SpeechSegment]
readList :: ReadS [SpeechSegment]
$creadPrec :: ReadPrec SpeechSegment
readPrec :: ReadPrec SpeechSegment
$creadListPrec :: ReadPrec [SpeechSegment]
readListPrec :: ReadPrec [SpeechSegment]
Read, SpeechSegment -> SpeechSegment -> Bool
(SpeechSegment -> SpeechSegment -> Bool)
-> (SpeechSegment -> SpeechSegment -> Bool) -> Eq SpeechSegment
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SpeechSegment -> SpeechSegment -> Bool
== :: SpeechSegment -> SpeechSegment -> Bool
$c/= :: SpeechSegment -> SpeechSegment -> Bool
/= :: SpeechSegment -> SpeechSegment -> Bool
Eq, Eq SpeechSegment
Eq SpeechSegment =>
(SpeechSegment -> SpeechSegment -> Ordering)
-> (SpeechSegment -> SpeechSegment -> Bool)
-> (SpeechSegment -> SpeechSegment -> Bool)
-> (SpeechSegment -> SpeechSegment -> Bool)
-> (SpeechSegment -> SpeechSegment -> Bool)
-> (SpeechSegment -> SpeechSegment -> SpeechSegment)
-> (SpeechSegment -> SpeechSegment -> SpeechSegment)
-> Ord SpeechSegment
SpeechSegment -> SpeechSegment -> Bool
SpeechSegment -> SpeechSegment -> Ordering
SpeechSegment -> SpeechSegment -> SpeechSegment
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: SpeechSegment -> SpeechSegment -> Ordering
compare :: SpeechSegment -> SpeechSegment -> Ordering
$c< :: SpeechSegment -> SpeechSegment -> Bool
< :: SpeechSegment -> SpeechSegment -> Bool
$c<= :: SpeechSegment -> SpeechSegment -> Bool
<= :: SpeechSegment -> SpeechSegment -> Bool
$c> :: SpeechSegment -> SpeechSegment -> Bool
> :: SpeechSegment -> SpeechSegment -> Bool
$c>= :: SpeechSegment -> SpeechSegment -> Bool
>= :: SpeechSegment -> SpeechSegment -> Bool
$cmax :: SpeechSegment -> SpeechSegment -> SpeechSegment
max :: SpeechSegment -> SpeechSegment -> SpeechSegment
$cmin :: SpeechSegment -> SpeechSegment -> SpeechSegment
min :: SpeechSegment -> SpeechSegment -> SpeechSegment
Ord, (forall x. SpeechSegment -> Rep SpeechSegment x)
-> (forall x. Rep SpeechSegment x -> SpeechSegment)
-> Generic SpeechSegment
forall x. Rep SpeechSegment x -> SpeechSegment
forall x. SpeechSegment -> Rep SpeechSegment x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. SpeechSegment -> Rep SpeechSegment x
from :: forall x. SpeechSegment -> Rep SpeechSegment x
$cto :: forall x. Rep SpeechSegment x -> SpeechSegment
to :: forall x. Rep SpeechSegment x -> SpeechSegment
Generic, SpeechSegment -> Int
(SpeechSegment -> Int)
-> (SpeechSegment -> Int)
-> (forall b. Ptr b -> Int -> IO SpeechSegment)
-> (forall b. Ptr b -> Int -> SpeechSegment -> IO ())
-> GStorable SpeechSegment
forall b. Ptr b -> Int -> IO SpeechSegment
forall b. Ptr b -> Int -> SpeechSegment -> IO ()
forall a.
(a -> Int)
-> (a -> Int)
-> (forall b. Ptr b -> Int -> IO a)
-> (forall b. Ptr b -> Int -> a -> IO ())
-> GStorable a
$cgsizeOf :: SpeechSegment -> Int
gsizeOf :: SpeechSegment -> Int
$cgalignment :: SpeechSegment -> Int
galignment :: SpeechSegment -> Int
$cgpeekByteOff :: forall b. Ptr b -> Int -> IO SpeechSegment
gpeekByteOff :: forall b. Ptr b -> Int -> IO SpeechSegment
$cgpokeByteOff :: forall b. Ptr b -> Int -> SpeechSegment -> IO ()
gpokeByteOff :: forall b. Ptr b -> Int -> SpeechSegment -> IO ()
GStorable)
foreign import ccall "detector.h detect_segments"
c_detect_segments ::
Ptr VoiceDetector ->
Int ->
Ptr Float ->
Ptr Int ->
Ptr (Ptr SpeechSegment) ->
IO ()
detectSegments :: (MonadIO m) => VoiceDetector -> Vector Float -> m [SpeechSegment]
detectSegments :: forall (m :: * -> *).
MonadIO m =>
VoiceDetector -> Vector Float -> m [SpeechSegment]
detectSegments VoiceDetector
vad Vector Float
samples =
IO [SpeechSegment] -> m [SpeechSegment]
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [SpeechSegment] -> m [SpeechSegment])
-> ((Ptr VoiceDetector -> IO [SpeechSegment])
-> IO [SpeechSegment])
-> (Ptr VoiceDetector -> IO [SpeechSegment])
-> m [SpeechSegment]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. VoiceDetector
-> (Ptr VoiceDetector -> IO [SpeechSegment]) -> IO [SpeechSegment]
forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with VoiceDetector
vad ((Ptr VoiceDetector -> IO [SpeechSegment]) -> m [SpeechSegment])
-> (Ptr VoiceDetector -> IO [SpeechSegment]) -> m [SpeechSegment]
forall a b. (a -> b) -> a -> b
$ \Ptr VoiceDetector
vadPtr ->
Vector Float
-> (Ptr Float -> IO [SpeechSegment]) -> IO [SpeechSegment]
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
Vector.unsafeWith Vector Float
samples ((Ptr Float -> IO [SpeechSegment]) -> IO [SpeechSegment])
-> (Ptr Float -> IO [SpeechSegment]) -> IO [SpeechSegment]
forall a b. (a -> b) -> a -> b
$ \Ptr Float
samplesPtr ->
forall a b. Storable a => (Ptr a -> IO b) -> IO b
withPtr @Int ((Ptr Int -> IO [SpeechSegment]) -> IO [SpeechSegment])
-> (Ptr Int -> IO [SpeechSegment]) -> IO [SpeechSegment]
forall a b. (a -> b) -> a -> b
$ \Ptr Int
segmentsLengthPtr ->
forall a b. Storable a => (Ptr a -> IO b) -> IO b
withPtr @(Ptr SpeechSegment) ((Ptr (Ptr SpeechSegment) -> IO [SpeechSegment])
-> IO [SpeechSegment])
-> (Ptr (Ptr SpeechSegment) -> IO [SpeechSegment])
-> IO [SpeechSegment]
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr SpeechSegment)
segmentsPtr -> do
Ptr Float -> Float -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Float
samplesPtr Float
0
Ptr (Ptr SpeechSegment) -> Ptr SpeechSegment -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr (Ptr SpeechSegment)
segmentsPtr Ptr SpeechSegment
forall a. Ptr a
nullPtr
(IO [SpeechSegment] -> IO () -> IO [SpeechSegment])
-> IO () -> IO [SpeechSegment] -> IO [SpeechSegment]
forall a b c. (a -> b -> c) -> b -> a -> c
flip IO [SpeechSegment] -> IO () -> IO [SpeechSegment]
forall a b. IO a -> IO b -> IO a
finally (Ptr SpeechSegment -> IO ()
forall a. Ptr a -> IO ()
free (Ptr SpeechSegment -> IO ()) -> IO (Ptr SpeechSegment) -> IO ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Ptr (Ptr SpeechSegment) -> IO (Ptr SpeechSegment)
forall a. Storable a => Ptr a -> IO a
peek Ptr (Ptr SpeechSegment)
segmentsPtr) (IO [SpeechSegment] -> IO [SpeechSegment])
-> IO [SpeechSegment] -> IO [SpeechSegment]
forall a b. (a -> b) -> a -> b
$ do
Ptr VoiceDetector
-> Int -> Ptr Float -> Ptr Int -> Ptr (Ptr SpeechSegment) -> IO ()
c_detect_segments
Ptr VoiceDetector
vadPtr
(Vector Float -> Int
forall a. Storable a => Vector a -> Int
Vector.length Vector Float
samples)
Ptr Float
samplesPtr
Ptr Int
segmentsLengthPtr
Ptr (Ptr SpeechSegment)
segmentsPtr
IO (IO [SpeechSegment]) -> IO [SpeechSegment]
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (IO (IO [SpeechSegment]) -> IO [SpeechSegment])
-> IO (IO [SpeechSegment]) -> IO [SpeechSegment]
forall a b. (a -> b) -> a -> b
$
(Int -> Ptr SpeechSegment -> IO [SpeechSegment])
-> IO Int -> IO (Ptr SpeechSegment) -> IO (IO [SpeechSegment])
forall a b c. (a -> b -> c) -> IO a -> IO b -> IO c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2
Int -> Ptr SpeechSegment -> IO [SpeechSegment]
forall a. Storable a => Int -> Ptr a -> IO [a]
peekArray
(Ptr Int -> IO Int
forall a. Storable a => Ptr a -> IO a
peek Ptr Int
segmentsLengthPtr)
(Ptr (Ptr SpeechSegment) -> IO (Ptr SpeechSegment)
forall a. Storable a => Ptr a -> IO a
peek Ptr (Ptr SpeechSegment)
segmentsPtr)
where
withPtr :: forall a b. (Storable a) => (Ptr a -> IO b) -> IO b
withPtr :: forall a b. Storable a => (Ptr a -> IO b) -> IO b
withPtr = IO (Ptr a) -> (Ptr a -> IO ()) -> (Ptr a -> IO b) -> IO b
forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket (forall a. Storable a => IO (Ptr a)
malloc @a) Ptr a -> IO ()
forall a. Ptr a -> IO ()
free