Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Synopsis
- data SearchStateSized (n :: Nat) a = SearchStateSized {
- _currentQuery :: !Text
- _prevQuery :: !Text
- _chunkNumber :: !Int
- _totalMatches :: !Int
- _newMatches :: !Bool
- _done :: !Bool
- _matchSet :: !(MatchSetSized n)
- data Ocassion
- type MatchSetSized n = Set (ScoredMatchSized n)
- newtype Chunks (n :: Nat) = Chunks {}
- data SearchFunctions a b = SearchFunctions {}
- chunkNumber :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Int Int
- currentQuery :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Text Text
- done :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Bool Bool
- matchSet :: forall n a n a. Lens (SearchStateSized n a) (SearchStateSized n a) (MatchSetSized n) (MatchSetSized n)
- newMatches :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Bool Bool
- prevQuery :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Text Text
- totalMatches :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Int Int
- data SearchReport = SearchReport {}
- display :: forall a b n. KnownNat n => SimpleGetter (SearchFunctions a b) ((Bool -> Text -> b) -> MatcherSized n a -> Text -> Vector n Int -> [b])
- makeMatcher :: forall a b. Lens' (SearchFunctions a b) (Text -> Matcher a)
- match :: forall a b n. KnownNat n => SimpleGetter (SearchFunctions a b) (MatcherSized n a -> Text -> Maybe (MatchFull n))
- data SearchEnv n a b = SearchEnv {
- _searchFunctions :: SearchFunctions a b
- _send :: forall n m. (KnownNat n, KnownNat m) => Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ()
- _maxMatches :: Int
- _candidates :: Chunks n
- _query :: MVar (Maybe Text)
- _allMatches :: IOVector (Vector n Bit)
- hasNewMatches :: Lens' SearchReport Bool
- nummatches :: Lens' SearchReport Int
- ocassion :: Lens' SearchReport Ocassion
- searchedTerm :: Lens' SearchReport Text
- allMatches :: forall n a b. Lens' (SearchEnv n a b) (IOVector (Vector n Bit))
- candidates :: forall n a b. Lens' (SearchEnv n a b) (Chunks n)
- maxMatches :: forall n a b. Lens' (SearchEnv n a b) Int
- query :: forall n a b. Lens' (SearchEnv n a b) (MVar (Maybe Text))
- searchFunctions :: forall n a b b. Lens (SearchEnv n a b) (SearchEnv n a b) (SearchFunctions a b) (SearchFunctions a b)
- send :: forall n a b n m. (KnownNat n, KnownNat m) => SimpleGetter (SearchEnv n a b) (Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ())
- (!) :: KnownNat n => Chunks n -> ChunkIndex -> Text
- getChunk :: Int -> Chunks n -> Vector n Text
- matchChunk :: forall n m a. (KnownNat n, KnownNat m) => (MatcherSized n a -> Text -> Maybe (MatchFull n)) -> MatcherSized n a -> Int -> Vector m Text -> Vector m Bit -> (Vector m Bit, MatchSetSized n)
- matchChunkM :: forall n m a s. (KnownNat n, KnownNat m) => (MatcherSized n a -> Text -> Maybe (MatchFull n)) -> MatcherSized n a -> Int -> Vector m Text -> Vector m Bit -> ST s (Vector m Bit, MatchSetSized n)
- resetMatches :: forall n m a b. KnownNat n => SearchEnv n a b -> SearchStateSized m a -> IO ()
- searchNextChunk :: (KnownNat n, KnownNat m) => SearchEnv n a b -> MatcherSized m a -> SearchStateSized m a -> IO (SearchStateSized m a)
- matcherLoop :: (KnownNat n, KnownNat m) => SearchEnv n a b -> Text -> Text -> MatcherSized m a -> IO (Maybe Text)
- searchEnv :: KnownNat n => SearchFunctions a b -> Int -> (forall n m. (KnownNat n, KnownNat m) => Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ()) -> Chunks n -> IO (SearchEnv n a b)
- searchLoop :: KnownNat n => SearchEnv n a b -> IO ()
- fuzzyFunctions :: CaseSensitivity -> SearchFunctions MatchPart b
- orderlessFunctions :: CaseSensitivity -> SearchFunctions Int b
- makeChunks :: forall n. KnownNat n => Vector Text -> Chunks n
- makeChunksP :: KnownNat n => Proxy n -> Vector Text -> Chunks n
- setToVectorST :: (a -> b) -> Set a -> ST s (Vector b)
- startSearcher :: KnownNat n => SearchEnv n a b -> IO ()
- sendQuery :: KnownNat n => SearchEnv n a b -> Text -> IO ()
- stopSearcher :: KnownNat n => SearchEnv n a b -> IO ()
- concatChunks :: KnownNat n => Int -> Chunks n -> Vector Text
- forceChunks :: KnownNat n => Chunks n -> Chunks n
- chunksFromStream :: forall n. KnownNat n => InputStream Text -> IO (Chunks n)
- chunksFromStreamP :: forall n. KnownNat n => Proxy n -> InputStream Text -> IO (Chunks n)
- chunksFromHandle :: KnownNat n => Proxy n -> Handle -> IO (Chunks n)
- readVectorHandleWith :: (Text -> Text) -> (Vector Text -> Vector Text) -> Handle -> IO (Vector Text)
- fileNamesSorted :: Handle -> IO (Vector Text)
- data CaseSensitivity
- type Indices (n :: Nat) = (Int, Vector n Int)
- data SearchSettings a (n :: Nat) = SearchSettings (a -> Text -> Maybe (MatchFull n)) (a -> Int) Int (Text -> Vector n Int -> Text -> Vector n Int -> Ordering)
- data MatchFull (n :: Nat) = MatchFull {}
- data MatchPart = MatchPart {
- matchBegin :: !Int
- matchEnd :: !Int
- data MatchState (n :: Nat) a = MatchState {
- endLocation :: !Int
- partialMatch :: !(Vector n Int)
- aux :: !a
- data Matcher a = forall n.KnownNat n => Matcher (MatcherSized n a)
- data MatcherSized (n :: Nat) a = MatcherSized {
- caseSensitivity :: CaseSensitivity
- machina :: !(AcMachine a)
- sizes :: !(Either Int (Vector n Int))
- fuzzyMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart
- fuzzyMatcher :: CaseSensitivity -> Text -> Matcher MatchPart
- emptyMatcher :: MatcherSized 0 a
- orderlessMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n Int
- orderlessMatcher :: CaseSensitivity -> Text -> Matcher Int
- fuzzyMatchSized :: KnownNat n => MatcherSized n MatchPart -> Text -> Maybe (MatchFull n)
- fuzzyMatch :: Matcher MatchPart -> Text -> Maybe [Text]
- fuzzyMatchParts :: KnownNat n => MatcherSized n MatchPart -> Text -> Vector n Int -> [Text]
- fuzzyMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n MatchPart -> Text -> Vector n Int -> [a]
- orderlessMatchSized :: KnownNat n => MatcherSized n Int -> Text -> Maybe (MatchFull n)
- orderlessMatch :: Matcher Int -> Text -> Maybe [Text]
- orderlessMatchParts :: KnownNat n => MatcherSized n Int -> Text -> Vector n Int -> [Text]
- orderlessMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n Int -> Text -> Vector n Int -> [a]
- parts :: Either Int (Vector Int) -> Text -> Vector Int -> [Text]
- partsAs :: (Bool -> Text -> a) -> Either Int (Vector Int) -> Text -> Vector Int -> [a]
- partsOrderless :: Either Int (Vector Int) -> Text -> Vector Int -> [Text]
- partsOrderlessAs :: (Bool -> Text -> a) -> Either Int (Vector Int) -> Text -> Vector Int -> [a]
- minify :: Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex]
- fuzzySettings :: KnownNat n => Int -> SearchSettings (MatcherSized n MatchPart) n
- orderlessSettings :: KnownNat n => Int -> SearchSettings (MatcherSized n Int) n
- data ChunkIndex = ChunkIndex {}
- data ScoredMatchSized (n :: Nat) = ScoredMatchSized {
- score :: !(Down Int)
- chunkIndex :: !ChunkIndex
- matchData :: !(Vector n Int)
- emptyMatch :: Int -> Int -> ScoredMatchSized 0
Documentation
data SearchStateSized (n :: Nat) a Source #
SearchStateSized | |
|
type MatchSetSized n = Set (ScoredMatchSized n) Source #
data SearchFunctions a b Source #
SearchFunctions | |
|
chunkNumber :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Int Int Source #
currentQuery :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Text Text Source #
done :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Bool Bool Source #
matchSet :: forall n a n a. Lens (SearchStateSized n a) (SearchStateSized n a) (MatchSetSized n) (MatchSetSized n) Source #
newMatches :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Bool Bool Source #
prevQuery :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Text Text Source #
totalMatches :: forall n a a. Lens (SearchStateSized n a) (SearchStateSized n a) Int Int Source #
data SearchReport Source #
SearchReport | |
|
display :: forall a b n. KnownNat n => SimpleGetter (SearchFunctions a b) ((Bool -> Text -> b) -> MatcherSized n a -> Text -> Vector n Int -> [b]) Source #
makeMatcher :: forall a b. Lens' (SearchFunctions a b) (Text -> Matcher a) Source #
match :: forall a b n. KnownNat n => SimpleGetter (SearchFunctions a b) (MatcherSized n a -> Text -> Maybe (MatchFull n)) Source #
The constant environment in which the search runs.
SearchEnv | |
|
searchFunctions :: forall n a b b. Lens (SearchEnv n a b) (SearchEnv n a b) (SearchFunctions a b) (SearchFunctions a b) Source #
send :: forall n a b n m. (KnownNat n, KnownNat m) => SimpleGetter (SearchEnv n a b) (Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ()) Source #
matchChunk :: forall n m a. (KnownNat n, KnownNat m) => (MatcherSized n a -> Text -> Maybe (MatchFull n)) -> MatcherSized n a -> Int -> Vector m Text -> Vector m Bit -> (Vector m Bit, MatchSetSized n) Source #
matchChunkM :: forall n m a s. (KnownNat n, KnownNat m) => (MatcherSized n a -> Text -> Maybe (MatchFull n)) -> MatcherSized n a -> Int -> Vector m Text -> Vector m Bit -> ST s (Vector m Bit, MatchSetSized n) Source #
resetMatches :: forall n m a b. KnownNat n => SearchEnv n a b -> SearchStateSized m a -> IO () Source #
searchNextChunk :: (KnownNat n, KnownNat m) => SearchEnv n a b -> MatcherSized m a -> SearchStateSized m a -> IO (SearchStateSized m a) Source #
matcherLoop :: (KnownNat n, KnownNat m) => SearchEnv n a b -> Text -> Text -> MatcherSized m a -> IO (Maybe Text) Source #
searchEnv :: KnownNat n => SearchFunctions a b -> Int -> (forall n m. (KnownNat n, KnownNat m) => Chunks n -> SearchReport -> MatcherSized m a -> MatchSetSized m -> IO ()) -> Chunks n -> IO (SearchEnv n a b) Source #
chunksFromStream :: forall n. KnownNat n => InputStream Text -> IO (Chunks n) Source #
chunksFromStreamP :: forall n. KnownNat n => Proxy n -> InputStream Text -> IO (Chunks n) Source #
data CaseSensitivity #
Instances
type Indices (n :: Nat) = (Int, Vector n Int) Source #
Type synonym for the index of a candidate in the backing vector along with the positions of the matches for it.
data SearchSettings a (n :: Nat) Source #
The configuration for a search style with n needles and a matcher of type a.
data MatchFull (n :: Nat) Source #
The full match, consisting of a score for the match and a vector of the positions of the match. The score is intended for bucketing and as a
result shouldn't be too large and must be non-negative. For the fuzzy style in this module n
contiguous matches contribute n-1
to the score. The
scores thus range from 0
to n-1
where n
is the length of the string to be matched. For orderless style this score is always 0
.
MatchPart | |
|
data MatchState (n :: Nat) a Source #
The matching process essentially takes the form of a fold, with possible early termination, over the matches produced. See runLower from the alfred-margaret package. Here MatchState is the return type of this fold, and essentially it records the positions of the matches. As in alfred-margaret, a position is the code unit index of the first code unit beyond the match. We can't use CodeUnitIndex here because it doesn't have an Unbox instance.
MatchState | |
|
Instances
Show a => Show (MatchState n a) Source # | |
Defined in Talash.Core showsPrec :: Int -> MatchState n a -> ShowS # show :: MatchState n a -> String # showList :: [MatchState n a] -> ShowS # |
The existential version of MatcherSized
forall n.KnownNat n => Matcher (MatcherSized n a) |
data MatcherSized (n :: Nat) a Source #
The MatcherSized type consists of a state machine for matching a fixed number of needles. The number of matches needed is encoded in the Nat parameterizing
the type. Here the purpose is to improve the memory consumption by utilizing the Unbox
instance for size-tagged unboxed vectors from the
[vector-sized](https://hackage.haskell.org/package/vector-sized) package. This significantly reduces the memory consumption. At least in the present
implementation there is no benefit for correctness and dealing with the length tag is occasionally annoying.
MatcherSized | |
|
fuzzyMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n MatchPart Source #
Constructs the matcher for fuzzy matching. The needles are all possible contiguous substrings of the string being matched. The Nat n
must be instantiated at the
length n
of the query string. There are n choose 2 such substrings, so the complexity of matching is \(O(m + n^2)\) where m
is the length of candidate string.
This is a rough (and probably wrong) estimate, as updating the match state for each found match is not a constant time operation. Not sure if Aho-Corasick is
the optimal way for this kind of matching but in practice it seems fast enough.
fuzzyMatcher :: CaseSensitivity -> Text -> Matcher MatchPart Source #
Unsized version of fuzzyMatcherSized
emptyMatcher :: MatcherSized 0 a Source #
orderlessMatcherSized :: KnownNat n => p n -> CaseSensitivity -> Text -> MatcherSized n Int Source #
Constructs the matcher for orderless matching, the needles are the words from the query string and the proxy argument should be instantiated at the number of words.
orderlessMatcher :: CaseSensitivity -> Text -> Matcher Int Source #
Unsized version of orderlessMatcherSized
fuzzyMatchSized :: KnownNat n => MatcherSized n MatchPart -> Text -> Maybe (MatchFull n) Source #
fuzzyMatchParts :: KnownNat n => MatcherSized n MatchPart -> Text -> Vector n Int -> [Text] Source #
fuzzyMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n MatchPart -> Text -> Vector n Int -> [a] Source #
orderlessMatchSized :: KnownNat n => MatcherSized n Int -> Text -> Maybe (MatchFull n) Source #
orderlessMatchParts :: KnownNat n => MatcherSized n Int -> Text -> Vector n Int -> [Text] Source #
orderlessMatchPartsAs :: KnownNat n => (Bool -> Text -> a) -> MatcherSized n Int -> Text -> Vector n Int -> [a] Source #
:: Either Int (Vector Int) | The information about the lengths of different needles. |
-> Text | The candidate string that has been matched |
-> Vector Int | The vector recording the positions of the needle in the matched string. |
-> [Text] | The candidate string split up according to the match |
The parts of a string resulting from a match using the fuzzy matcher.
partsOrderless :: Either Int (Vector Int) -> Text -> Vector Int -> [Text] Source #
The parts of a string resulting from a match using the orderless matcher. See parts for an explanation of arguments.
partsOrderlessAs :: (Bool -> Text -> a) -> Either Int (Vector Int) -> Text -> Vector Int -> [a] Source #
minify :: Either Int (Vector Int) -> Vector Int -> [CodeUnitIndex] Source #
Shorten a match by collapsing the contiguous sub-matches together.
fuzzySettings :: KnownNat n => Int -> SearchSettings (MatcherSized n MatchPart) n Source #
Search functions suitable for fuzzy matching. The candidate c
will match query s
if c
contains all the characters in s
in order. In general there
can be several ways of matching. This tries to find a match with the minimum number of parts. It does not find the minimum number of parts if that requires
reducing the extent of the partial match during search. E.g. matching "as"
against "talash"
the split will be ["tal","as","h"]
and not
["t","a","la","s","h"]
. While matching "tea"
against "talash best match testing hat"
will not result in ["talash best match ","te","sting h","a","t"]
since
"te"
occurs only after we have matched all three letters and we can't know if we will find the "a"
without going through the string.
orderlessSettings :: KnownNat n => Int -> SearchSettings (MatcherSized n Int) n Source #
Search functions that match the words, i.e. space-separated substrings, in any order. "talash best"
will match "be as"
with the split
["tal","as","h","be","st"]
but "talash best"
will not match "bet"
.
data ChunkIndex Source #
Instances
data ScoredMatchSized (n :: Nat) Source #
ScoredMatchSized | |
|
Instances
emptyMatch :: Int -> Int -> ScoredMatchSized 0 Source #