Files
addon/lib/fuzzy_match/match.py

78 lines
2.4 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
import heapq
from . import algorithims
def extract(query, choices, match_type='trigram', score_cutoff=0, limit=5):
    """
    Score every choice against the query and return the best matches.

    Arguments:
        query: The string you are wanting to match.
        choices: An iterable or dictionary-like object containing choices
            to be matched against the query. Iterating a dictionary
            yields its keys, so those are what get scored.
        match_type: Name of the scoring function to use -- one of
            'trigram', 'levenshtein', 'cosine' or 'jaro_winkler', looked
            up on the algorithims module -- or a callable taking
            (query, choice) and returning a score.
        score_cutoff: Minimum score a choice needs in order to be kept;
            choices scoring below it are dropped. Defaults to 0.
        limit: Maximum number of matches to return. Pass None to get
            every match that passes the cutoff. Defaults to 5.

    Returns:
        A list of (choice, score) tuples ordered from highest to lowest
        score. None is returned instead when choices is None or empty,
        when match_type is not recognised, or when scoring fails.
    """
    known_scorers = ('trigram', 'levenshtein', 'cosine', 'jaro_winkler')
    # Callers rely on a best-effort "None on failure" contract, so ordinary
    # errors are still absorbed; KeyboardInterrupt and SystemExit are not.
    try:
        if callable(match_type):
            scorer = match_type
        elif match_type in known_scorers:
            scorer = getattr(algorithims, match_type)
        else:
            return None

        try:
            if choices is None or len(choices) == 0:
                return None
        except TypeError:
            # Generators and other unsized iterables have no len(); just
            # iterate over them below.
            pass

        scored = ((choice, scorer(query, choice)) for choice in choices)
        results = [pair for pair in scored if pair[1] >= score_cutoff]

        if limit is None:
            return sorted(results, key=lambda pair: pair[1], reverse=True)
        return heapq.nlargest(limit, results, key=lambda pair: pair[1])
    except Exception:
        return None
def extractOne(query, choices, match_type='trigram', score_cutoff=0):
    """
    Finds the most similar item to query item from a list of choices.

    Arguments:
        query: The string you are wanting to match.
        choices: An iterable or dictionary-like object containing choices
            to be matched against the query.
        match_type: Scoring function selector, passed straight through to
            extract(). Defaults to 'trigram'.
        score_cutoff: Optional argument for score threshold. If even the
            best match scores below this number, None is returned
            ("not a good enough match"). Defaults to 0.

    Returns:
        A (choice, score) tuple for the highest-scoring choice, or None
        when extract() yields no usable match.
    """
    # Only the single top-scoring entry is needed, so request exactly one
    # result instead of fetching several and scanning them again.
    best_list = extract(query, choices, match_type, score_cutoff, limit=1)
    if not best_list:
        # extract() gave back None or an empty list: nothing qualified.
        return None
    return best_list[0]