78 lines
2.4 KiB
Python
Executable File
78 lines
2.4 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
import heapq
|
|
from . import algorithims
|
|
|
|
|
|
|
|
def extract(query, choices, match_type='trigram', score_cutoff=0, limit=5):
    """
    Find the similarity between a query item and a list of choices.
    Returns a list of (choice, score) tuples ordered from the highest
    score to the lowest.

    Arguments:
        query: The string you are wanting to match.
        choices: An iterable or dictionary-like object containing choices
            to be matched against the query. Iterating a dictionary
            yields its keys, so the keys are what get scored.
        match_type: Either the name of a scoring algorithm ('trigram',
            'levenshtein', 'cosine' or 'jaro_winkler') or a callable
            taking (query, choice) and returning a comparable score.
        score_cutoff: Optional argument for score threshold. Choices
            scoring below this number are dropped; a score equal to the
            cutoff is kept. Defaults to 0.
        limit: Maximum number of results to return. Pass None to get
            every choice that meets the cutoff. Defaults to 5.

    Returns:
        A list of (choice, score) tuples, or None when choices is None,
        when choices is a sized container that is empty, or when scoring
        fails with an ordinary exception (including an unknown
        match_type name).
    """
    try:
        # Resolve the algorithm name lazily so that a caller-supplied
        # callable never needs to touch the algorithims module.
        if match_type in ('trigram', 'levenshtein', 'cosine', 'jaro_winkler'):
            scorer = getattr(algorithims, match_type)
        else:
            scorer = match_type

        try:
            if choices is None or len(choices) == 0:
                return None
        except TypeError:
            # Unsized iterables (e.g. generators) have no len(); just
            # iterate them below.
            pass

        scored = ((choice, scorer(query, choice)) for choice in choices)
        results = [pair for pair in scored if pair[1] >= score_cutoff]

        if limit is None:
            return sorted(results, key=lambda pair: pair[1], reverse=True)
        return heapq.nlargest(limit, results, key=lambda pair: pair[1])
    except Exception:
        # Best-effort API: ordinary failures are reported as "no result".
        # KeyboardInterrupt / SystemExit are deliberately NOT swallowed.
        return None
|
|
|
|
|
|
def extractOne(query, choices, match_type='trigram', score_cutoff=0):
    """
    Finds the most similar item to query item from a list of choices.
    Returns a (choice, score) tuple for the best choice, or None.

    Arguments:
        query: The string you are wanting to match.
        choices: An iterable or dictionary-like object containing choices
            to be matched against the query.
        match_type: Scoring algorithm name or callable, passed through
            unchanged to extract().
        score_cutoff: Optional argument for score threshold. If the best
            match scores below this number, then return None anyway
            ("not a good enough match"). Defaults to 0.

    Returns:
        The highest-scoring (choice, score) tuple, or None when extract()
        yields no result (no choices, nothing meeting the cutoff, or a
        scoring failure).
    """
    # Only the top entry is needed, so ask extract() for a single result
    # rather than collecting five and scanning them again.
    best_list = extract(query, choices, match_type, score_cutoff, limit=1)

    # extract() returns None or an empty list when there is no match.
    if not best_list:
        return None
    return best_list[0]