Python 模糊字符串匹配工具 FuzzyWuzzy
作者:YXN-python 阅读量:1 发布日期:2024-11-21
安装:
pip install fuzzywuzzy
1.相似度分析:
from fuzzywuzzy import fuzz
# Compare two Chinese strings that differ only by a few inserted characters.
# fuzz.ratio scores their similarity on a 0-100 scale (100 = identical).
first_text = "我爱北京天安门"
second_text = "我喜爱北京天安门广场"
score = fuzz.ratio(first_text, second_text)
print(f"相似度分数: {score}")
# Output: 相似度分数: 82
2.拼写检查
from fuzzywuzzy import fuzz
# Spell check: recommend the known word that is most similar to the input.
vocabulary = ["长城", "故宫", "上海"]
typed = "商海"


def _similarity_to_typed(word):
    """Return the fuzz.ratio score between the typed text and *word*."""
    return fuzz.ratio(typed, word)


# max() keeps the first entry on ties, so vocabulary order is the tie-breaker.
recommended = max(vocabulary, key=_similarity_to_typed)
print(f"推荐的正确词汇: {recommended}")
# Output: 推荐的正确词汇: 上海
3.搜索建议(提示词)
from fuzzywuzzy import process
# Search suggestions: rank every candidate against the query string.
# process.extract returns (candidate, score) pairs, best matches first
# (at most 5 results with the library's default limit).
query = "故宫"
options = ["故宫博物院", "故宫的历史", "天安门", "故宫的建筑"]
suggestions = process.extract(query, options)
print(f"建议: {suggestions}")
# Output: 建议: [('故宫博物院', 90), ('故宫的历史', 90), ('故宫的建筑', 90), ('天安门', 0)]
4.数据清洗
from fuzzywuzzy import process
# Data cleaning: collapse near-duplicate strings into one entry per group.
data = ["apple", "aple", "banana", "bananna", "banana"]
# process.dedupe already returns unique entries, so wrapping it in set() was
# redundant and made the printed order vary between runs (string hashing is
# randomized per process). list() alone keeps a stable, reproducible order.
# NOTE: dedupe keeps the longest string of each group as its representative,
# which is why the misspelling "bananna" wins over "banana" here.
cleaned_data = list(process.dedupe(data))
print(cleaned_data)
# Output: ['apple', 'bananna']
YXN-python
2024-11-21