Your code makes no sense.
First: using x =...
you can't change value on list brands
. You need brands[index] = ...
Second: it needs nested for
-loop to compare x
with all other words in brands
for index, word in enumerate(brands):
for other in brands[index+1:]:
#print(word, other, fuzz.token_sort_ratio(word, other))
if fuzz.token_sort_ratio(word, other) > 85:
brands[index] = other
Minimal working code
import pandas as pd
import fuzzywuzzy.fuzz as fuzz
data = {'brands':
'''Cool
Awesome
cool
CoOl
Awesum
Awesome
Mathss
Math
Maths
Mathss'''.split('\n')
} # rows
df = pd.DataFrame(data)
print('--- before ---')
print(df)
brands = df['brands'].to_list()
print('--- changes ---')
for index, word in enumerate(brands):
#for other_index, other_word in enumerate(brands):
for other_index, other_word in enumerate(brands[index+1:], index+1):
#if word != other_word:
result = fuzz.token_sort_ratio(word, other_word)
if result > 85:
print(f'OK | {result:3} | {index:2} {word:7} -> {other_index:2} {other_word}')
elif result > 50:
print(f' | {result:3} | {index:2} {word:7} -> {other_index:2} {other_word}')
if result > 85:
brands[index] = other_word
#break
#word = other_word
df['brands'] = brands
print('--- after ---')
print(df)
Result:
--- before ---
brands
0 Cool
1 Awesome
2 cool
3 CoOl
4 Awesum
5 Awesome
6 Mathss
7 Math
8 Maths
9 Mathss
--- changes ---
OK | 100 | 0 Cool -> 2 cool
OK | 100 | 0 Cool -> 3 CoOl
| 77 | 1 Awesome -> 4 Awesum
OK | 100 | 1 Awesome -> 5 Awesome
OK | 100 | 2 cool -> 3 CoOl
| 77 | 4 Awesum -> 5 Awesome
| 80 | 6 Mathss -> 7 Math
OK | 91 | 6 Mathss -> 8 Maths
OK | 100 | 6 Mathss -> 9 Mathss
OK | 89 | 7 Math -> 8 Maths
| 80 | 7 Math -> 9 Mathss
OK | 91 | 8 Maths -> 9 Mathss
--- after ---
brands
0 CoOl
1 Awesome
2 CoOl
3 CoOl
4 Awesum
5 Awesome
6 Mathss
7 Maths
8 Mathss
9 Mathss
It doesn't change Awesum
into Awesome
because it gets 77
It doesn't change Math
into Mathss
because it gets 80
. But it gets 89
for Maths
.
If you use word = other_word
in for
-loop then it can convert Math
to Maths
(89
) and next this Maths
to Mathss
(91
). But this way it may change many times and finally it change to word which originally can gives value much smaller then 85
. Expected result you can get also for 75
instead of 85
.
But this method get last word with value >85
, not with the bigest value - so there can be better matching word and it will not use it. Using break it get first word with >85
. maybe it should get all words with >85
and choose word with biggest value. And it would have to skip words which are the same but in different row. But all this can make strange situations.
In comments in code I keep other ideas for modifications.
EDIT:
The same with >75
and with colors.

import pandas as pd
import fuzzywuzzy.fuzz as fuzz
from colorama import Fore as FG, Back as BG, Style as ST
data = {'brands':
'''Cool
Awesome
cool
CoOl
Awesum
Awesome
Mathss
Math
Maths
Mathss'''.split('\n')
} # rows
df = pd.DataFrame(data)
print('--- before ---')
print(df)
brands = df['brands'].to_list()
print('--- changes ---')
for index, word in enumerate(brands):
print('-', index, '-')
#for other_index, other_word in enumerate(brands):
for other_index, other_word in enumerate(brands[index+1:], index+1):
#if word != other_word:
result = fuzz.token_sort_ratio(word, other_word)
if result > 85:
color = ST.BRIGHT + FG.GREEN
info = 'OK'
elif result > 75:
color = ST.BRIGHT + FG.YELLOW
info = ' ?'
elif result > 50:
color = ST.BRIGHT + FG.WHITE
info = ' '
else:
color = ST.BRIGHT + FG.RED
info = ' -'
print(f'{color}{info} | {result:3} | {index:2} {word:7} -> {other_index:2} {other_word}{ST.RESET_ALL}')
if result > 75:
brands[index] = other_word
#break
#word = other_word
df['brands'] = brands
print('--- after ---')
print(df)