Performance-wise, the best way to do this is likely with Python's native string operations.
I would write it like so:
# Maximum number of times each allowed character may occur in a string.
lim = (('a', 2), ('b', 3), ('c', 3), ('d', 1), ('e', 1))
# Built once; rebuilding it for every string is wasted work.
allowed = set('abcde')
results = {}
# list_of_many_strings is a placeholder: substitute your own iterable of strings.
for s in list_of_many_strings:
    # Valid when s uses only allowed characters and no character exceeds its limit.
    results[s] = set(s) <= allowed and not any(s.count(c) > x for c, x in lim)
This relies on str.count(sub[, start[, end]]) to count the occurrences of a substring in a string, and on the built-in any function to test whether any of the conditions is true.
Since you are interested in performance, you can time how long processing 100,000 strings takes with timeit:
import re
def f1(li):
    """Validate each string using plain set and string operations.

    A string is valid when it contains only the characters a-e and no
    character occurs more often than its limit in ``lim``.

    Args:
        li: Iterable of strings to check.

    Returns:
        Dict mapping each string to True (valid) or False (invalid).
    """
    results = {}
    lim = (('a', 2), ('b', 3), ('c', 3), ('d', 1), ('e', 1))
    # Loop-invariant, so build it once instead of once per string.
    allowed = set('abcde')
    for s in li:
        # Subset test rejects foreign characters; count test enforces the limits.
        results[s] = set(s) <= allowed and not any(s.count(c) > x for c, x in lim)
    return results
def f2(li):
    """Validate each string by sorting its characters and matching a regex.

    Sorting groups equal characters together, so one pattern with bounded
    repetitions can check both the alphabet (a-e) and the per-character
    limits.

    Args:
        li: Iterable of strings to check.

    Returns:
        Dict mapping each string to True (valid) or False (invalid).
    """
    # fullmatch is used instead of '^...$' with search: '$' also matches just
    # before a trailing newline, which would wrongly accept the string "\n".
    pat = re.compile(r'a{0,2}b{0,3}c{0,3}d{0,1}e{0,1}')
    results = {}
    for s in li:
        results[s] = pat.fullmatch(''.join(sorted(s))) is not None
    return results
def f3(li):
    """Validate each string with a single lookahead-based regex.

    The pattern chains negative lookaheads: the first rejects any character
    outside a-e, and each of the others rejects one character occurring more
    often than allowed (a: 2, b: 3, c: 3, d: 1, e: 1).

    Note: the trailing ``.+`` requires at least one character, so the empty
    string is reported as invalid here, whereas f1 and f2 report it as valid.

    Args:
        li: Iterable of strings to check.

    Returns:
        Dict mapping each string to True (valid) or False (invalid).
    """
    pat = re.compile(r'^(?!.*[^a-e])(?!(?:.*a){3})(?!(?:.*b){4})(?!(?:.*c){4})(?!(?:.*d){2})(?!(?:.*e){2}).+')
    results = {}
    for s in li:
        results[s] = pat.search(s) is not None
    return results
if __name__ == '__main__':
    import timeit
    import random

    # Character pool for the benchmark data. It holds two d's and two e's, so
    # some 8-character samples exceed the limits and both outcomes are exercised.
    s = 'abcdeabcdebc'
    li = [''.join(random.sample(s, 8)) for _ in range(100000)]

    # Sanity check: all three implementations must agree on every sample.
    print(f1(li) == f2(li) == f3(li))

    for f in (f1, f2, f3):
        # f and li are module-level names here, which is what lets the timeit
        # setup statement import them from __main__.
        print(" {:^10s}{:.4f} secs".format(f.__name__, timeit.timeit("f(li)", setup="from __main__ import f, li", number=10)))
On my computer, this takes:
True
f1 0.8519 secs
f2 1.1235 secs
f3 1.3070 secs