The Python way:
from dataclasses import dataclass, field
from itertools import product
from random import randint
from typing import Dict, List
@dataclass
class PriceComparison():
rows : int
cols : int
maxprice : int = 50
threshold : int = 0
itemcodes : List[List[str]] = field(init=False)
pricelist : Dict[str, int] = field(init=False)
def __post_init__(self):
##create sample data
self.itemcodes = [[f'A{r+self.cols*c:03d}' for c in range(self.rows)] for r in range(self.cols)]
print(self.itemcodes)
self.pricelist = {self.itemcodes[c][r]:randint(0,self.maxprice) for r in range(self.rows) for c in range(self.cols)}
##remove items with price = 0
for col in self.itemcodes:
for item in col[:]:
if self.pricelist[item] == 0:
print(f'removing {item} from {col}')
col.remove(item)
del self.pricelist[item]
def find_cheapest(self):
iterations = 1
for col in self.itemcodes:
iterations *= len(col)
print(f'this may require {iterations} iterations!')
cheapest = self.maxprice * self.cols + 1
for i, combo in enumerate(product(*self.itemcodes)):
##dummy price calculation
price = sum([self.pricelist[item] for item in combo]) * randint(1,10) // 10
if price < cheapest:
print(f'current cheapest is {price} at iteration {i}')
cheapest = price
if price < self.threshold:
print('under threshold: returning')
break
return cheapest
Some notes:
- I assume the cheapest combo is not simply given by selecting the cheapest item in each column, otherwise we would not need all this complicated machinery; so I inserted a random coefficient while calculating the total price of a combo - this should be replaced with the actual formula
- I also assume we have item codes in our input table, with prices for each item stored elsewhere. As sample data I create codes from 'A000' to 'Axxx', and assign a random price between 0 and a
maxprice
to each one
- Items with price = 0 are removed immediately, before the search for the cheapest combo
- For large input tables the search will take a very long time. So although it wasn't requested I also added an optional
threshold
parameter: if we find a total price under that value we consider it is cheap enough and stop the search
EDIT
The following is a Python 3.5 compatible version.
However it must be noted that with a 10x15 input table the number of required iterations will be somewhere near 1E+15 (something less actually, depending on how many cells we are able to ignore as "obvious outliers"). Even if we check 1 million combos per second it will still run for (something less than) 1E+09 seconds, or about 32 years.
So we need a way to improve our strategy. I integrated two options:
- Setting a threshold, so that we don't search for the actual best price but stop as soon as we find an "acceptable" one
- Splitting the tables in "zones" (subsets of columns), looking for the best partial solution for each zone and then combining them.
Sample runs:
##10 x 15, 5 zones, each 3 columns wide
this may require up to 1.000000e+03 iterations!
...
current best price is 1 at iteration 71 in 0.06 secs
this may require up to 1.000000e+03 iterations!
...
current best price is 2 at iteration 291 in 0.11 secs
this may require up to 1.000000e+03 iterations!
...
current best price is 1 at iteration 330 in 0.07 secs
this may require up to 8.100000e+02 iterations!
...
current best price is 4 at iteration 34 in 0.09 secs
this may require up to 1.000000e+03 iterations!
...
current best price is 1 at iteration 82 in 0.07 secs
['A000', 'A106', 'A017', 'A033', 'A139', 'A020', 'A051', 'A052', 'A008', 'A009', 'A055', 'A131', 'A147', 'A133', 'A044']
##10 x 15, no zones, threshold = 25
this may require up to 8.100000e+14 iterations!
...
current best price is 24 at iteration 267493282 in 1033.24 secs
under threshold: returning
['A000', 'A001', 'A002', 'A003', 'A004', 'A005', 'A051', 'A052', 'A008', 'A039', 'A055', 'A071', 'A042', 'A133', 'A044']
Code follows:
from itertools import product
from random import randint
from time import time
class PriceComparison():
def __init__(self, rows, cols, zones = [], maxprice = 50, threshold = 0):
self.rows = rows
self.cols = cols
if zones == []:
self.zones = [cols]
else:
self.zones = zones
self.maxprice = maxprice
self.threshold = threshold
self.__post_init__()
def __post_init__(self):
##create sample data
self.itemcodes = [['A%03d' % (r+self.cols*c) for c in range(self.rows)] for r in range(self.cols)]
print(self.itemcodes)
self.pricelist = {self.itemcodes[c][r]:randint(0,self.maxprice) for r in range(self.rows) for c in range(self.cols)}
##remove items with price = 0
for col in self.itemcodes:
for item in col[:]:
if self.pricelist[item] == 0:
print('removing %s from %s' % (item, col))
col.remove(item)
del self.pricelist[item]
def find_cheapest(self, lo, hi):
iterations = 1
for col in self.itemcodes[lo:hi]:
iterations *= len(col)
start = time()
print('\nthis may require up to %e iterations!' % (iterations))
bestprice = self.maxprice * self.cols + 1
for i, combo in enumerate(product(*self.itemcodes[lo:hi])):
##dummy price calculation
price = sum([self.pricelist[item] for item in combo]) * randint(1,10) // 10
if price < bestprice:
elapsed = time() - start
print('current best price is %d at iteration %d in %.2f secs' % (price, i, elapsed))
cheapest = combo
bestprice = price
if price < self.threshold:
print('under threshold: returning')
break
return cheapest
def find_by_zones(self):
print(self.zones)
fullcombo = []
lo = 0
for zone in self.zones:
hi = lo + zone
fullcombo += self.find_cheapest(lo, hi)
lo = hi
return fullcombo