高速なプログラムを書くコツは「自分で書かないこと」です
getNearestDistance なら
たとえば sklearn の neighbors あたりに高速なモジュールが実装されています
これを使えば3行で実装できます
def getNearestDistance(x, y, paste_list):
tree = sklearn.neighbors.KDTree(paste_list, metric='minkowski', p=1)
dist, ind = tree.query([[x,y]])
return dist[0][0]
実際に動くコードはこんな感じです(jypyter-noteboolのIpython 上で動かしています)
import numpy as np
import sklearn
def getNearestDistance_pyopyopyo(x, y, paste_list):
tree = sklearn.neighbors.KDTree(paste_list, metric='minkowski', p=1)
dist, ind = tree.query([[x,y]])
return dist[0][0]
def getNearestDistance_original(x, y, paste_list):
min_distance = 9999
if True:
for p in paste_list:
distance = np.abs(p[0] - x) + abs(p[1] - y)
if min_distance > distance:
min_distance = distance
else:
all_distance = [abs(p[0] - x) + abs(p[1] - y) for p in paste_list]
if len(all_distance) > 0:
min_distance = min(all_distance)
return min_distance
N = 100000
rng = np.random.RandomState(0)
paste_list = rng.random_sample((N, 2))
paste_list *= 2000
paste_list.astype(np.int)
print("original:", getNearestDistance_original(10, 2, paste_list) )
print("pyopyopyo:", getNearestDistance_pyopyopyo(10, 2, paste_list) )
%timeit getNearestDistance_original(10, 2, paste_list)
%timeit getNearestDistance_pyopyopyo(10, 2, paste_list)
(時間計測にはipythonの%%timeitを使っています)
paste_list が毎回同じ場合は,さらに高速化できます
まず
tree = sklearn.neighbors.KDTree(paste_list, metric='minkowski', p=1)
を一回だけじっこうして
あとは
def getNearestDistance(x, y, tree):
dist, ind = tree.query([[x,y]])
return dist[0][0]
となるようにします.これで劇的に早くなります
実際のコードはこちら
import numpy as np
import sklearn
def getNearestDistance_pyopyopyo_v2(x, y, tree):
dist, ind = tree.query([[x,y]])
return dist[0][0]
def getNearestDistance_original(x, y, paste_list):
min_distance = 9999
if True:
for p in paste_list:
distance = np.abs(p[0] - x) + abs(p[1] - y)
if min_distance > distance:
min_distance = distance
else:
all_distance = [abs(p[0] - x) + abs(p[1] - y) for p in paste_list]
if len(all_distance) > 0:
min_distance = min(all_distance)
return min_distance
N = 100000
rng = np.random.RandomState(0)
paste_list = rng.random_sample((N, 2))
paste_list *= 2000
paste_list.astype(np.int)
def original(loop):
for i in range(0,loop):
x=i+2
y=1000-i
getNearestDistance_original(x, y, paste_list)
def pyopyopyo(loop):
tree = sklearn.neighbors.KDTree(paste_list, metric='minkowski', p=1)
for i in range(0,loop):
x=i+2
y=1000-i
getNearestDistance_pyopyopyo_v2(x, y, tree)
loop=10
%timeit original(loop)
%timeit pyopyopyo(loop)
私の手元のマシンだとこれで
1.9 s ± 7.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
35.5 ms ± 6.88 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
つまりもともとが1.9秒で,それが35ミリ秒まで高速化できました