00001 ## @package face_contour_detector.learing.k_nearest_neighbour 00002 # Implementation of the k-nearest-neighbour algorithm 00003 # @author Fabian Wenzelmann 00004 00005 import heapq 00006 import math 00007 00008 ## @brief Calculate the euclidean distance of two n-dimensional points. 00009 # 00010 # The euclidean distance for two points \f$p = (p_1, p_2, \dots, p_n)\f$ and \f$q = (q_1, q_2, \dots, q_n)\f$ is defined as follows: \f$ ED(p, q) = \sqrt{ \sum\limits_{i=1}^n (q_i - p_i)^2 } \f$ 00011 def euclidean_distance(p1, p2): 00012 distance_sum = 0 00013 for x, y in zip(p1, p2): 00014 distance_sum += (x - y)**2 00015 return math.sqrt(distance_sum) 00016 00017 ## @brief Find for <i>point</i> it's k nearest neighours in the iterable object <i>points</i> 00018 # @param point the reference points which near neighbours you want to find 00019 # @param points iterable object of other points (distance to point is calculated for each point in this "set") 00020 # @param k defines how many neighbours are found in the result list (if the length of the list is smaller than k a list of <i>points</i> is returned) 00021 # @param key the distance function is not called on point and a point in points directly. The key function is called on point and the result is passed to the distance function 00022 # @param key2 similar to key but this function is applied to each point in the points "set". Default is None which means that key is used. The advantage of having to key functions is that we can apply different functions on point and all other points. 00023 # @param distance a distance function that defines a float distance between two points. The distance function is applied to key(point) and key2(p) where <i>p</i> is a point form <i>points</i>. Default is euclidean_distance 00024 # @return List with the k nearest neighbours of <i>point</i> in <i>points</i> 00025 # <i>point</i> is the reference point the distance is calculated form. <i>points</i> is the iterable object with all other points. Calculates a 00026 # list containing the k-nearest neighbours of <i>point</i> in <i>points</i>. If <code>len(points) < k</code> the list has length 00027 # <code>len(points)</code>. 00028 def k_nearest_neighbours(point, points, k, key=lambda x: x, key2=None, distance=euclidean_distance): 00029 if key2 is None: 00030 key2 = key 00031 h = [] 00032 for p in points: 00033 heapq.heappush(h, (distance(key(point), key2(p)), p)) 00034 size = len(h) 00035 return [ heapq.heappop(h)[1] for i in range(min(k, size)) ]