做用戶聚類時,有一個feature涉及到面積計算,藉此溫故一下之前瞭解到的凸包算法的流程。實際上,這篇文章所基於的原文章達到的精度已經滿足我的需求。具體實現參考了已有的代碼,並因爲經緯度座標系不是笛卡爾座標系而稍作修改。
先上筆者參考已有的實現稍作修改後作爲靜態工具類使用的代碼
# Static utility class that encapsulates the convex-hull-based estimate of the area covered by a set of points
from math import *
class cal_area(object):
    """Static helpers estimating the area covered by (longitude, latitude) points.

    The convex hull of the points is built with a monotone-chain scan on the
    raw degree values, split into a triangle fan anchored at the first hull
    vertex, and every triangle is measured with Heron's formula over haversine
    side lengths.
    """

    @staticmethod
    def deg2rad(deg):
        """Convert an angle from degrees to radians."""
        return deg * (pi / 180)

    @staticmethod
    def cross(A, B):
        """Return the scalar 2-D cross product ``A x B``."""
        return A[0] * B[1] - A[1] * B[0]

    @staticmethod
    def vectorMinus(a, b):
        """Return ``a - b`` with both components multiplied by 1000.

        The uniform factor does not change the sign of a cross product of two
        such vectors, which is all the hull scan looks at.
        """
        return ((a[0] - b[0]) * 1000, (a[1] - b[1]) * 1000)

    @staticmethod
    def getLTDis(A, B):
        """Return the haversine distance between two (lon, lat) points in degrees.

        The result is expressed in the unit of the radius constant 6371.393
        (Earth's mean radius in kilometres).
        """
        lon1, lat1, lon2, lat2 = map(cal_area.deg2rad, [A[0], A[1], B[0], B[1]])
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        # Rounding can push ``a`` a hair above 1 for antipodal points, which
        # would make asin() raise a domain error.
        c = 2 * asin(sqrt(min(1.0, a)))
        r = 6371.393
        return c * r

    @staticmethod
    def triangleAre(A, B, C):
        """Return the area of triangle ABC via Heron's formula on haversine sides."""
        x, y, z = cal_area.getLTDis(A, B), cal_area.getLTDis(B, C), cal_area.getLTDis(C, A)
        c = (x + y + z) / 2
        # For needle-like triangles the product may round to a tiny negative
        # number; treat that as zero area instead of letting sqrt() raise.
        return sqrt(max(0.0, c * (c - y) * (c - z) * (c - x)))

    @staticmethod
    def grahamScanArea(data):
        """Return the approximate area of the convex hull of ``data``.

        :param data: sequence of ``[longitude, latitude]`` pairs in degrees;
            it is left unmodified.
        :return: hull area from :meth:`AREA`, or ``0`` when the hull has fewer
            than three vertices.
        """
        # Work on a sorted copy so the caller's list keeps its order.
        ordered = sorted(data, key=lambda p: (p[0], p[1]))
        if len(ordered) < 3:
            return 0

        hull = []
        # First pass over the points in ascending order.
        for item in ordered:
            while len(hull) > 1 and cal_area.cross(
                    cal_area.vectorMinus(hull[-1], hull[-2]),
                    cal_area.vectorMinus(item, hull[-2])) <= 0:
                hull.pop()
            hull.append(item)

        # Second pass walks back from the second-largest point; vertices found
        # in the first pass are never popped.
        k = len(hull)
        for item in reversed(ordered[:-1]):
            while len(hull) > k and cal_area.cross(
                    cal_area.vectorMinus(hull[-1], hull[-2]),
                    cal_area.vectorMinus(item, hull[-2])) <= 0:
                hull.pop()
            hull.append(item)

        # The walk ends on the starting point again; drop the repeat.
        hull.pop()
        if len(hull) < 3:
            return 0
        return cal_area.AREA(hull)

    @staticmethod
    def AREA(b):
        """Sum the areas of the fan triangles ``(b[0], b[i], b[i + 1])``."""
        ans = 0.0
        for first, second in zip(b[1:], b[2:]):
            ans += cal_area.triangleAre(b[0], first, second)
        return ans
## 使用方式
# calculate coverage of the business that customer has been to
# form the user_points table
# One hull area per user, appended in the iteration order of user_business.
user_visited_area = []
for user_id, visited_business in user_business.items():
    points = []
    for business_id in visited_business:
        business_gps = business[business_id]
        # Skip businesses whose coordinates are missing; identity comparison
        # is the correct way to test for None.
        if business_gps['longitude'] is None or business_gps['latitude'] is None:
            continue
        points.append([float(business_gps['longitude']), float(business_gps['latitude'])])
    visited_area = cal_area.grahamScanArea(points)
    user_visited_area.append(visited_area)
對於一個點集P來講,它的凸包就是一個凸多邊形Q,其中滿足P中的每個點都在Q的邊界上或內部。就像下圖所示
凸包的計算算法有好多種,wiki和算法導論第33章中都有比較詳細的介紹,比如下面是算法導論中給出的Graham-Scan算法計算凸包的僞代碼。
現在網上已經有了好多計算點集凸包的優秀代碼,比如這篇文章,作者在文中使用了一個動畫來表示了Graham-Scan算法計算凸包的過程,並給出了python程序的實現,十分有助於學習者對算法的理解。最近有個東西需要使用到凸包,本着“不要重複造輪子”的原則,我在開始的時候直接使用了作者文中的程序。開始使用的時候沒有發現什麼問題,比如下圖所示的效果。
但是,當我在使用其他的數據進行測試的時候,發現程序執行的效果並不太好,下圖是使用著名的Iris數據集的前兩個維度測試的效果,這明顯不是一個凸包。
沒有找到原因,於是開始自己寫程序,發現結果也是這樣的。後來將執行過程中棧的狀態打印輸出,才發現我們兩個的程序中都存在一個小問題。當點集中出現兩個完全相同的點的時候,算法就失效了。上面僞代碼中序號8所示的while循環的判斷條件不應該是兩個向量的叉乘大於0, 而應該是大於等於0。否則的話,當點集中出現連續兩個完全相同的點的時候,棧只能彈出其中一個點,而另一個點會仍然在棧中。所以會出現上圖所示的結果。修改之後的結果如下圖所示,這才是一個正確的凸包。
具體的Python實現代碼如下
import matplotlib.pyplot as plt
import math
import sklearn.datasets as datasets
"""
使用Graham掃描法計算凸包
網上的代碼好多運行效果並不好
算法參見《算法導論》第三版 第605頁
"""
def get_bottom_point(points):
    """Return the index of the lowest point in ``points``.

    Ties on the y coordinate are broken by the smaller x coordinate; if
    several points are fully identical the first one wins.

    :param points: indexable sequence of ``[x, y]`` pairs
    :return: index of the chosen point, or ``0`` for an empty sequence
    """
    return min(
        range(len(points)),
        key=lambda idx: (points[idx][1], points[idx][0]),
        default=0,
    )
def sort_polar_angle_cos(points, center_point):
    """Sort points by polar angle around ``center_point`` using the cosine.

    Points are ordered by descending cosine of the angle between
    ``point - center_point`` and the x axis; equal cosines are ordered by
    descending distance from the centre. A point that coincides with the
    centre is given cosine 1. Points with identical keys keep their input
    order.

    :param points: points to sort (sequence of ``[x, y]`` pairs)
    :param center_point: the reference point
    :return: a new list holding the same point objects in sorted order
    """

    def sort_key(point):
        dx = point[0] - center_point[0]
        dy = point[1] - center_point[1]
        norm_value = math.sqrt(dx * dx + dy * dy)
        cos_value = 1 if norm_value == 0 else dx / norm_value
        return (cos_value, norm_value)

    # The built-in sort is stable even with reverse=True, so this yields the
    # same ordering as a hand-written insertion sort in O(n log n).
    return sorted(points, key=sort_key, reverse=True)
def vector_angle(vector):
    """Return the counter-clockwise angle from ``[1, 0]`` to ``vector``.

    :param vector: ``[x, y]`` pair
    :return: the angle in radians; ``0`` for the zero vector
    """
    x, y = vector[0], vector[1]
    length = math.sqrt(x * x + y * y)
    if length == 0:
        return 0
    # acos only covers the upper half plane; mirror it for negative y.
    upper_angle = math.acos(x / length)
    return upper_angle if y >= 0 else 2 * math.pi - upper_angle
def coss_multi(v1, v2):
    """Return the scalar cross product of two 2-D vectors.

    :param v1: first vector as ``[x, y]``
    :param v2: second vector as ``[x, y]``
    :return: ``v1.x * v2.y - v1.y * v2.x``
    """
    forward = v1[0] * v2[1]
    backward = v1[1] * v2[0]
    return forward - backward
def graham_scan(points):
    """Compute the convex hull of ``points`` with the Graham scan.

    The lowest point is taken as the pivot, the remaining points are ordered
    by polar angle around it, and a stack keeps only the points that make a
    left turn.

    :param points: list of ``[x, y]`` pairs; it is left unmodified
    :return: list of hull vertices starting at the pivot, in the angular
        order produced by ``sort_polar_angle_cos``; ``None`` (after printing
        a message) when fewer than three points are given
    """
    if len(points) == 0:
        print("點的數量過少,無法構成凸包")
        return

    # Work on a copy so that removing the pivot does not alter the caller's list.
    remaining = list(points)
    bottom_point = remaining.pop(get_bottom_point(remaining))
    sorted_points = sort_polar_angle_cos(remaining, bottom_point)
    if len(sorted_points) < 2:
        print("點的數量過少,無法構成凸包")
        return

    stack = [bottom_point]
    for candidate in sorted_points:
        keep_candidate = True
        # Never look below two entries: the pivot itself must stay on the stack.
        while len(stack) >= 2:
            top = stack[-1]
            next_top = stack[-2]
            v1 = [candidate[0] - next_top[0], candidate[1] - next_top[1]]
            v2 = [top[0] - next_top[0], top[1] - next_top[1]]
            turn = coss_multi(v1, v2)
            if turn < 0:
                # Strict left turn: the current top is a valid hull vertex.
                break
            if turn == 0:
                # Collinear: if the candidate lies on the segment
                # next_top -> top (same direction, not longer) it adds
                # nothing, and popping top would lose a farther point.
                along = v1[0] * v2[0] + v1[1] * v2[1]
                candidate_len = v1[0] * v1[0] + v1[1] * v1[1]
                top_len = v2[0] * v2[0] + v2[1] * v2[1]
                if along >= 0 and candidate_len <= top_len:
                    keep_candidate = False
                    break
            stack.pop()
        if keep_candidate:
            stack.append(candidate)
    return stack
def test1():
    """Plot a small hand-written point set together with its convex hull."""
    points = [
        [1.1, 3.6],
        [2.1, 5.4],
        [2.5, 1.8],
        [3.3, 3.98],
        [4.8, 6.2],
        [4.3, 4.1],
        [4.2, 2.4],
        [5.9, 3.5],
        [6.2, 5.3],
        [6.1, 2.56],
        [7.4, 3.7],
        [7.1, 4.3],
        [7, 4.1],
    ]
    for x, y in points:
        plt.scatter(x, y, marker='o', c='y')

    hull = graham_scan(points)
    # Draw every edge between consecutive hull vertices ...
    for start, end in zip(hull, hull[1:]):
        plt.plot([start[0], end[0]], [start[1], end[1]], c='r')
    # ... and the edge that closes the polygon.
    first, last = hull[0], hull[-1]
    plt.plot([first[0], last[0]], [first[1], last[1]], c='r')
    plt.show()
def test2():
    """Exercise the hull computation on somewhat more complex data.

    Uses the first two columns of the first 50 rows of ``iris.data``; every
    point is drawn and annotated with its row index, then the hull is printed
    and drawn.

    :return: None
    """
    iris = datasets.load_iris()
    points = iris.data[:, 0:2][0:50, :].tolist()

    for label, (x, y) in enumerate(points):
        plt.scatter(x, y, marker='o', c='y')
        plt.annotate(str(label), (x, y))

    hull = graham_scan(points)
    print(hull)
    # Consecutive hull edges first, then the closing edge.
    for start, end in zip(hull, hull[1:]):
        plt.plot([start[0], end[0]], [start[1], end[1]], c='r')
    first, last = hull[0], hull[-1]
    plt.plot([first[0], last[0]], [first[1], last[1]], c='r')
    plt.show()
if __name__ == "__main__":
    # Script entry point: run the Iris demo.
    test2()
經緯度座標下利用凸包,近似計算面積的代碼參考
def cross(A, B):
    """Return the scalar 2-D cross product ``A x B``."""
    ax, ay = A[0], A[1]
    bx, by = B[0], B[1]
    return ax * by - ay * bx
def vectorMinus(a, b):
    """Return ``a - b`` as a tuple with both components multiplied by 1000."""
    dx = (a[0] - b[0]) * 1000
    dy = (a[1] - b[1]) * 1000
    return (dx, dy)
def getLTDis(A, B):
    """Return the haversine distance between two (lon, lat) points in degrees.

    Uses a sphere of radius 6371.393 (Earth's mean radius in kilometres) and
    scales the result by 1000, i.e. the distance is in metres.

    :param A: first point as ``(longitude, latitude)`` in degrees
    :param B: second point as ``(longitude, latitude)`` in degrees
    :return: great-circle distance between A and B
    """
    lon1, lat1, lon2, lat2 = map(radians, [A[0], A[1], B[0], B[1]])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push ``a`` a hair above 1 for antipodal points, which
    # would make asin() raise a domain error.
    c = 2 * asin(sqrt(min(1.0, a)))
    r = 6371.393
    return c * r * 1000.0
def triangleAre(A, B, C):
    """Return the area of triangle ABC via Heron's formula.

    Side lengths come from ``getLTDis``, so the vertices are (lon, lat)
    points and the area is in the square of that function's distance unit.
    """
    x, y, z = getLTDis(A, B), getLTDis(B, C), getLTDis(C, A)
    c = (x + y + z) / 2
    # For needle-like triangles the product may round to a tiny negative
    # number; treat that as zero area instead of letting sqrt() raise.
    return sqrt(max(0.0, c * (c - y) * (c - z) * (c - x)))
def grahamScanArea(data):
    """Return the approximate area of the convex hull of ``data``.

    The hull is built with a monotone-chain scan over the points sorted by
    their two coordinates and then handed to ``AREA``.

    :param data: sequence of ``[longitude, latitude]`` pairs; it is left
        unmodified
    :return: hull area from ``AREA``, or ``0`` when the hull has fewer than
        three vertices
    """
    # Work on a sorted copy so the caller's list keeps its order.
    ordered = sorted(data, key=lambda p: (p[0], p[1]))
    if len(ordered) < 3:
        return 0

    hull = []
    # First pass over the points in ascending order.
    for item in ordered:
        while len(hull) > 1 and cross(vectorMinus(hull[-1], hull[-2]),
                                      vectorMinus(item, hull[-2])) <= 0:
            hull.pop()
        hull.append(item)

    # Second pass walks back from the second-largest point; vertices found in
    # the first pass are never popped.
    k = len(hull)
    for item in reversed(ordered[:-1]):
        while len(hull) > k and cross(vectorMinus(hull[-1], hull[-2]),
                                      vectorMinus(item, hull[-2])) <= 0:
            hull.pop()
        hull.append(item)

    # The walk ends on the starting point again; drop the repeat.
    hull.pop()
    if len(hull) < 3:
        return 0
    return AREA(hull)
def AREA(b):
    """Sum the areas of the fan triangles ``(b[0], b[i], b[i + 1])``.

    :param b: list of polygon vertices
    :return: accumulated triangle area as a float (``0.0`` when there are
        fewer than three vertices)
    """
    total = 0.0
    # Pair every vertex after the anchor with its successor.
    for first, second in zip(b[1:], b[2:]):
        total += triangleAre(b[0], first, second)
    return total