1、摘要
本文主要讲解:根据公交车GPS数据和乘客刷卡记录,找出乘客上车时的经纬度;再根据该坐标在所乘线路上找到距离最近的公交站点,近似推算出乘客的上车站点名;最后将上车站点与下车站点对应起来,得到用户的OD(起讫点)。
主要思路:
- 按照乘客刷卡对应的POS卡号,找到相对应的车辆定位和时间
- 使用二分法根据用户的刷卡时间找到最近的公交车定位时间,从而对应用户的上车坐标
- 根据乘客乘坐的线路去找到线路名称对应的站点和经纬度
- 按照乘客刷卡对应的POS卡号,找到相对应的线路名
- 根据用户所乘线路,找到离用户上车坐标最近的站点
- 根据用户上车的坐标找到离用户最近的公交车站点坐标,近似推算用户上车的站点
- 使用时间匹配思路找出用户的下车站点(将同一乘客当天的下一次刷卡上车站点近似视为本次出行的下车站点);也可使用隐马尔可夫模型(HMM)推算下车站点,具体请参考文末链接。
2、数据介绍
原始GPS数据如下:
最后的成果截图如下:
线路站点名如下:
上车站点及经纬度如下:
3、相关技术
二分法查找最近的时间思路如下:
(1)首先,从数组的中间元素开始搜索,如果该元素正好是目标元素,则搜索过程结束,否则执行下一步。
(2)如果目标元素大于/小于中间元素,则在数组大于/小于中间元素的那一半区域查找,然后重复步骤(1)的操作。
(3)如果某一步数组为空,则表示找不到目标元素。
def str_search(li, card_car_date_time):
    """Return the timestamp string in ``li`` closest to ``card_car_date_time``.

    Args:
        li: Timestamp strings in ``"%Y/%m/%d %H:%M:%S"`` format, sorted in
            chronological order.
        card_car_date_time: ``datetime`` to match (the card-swipe time).

    Returns:
        The element of ``li`` whose parsed time is nearest to the target
        (the earlier one on a tie), or ``None`` when ``li`` is empty.

    Raises:
        ValueError: If an inspected element does not match the format.
    """
    if not li:
        return None

    time_format = "%Y/%m/%d %H:%M:%S"

    # Binary search for the first position whose time is >= the target.
    low, high = 0, len(li)
    while low < high:
        mid = (low + high) // 2
        if datetime.strptime(li[mid], time_format) < card_car_date_time:
            low = mid + 1
        else:
            high = mid

    # Target lies before the first or after the last fix: clamp to the edge.
    if low == 0:
        return li[0]
    if low == len(li):
        return li[-1]

    # Otherwise the nearest fix is one of the two neighbours of the
    # insertion point; pick whichever is closer in time.
    earlier, later = li[low - 1], li[low]
    gap_before = card_car_date_time - datetime.strptime(earlier, time_format)
    gap_after = datetime.strptime(later, time_format) - card_car_date_time
    return earlier if gap_before <= gap_after else later
4、完整代码
主运行程序入口
找出用户的上车站点经纬度
bus_up_station.py
import os
from datetime import datetime
import pandas as pd
from my_utils.read_write import writeCsv
'''
此文件用于找出用户的上车站点经纬度
'''
# The string above is kept verbatim; it says this file finds the
# longitude/latitude of each passenger's boarding point.
# Switch the working directory as soon as the module is loaded, so the
# relative file names used further down resolve against this hard-coded folder.
os.chdir(r'D:\项目\公交数据处理\SQL\\')
def str_search(li, card_car_date_time):
    """Return the timestamp string in ``li`` closest to ``card_car_date_time``.

    Args:
        li: Timestamp strings in ``"%Y/%m/%d %H:%M:%S"`` format, sorted in
            chronological order.
        card_car_date_time: ``datetime`` to match (the card-swipe time).

    Returns:
        The element of ``li`` whose parsed time is nearest to the target
        (the earlier one on a tie), or ``None`` when ``li`` is empty.

    Raises:
        ValueError: If an inspected element does not match the format.
    """
    if not li:
        return None

    time_format = "%Y/%m/%d %H:%M:%S"

    # Binary search for the first position whose time is >= the target.
    low, high = 0, len(li)
    while low < high:
        mid = (low + high) // 2
        if datetime.strptime(li[mid], time_format) < card_car_date_time:
            low = mid + 1
        else:
            high = mid

    # Target lies before the first or after the last fix: clamp to the edge.
    if low == 0:
        return li[0]
    if low == len(li):
        return li[-1]

    # Otherwise the nearest fix is one of the two neighbours of the
    # insertion point; pick whichever is closer in time.
    earlier, later = li[low - 1], li[low]
    gap_before = card_car_date_time - datetime.strptime(earlier, time_format)
    gap_after = datetime.strptime(later, time_format) - card_car_date_time
    return earlier if gap_before <= gap_after else later
# Match one card swipe to the GPS fix of the same vehicle that str_search
# selects for the swipe time, and take that fix's coordinates.
# card_terminal: user_id routeid plateid date time gpstime
# plate_geo: plateid x y speed gpstime
def compare_time_find_station(card_terminal, plate_geo):
    """Build the boarding record for one card swipe.

    Args:
        card_terminal: One row of the swipe table (an ``itertuples`` row)
            exposing ``user_id``, ``routeid``, ``plateid`` and ``gpstime``.
        plate_geo: GPS rows of the vehicle the card was swiped on.

    Returns:
        ``[user_id, routeid, plateid, gpstime, <col 1>, <col 2>]`` where the
        last two values come from positional columns 1 and 2 of the matched
        GPS row (``x`` and ``y`` per the column note above), or an empty list
        when ``str_search`` returns nothing.
    """
    swipe_moment = datetime.strptime(card_terminal.gpstime, "%Y/%m/%d %H:%M:%S")

    # NOTE(review): the values are sorted as strings; that equals
    # chronological order only if every field is zero-padded — confirm
    # against the data.
    ordered_times = plate_geo['gpstime'].sort_values().tolist()
    matched_time = str_search(ordered_times, swipe_moment)
    if not matched_time:
        return []

    matched_fix = plate_geo[plate_geo['gpstime'] == matched_time]
    return [
        card_terminal.user_id,
        card_terminal.routeid,
        card_terminal.plateid,
        # NOTE(review): the original comment says the bus positioning time is
        # stored here, yet the value written is the swipe row's own gpstime —
        # confirm which one is intended.
        card_terminal.gpstime,
        matched_fix.iat[0, 1],
        matched_fix.iat[0, 2],
    ]
def loop():
    """Resolve a boarding coordinate for every swipe and write them out.

    Reads the module-level ``user_plate`` (swipes) and ``GPS`` (vehicle
    fixes) tables, and hands every non-empty result of
    ``compare_time_find_station`` to ``writeCsv`` in a single call.
    """
    boarding_rows = []
    for swipe in user_plate.itertuples():
        # GPS fixes reported by the vehicle this card was swiped on.
        vehicle_fixes = GPS[GPS['plateid'] == swipe.plateid]
        if vehicle_fixes.empty:
            continue

        # Pair the swipe time with a fix of that vehicle to get coordinates.
        boarding = compare_time_find_station(swipe, vehicle_fixes)
        if boarding:
            boarding_rows.append(boarding)

    writeCsv(boarding_rows, '乘客上车经纬度.csv')
if __name__ == "__main__":
    # Both sheets come from the same workbook; loop() reads GPS and
    # user_plate as module-level names, so they must be bound before it runs.
    gps_Data = 'gps_Data.xlsx'
    GPS = pd.read_excel(io=gps_Data, sheet_name='_GPS20190426')
    user_plate = pd.read_excel(io=gps_Data, sheet_name='查询')
    loop()
find_station_name.py
找到站点名
import os
import pandas as pd
from my_utils.calculateDistance import getDistance
from my_utils.read_write import writeOneCsv
'''
此文件用于找出用户的上车站点名
'''
# The string above is kept verbatim; it says this file finds the name of each
# passenger's boarding stop.
# Switch the working directory as soon as the module is loaded, so the
# relative file names used further down resolve against this hard-coded folder.
os.chdir(r'D:\项目\公交数据处理\SQL\\')
# Pick the stop on the passenger's route that lies closest to the boarding
# coordinate and treat it as the boarding stop.
# up_station columns: user_id,routeid,plateid,gpstime,x,y
def compare_geo_find_station(line_station_geo, up_station, max_distance=0.5):
    """Find the stop of a route nearest to a boarding coordinate.

    Args:
        line_station_geo: Stops of one route, with columns ``routename``,
            ``stopname``, ``x`` and ``y``.
        up_station: One boarding record (an ``itertuples`` row) exposing
            ``user_id``, ``routeid``, ``plateid``, ``gpstime``, ``x``, ``y``.
        max_distance: Exclusive upper bound on the accepted distance, in
            whatever unit ``getDistance`` returns (presumably kilometres —
            TODO confirm). Defaults to the previously hard-coded 0.5.

    Returns:
        ``[user_id, routeid, plateid, gpstime, routename, stopname, stop_x,
        stop_y, distance]`` for the nearest stop, or an empty list when no
        stop is closer than ``max_distance``.
    """
    up_lon = up_station.x
    up_lat = up_station.y

    # Remember only the best candidate; the output row is built once at the
    # end instead of being cleared and rebuilt on every improvement.
    nearest_stop = None
    nearest_distance = max_distance
    for station_geo in line_station_geo.itertuples():
        distance = getDistance(station_geo.x, station_geo.y, up_lon, up_lat)
        # Strict comparison: on equal distances the first stop seen wins.
        if distance < nearest_distance:
            nearest_distance = distance
            nearest_stop = station_geo

    if nearest_stop is None:
        return []

    return [
        up_station.user_id,
        up_station.routeid,
        up_station.plateid,
        up_station.gpstime,
        nearest_stop.routename,
        nearest_stop.stopname,
        nearest_stop.x,
        nearest_stop.y,
        nearest_distance,
    ]
# Translate the route id carried by a swipe into a route name and fetch
# that route's stops.
def find_line_stations(routeid):
    """Return the rows of the module-level ``Route`` table for one route.

    The route name is formed by dropping the first three characters of
    ``str(routeid)`` and appending ``'路'``; rows whose ``routename`` equals
    that name are returned (possibly an empty frame).
    """
    route_number = str(routeid)[3:]
    route_label = route_number + '路'
    on_this_route = Route['routename'] == route_label
    return Route[on_this_route]
def loop():
    """Resolve the boarding stop name for every boarding record.

    Iterates the module-level ``up_stations`` table. For each record, the
    stops of its route are looked up with ``find_line_stations``, and the
    nearest one is chosen by ``compare_geo_find_station``. Every non-empty
    result is written with ``writeOneCsv`` to ``up_station_name.csv``.

    The former trailing ``del up_station`` was removed: the loop variable is
    rebound on every iteration so deleting it achieves nothing, and deleting
    it after the loop fails when the table is empty.
    """
    for up_station in up_stations.itertuples():
        # Stops (with coordinates) of the route this passenger rode.
        line_stations = find_line_stations(up_station.routeid)
        if line_stations.empty:
            continue

        # Stop of that route nearest to the boarding coordinate.
        up_station_name = compare_geo_find_station(line_stations, up_station)
        if up_station_name:
            writeOneCsv(up_station_name, 'up_station_name.csv')
if __name__ == "__main__":
    gps_Data = 'gps_Data.xlsx'
    # Route sheet: routename stopname x y. find_line_stations() reads it as
    # the module-level name Route.
    Route = pd.read_excel(io=gps_Data, sheet_name='Route')
    # Boarding coordinates, read as GBK-encoded CSV; loop() reads the table
    # as the module-level name up_stations.
    card_on_out = '乘客上车经纬度.csv'
    up_stations = pd.read_csv(filepath_or_buffer=card_on_out, encoding='gbk', engine='python')
    loop()
on_out_station.py
计算襄阳公交车OD
# -*- coding: utf-8
import os
from datetime import datetime
import pandas as pd
from my_utils.calculateDistance import getDistance
from my_utils.read_write import writeOneCsv
'''
此文件用于找出用户的上车下车站点名,即用户od
'''
# The string above is kept verbatim; it says this file finds each passenger's
# boarding and alighting stop names, i.e. the passenger OD.
# Switch the working directory as soon as the module is loaded, so the
# relative file names used further down resolve against this hard-coded folder.
os.chdir(r'D:\项目\公交数据处理\SQL\\')
def minNums(startTime, endTime):
    """Return the number of whole minutes from ``startTime`` to ``endTime``.

    The difference is converted to seconds, divided by 60 and truncated
    toward zero by ``int`` (so -90 seconds yields -1).
    """
    elapsed = endTime - startTime
    return int(elapsed.total_seconds() / 60)
# group columns: user_id,routeid,plateid,gpstime,routename,stopname,x,y,distance
def on_out_bus(user_id, group):
    """Pair each boarding of one passenger with an assumed alighting stop.

    The rows are sorted by ``gpstime``. For every row the following row is
    taken as the alighting candidate; for the last row of a multi-row group
    the first row is the candidate. A pair is written with ``writeOneCsv``
    to ``up_down_stations.csv`` only when the two stop names differ and the
    candidate's time is later than the boarding time.

    Written row: routename, user_id, routeid, boarding stop, boarding x,
    boarding y, boarding time, alighting stop, alighting x, alighting y,
    alighting time, minutes between the two times, distance rounded to 4
    decimals.

    Args:
        user_id: Identifier placed in the second field of each written row.
        group: Boarding records of that passenger; columns are read by
            position as listed in the comment above.

    Returns:
        None.
    """
    single_boarding_count = 0  # tallied as in the original, never read back
    trips = group.sort_values('gpstime')
    trip_count = trips.shape[0]

    for position in range(trip_count):
        board_time = trips.iat[position, 3]
        board_stop = trips.iat[position, 5]
        board_lon = trips.iat[position, 6]
        board_lat = trips.iat[position, 7]
        record = [
            trips.iat[position, 4],
            user_id,
            trips.iat[position, 1],
            board_stop,
            board_lon,
            board_lat,
            board_time,
        ]

        if trip_count > position + 1:
            # Default: the next boarding marks where this ride ended.
            alight_position = position + 1
        elif trip_count == 1:
            # A single boarding gives nothing to infer an alighting stop from.
            single_boarding_count += 1
            continue
        else:
            # Last record of the group: link it back to the first one.
            # NOTE(review): after the ascending sort the first row's time
            # cannot exceed the last row's, so this pairing looks like it can
            # never pass the time check below — confirm the intent.
            alight_position = 0

        alight_time = trips.iat[alight_position, 3]
        alight_stop = trips.iat[alight_position, 5]
        # Keep the pair only if the stop changed and time moved forward.
        if not (board_stop != alight_stop and alight_time > board_time):
            continue

        alight_lon = trips.iat[alight_position, 6]
        alight_lat = trips.iat[alight_position, 7]
        ride_minutes = minNums(board_time, alight_time)
        ride_distance = getDistance(float(board_lon), float(board_lat), float(alight_lon), float(alight_lat))
        record.extend([
            alight_stop,
            alight_lon,
            alight_lat,
            alight_time,
            ride_minutes,
            round(ride_distance, 4),
        ])
        writeOneCsv(record, 'up_down_stations.csv')
def get_bus_station():
    """Run ``on_out_bus`` for every (key, rows) pair of ``grouped_upstation``.

    ``grouped_upstation`` is a module-level name that the caller binds before
    invoking this function.
    """
    for passenger_id, passenger_rows in grouped_upstation:
        on_out_bus(passenger_id, passenger_rows)
if __name__ == '__main__':
    up_station_name = 'up_station_name.csv'
    # Every column is read as text; gpstime is then parsed explicitly.
    up_station = pd.read_csv(up_station_name, engine='python', dtype='str', sep=',')
    up_station['gpstime'] = up_station['gpstime'].map(
        lambda raw: datetime.strptime(raw, '%Y/%m/%d %H:%M:%S')
    )
    up_station['date'] = up_station['gpstime'].map(lambda moment: moment.date())

    # Process one calendar day at a time, and within a day one passenger at
    # a time; get_bus_station() reads grouped_upstation as a module global.
    for _, day_rows in up_station.groupby('date'):
        grouped_upstation = day_rows.groupby("user_id")
        get_bus_station()
5、参考链接
转载:https://blog.csdn.net/qq_30803353/article/details/117265965
查看评论