小言_互联网的博客

联通手机信令大数据的处理分析与可视化

508人阅读  评论(0)

我有联通2020年扩样后的具体迁徙人数数据,覆盖所有城市。
如有需要,请到我的其他文章中查找我的QQ联系方式。

数据处理代码:

import pandas as pd
import os
from utils.read_write import eachFile, pdReadCsv

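'''
Average number of people travelling from each community to each mall,
split by day type: 3 = public holiday, 2 = weekend, 1 = working day.

Fields of one input record, in file order:
    * START_GRID_ID  origin grid id               string
    * START_CITY     origin city                  string
    * END_GRID_ID    destination grid id          string
    * END_CITY       destination city             string
    * date           date                         string
    * START_TYPE     origin population type       string  01-visiting 02-resident 03-working 05-lives and works in the same place
    * END_TYPE       destination population type  string  01-visiting 02-resident 03-working
    * POP            number of people             int
    * times          number of trips
'''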


# def test():
#     filepath = os.path.join(root+'000054_0_weekend.txt')
#     data = pd.read_csv(filepath, sep='|', usecols=[0, 2, 4, 7], error_bad_lines=False, engine='python')
#     column = ['START_GRID_ID', 'END_GRID_ID', 'date', 'pop']
#     data.columns = column
#     # data = data[data['date'].isin([20191013])]
#     workFromCom = pd.merge(data, community, left_on='START_GRID_ID', right_on='YGA_Grid_1', how='right')
#     workFromComToMall = pd.merge(workFromCom, mall, left_on='END_GRID_ID', right_on='YGA_Grid_1', how='right')
#     workGroup = workFromComToMall.groupby(['SQCODE', 'mall_name']).agg({'pop': sum})
#     csv = workGroup['pop'].apply(lambda x: int(x / 5))
#     csv.to_csv(filepath + 'holidayFromCommunityToMall.csv', mode='a')


def _read_od_file(filepath):
    """Load the four columns read_file needs from a '|'-separated OD file.

    Columns 0, 2, 4 and 7 are kept and renamed to START_GRID_ID,
    END_GRID_ID, date and pop. Malformed lines are skipped.
    """
    options = dict(sep='|', usecols=[0, 2, 4, 7], engine='python')
    try:
        # Spelling used by pandas >= 1.3; the only one left in pandas 2.x.
        data = pd.read_csv(filepath, on_bad_lines='skip', **options)
    except TypeError:
        # Older pandas rejects the unknown keyword; use the legacy flag.
        data = pd.read_csv(filepath, error_bad_lines=False, **options)
    # NOTE(review): read_csv treats the first line as a header here, so the
    # first record is consumed if the export has no header row - confirm.
    data.columns = ['START_GRID_ID', 'END_GRID_ID', 'date', 'pop']
    return data


def _append_community_to_mall(trips, filename):
    """Total 'pop' per (SQCODE, mall_name) for `trips` and append it to a CSV.

    Reads the module-level `community`, `mall` and `save` objects.
    """
    # Right joins keep every community / mall row even without a matching trip.
    from_community = pd.merge(trips, community, left_on='START_GRID_ID',
                              right_on='YGA_Grid_1', how='right')
    to_mall = pd.merge(from_community, mall, left_on='END_GRID_ID',
                       right_on='YGA_Grid_1', how='right')
    totals = to_mall.groupby(['SQCODE', 'mall_name'])['pop'].sum()
    # NOTE(review): the divisor 5 is kept from the original code; its meaning
    # is not stated anywhere in this file - confirm before changing it.
    scaled = (totals / 5).astype('int64')
    # Append without a header so the output of many input files can be stacked.
    scaled.to_csv(save + filename, mode='a', header=False, index=True)


def read_file(dirpath):
    """Split one OD file by day type and append community-to-mall totals.

    Rows are classified by their integer `date` value:
      * weekend: date == 20191013
      * holiday: date < 20191008
      * work:    date > 20191007 and date != 20191013
    Each subset is joined to `community` and `mall`, summed per
    (SQCODE, mall_name), divided by 5 and appended to
    '<save>{weekend,holiday,work}FromCommunityToMall.csv'.

    Args:
        dirpath: path of the '|'-separated input file (despite the name, this
            is a file path, not a directory).

    Relies on the module-level names `community`, `mall` and `save`.
    """
    print(dirpath)
    data = _read_od_file(dirpath)

    is_weekend = data['date'] == 20191013
    is_holiday = data['date'] < 20191008
    is_workday = (data['date'] > 20191007) & ~is_weekend

    _append_community_to_mall(data[is_weekend], 'weekendFromCommunityToMall.csv')
    _append_community_to_mall(data[is_holiday], 'holidayFromCommunityToMall.csv')
    _append_community_to_mall(data[is_workday], 'workFromCommunityToMall.csv')

def groupby(src='D:\\学习文件\\项目文件\\规土委\\data\\od\\save\\save\\'):
    """Collapse the appended per-file totals into one row per community/mall.

    For each day type, reads the header-less '<kind>FromCommunityToMall.csv'
    in `src` (columns SQCODE, mall_name, pop), sums `pop` per
    (SQCODE, mall_name) and writes '<kind>CommunityToMall.csv' with a header
    row into the same directory.

    The work totals are divided by 6 and the holiday totals by 7, both
    truncated to integers; the weekend totals are written undivided.

    Args:
        src: directory holding the input files and receiving the outputs.
            Defaults to the directory the original script hard-coded.
    """
    # (input file, output file, divisor or None to leave the sum untouched)
    # NOTE(review): 6 and 7 are kept from the original code - presumably the
    # number of days of each type, but this file does not say; confirm.
    jobs = (
        ('workFromCommunityToMall.csv', 'workCommunityToMall.csv', 6),
        ('holidayFromCommunityToMall.csv', 'holidayCommunityToMall.csv', 7),
        ('weekendFromCommunityToMall.csv', 'weekendCommunityToMall.csv', None),
    )
    for source_name, target_name, divisor in jobs:
        data = pd.read_csv(os.path.join(src, source_name), sep=',',
                           names=['SQCODE', 'mall_name', 'pop'])
        totals = data.groupby(['SQCODE', 'mall_name'])['pop'].sum()
        if divisor is not None:
            totals = (totals / divisor).astype('int64')
        totals.to_csv(os.path.join(src, target_name), header=True)

if __name__ == '__main__':
    # NOTE(review): groupby() runs before the per-file loop below and reads
    # from ...\od\save\save\, while read_file() appends to ...\od\comTomall\.
    # The original order is kept; confirm how the intermediate CSVs are meant
    # to reach the directory groupby() reads.
    groupby()

    # Backslashes are escaped explicitly; the resulting paths are unchanged.
    root = 'D:\\学习文件\\项目文件\\规土委\\data\\od\\other\\'
    save = 'D:\\学习文件\\项目文件\\规土委\\data\\od\\comTomall\\'
    grid = 'D:\\学习文件\\项目文件\\规土委\\data\\od\\YGA\\'

    # `community`, `mall` and `save` are read as globals by read_file().
    community_file = 'com_grid.txt'
    community = pdReadCsv(grid + community_file, sep=',')
    mall = pd.read_csv(grid + 'mall_grid.txt', sep=',', dtype=str)

    # Process every entry that eachFile() yields for the input directory.
    for file_name in eachFile(root):
        read_file(root + file_name)


转载:https://blog.csdn.net/qq_30803353/article/details/111612516
查看评论
* 以上用户言论只代表其个人观点,不代表本网站的观点或立场