飞道的博客

PyTorch搭建基于图神经网络(GCN)的天气推荐系统(附源码和数据集)

409人阅读  评论(0)

需要源码和数据集请点赞关注收藏后评论区留言~~~

一、背景

极端天气情况一直困扰着人们的工作和生活。不同企业或工种对极端天气的关注点不同,但是目前主流的天气推荐系统是直接将天气信息推送给全部用户。这意味着天气信息在到达用户之前没有经过筛选,重要的信息容易被淹没,会降低用户的满意度,甚至给用户造成经济损失。我们计划开发一个基于图神经网络的天气靶向模型,根据用户的历史交互行为,判断不同天气对其的利害程度。如果有必要,则将该极端天气情况推送给该用户,让其有时间做好应对准备。该模型能够减少不必要的信息传递,提升用户的体验感。

二、模型介绍

本节先介绍模型所使用的数据集:

(一)数据集共有三个txt文件,分别是user.txt,weather.txt,rating.txt。这些文件一共包含900名用户,1600个天气状况,95964条用户的历史交互记录。

  1. user.txt

用户的信息记录在user.txt中。格式如下:

用户ID\t年龄\t性别\t职业\t地理位置

  2. weather.txt

天气的信息记录在weather.txt中。格式如下:

天气ID\t天气类型\t温度\t湿度\t风速 

  3. rating.txt

用户的历史交互记录在rating.txt中。格式如下:

用户ID\t天气ID\t评分

三、项目结构

如下图 data里面存放了数据集

四、运行结果

开始训练  可以看到第一行显示了一些训练的基本配置内容 包括用的设备cpu 训练批次 学习率等等

 可以看出随着训练轮数的增加,损失值(loss)在不断降低

训练过程中会记录每一轮的训练集和测试集损失值,并自动保存测试集损失值最低时的模型参数

 结果可视化如下

 五、代码

部分源码如下

train类


  
  1. import pandas as pd
  2. import time
  3. from utils import fix_seed_torch, draw_loss_pic
  4. import argparse
  5. from model import GCN
  6. from Logger import Logger
  7. from mydataset import MyDataset
  8. import torch
  9. from torch.nn import MSELoss
  10. from torch.optim import Adam
  11. from torch.utils.data import DataLoader, random_split
  12. import sys
  13. import os
  # Training script body: parses hyperparameters, loads the three data files,
  # builds the GCN model and optimises the square root of the MSE loss.
  # NOTE(review): this listing has lost its indentation, so the nesting of the
  # loops below cannot be read off the text — confirm against the real file.
  14. os.environ[ 'KMP_DUPLICATE_LIB_OK']= 'TRUE'
  15. # Fix the random seed
  16. fix_seed_torch(seed= 2021)
  17. # Set the training hyperparameters
  18. parser = argparse. ArgumentParser()
  19. parser. add_argument( '--gcn_layers', type= int, default= 2, help= 'the number of gcn layers')
  20. parser. add_argument( '--n_epochs', type= int, default= 20, help= 'the number of epochs')
  21. parser. add_argument( '--embedSize', type= int, default= 64, help= 'dimension of user and entity embeddings')
  22. parser. add_argument( '--batch_size', type= int, default= 1024, help= 'batch size')
  23. parser. add_argument( '--lr', type= float, default= 0.001, help= 'learning rate')
  24. parser. add_argument( '--ratio', type= float, default= 0.8, help= 'size of training dataset')
  25. args = parser. parse_args()
  26. # Use CUDA when it is available, otherwise fall back to the CPU
  27. device = torch. device( "cuda") if torch.cuda. is_available() else torch. device( "cpu")
  28. args.device = device
  29. # Load the user features, weather features and ratings
  30. user_feature = pd. read_csv( './data/user.txt', encoding= 'utf-8', sep= '\t')
  31. item_feature = pd. read_csv( './data/weather.txt', encoding= 'utf-8', sep= '\t')
  32. rating = pd. read_csv( './data/rating.txt', encoding= 'utf-8', sep= '\t')
  33. # Build the dataset and split it into train/test subsets by args.ratio
  34. dataset = MyDataset(rating)
  35. trainLen = int(args.ratio * len(dataset))
  36. train, test = random_split(dataset, [trainLen, len(dataset) - trainLen])
  37. train_loader = DataLoader(train, batch_size=args.batch_size, shuffle=True, pin_memory=True)
  38. test_loader = DataLoader(test, batch_size= len(test))
  39. # Log the training hyperparameters
  40. start_time = '{}'. format(time. strftime( "%m-%d-%H-%M", time. localtime()))
  41. logger = Logger( './log/log-{}.txt'. format(start_time))
  42. logger. info( ' '. join( '%s: %s' % (k, str(v)) for k, v in sorted( dict( vars(args)). items())))
  43. # Define the model
  44. model = GCN(args, user_feature, item_feature, rating)
  45. model. to(device)
  46. # Define the optimizer
  47. optimizer = Adam(model. parameters(), lr=args.lr, weight_decay= 0.001)
  48. # Define the loss function
  49. loss_function = MSELoss()
  50. train_result = []
  51. test_result = []
  52. # Lowest test loss seen so far
  53. best_loss = sys.float_info.max
  54. # Training loop
  55. for i in range(args.n_epochs):
  56. model. train()
  57. for batch in train_loader:
  58. optimizer. zero_grad()
  59. prediction= model(batch[ 0]. to(device),batch[ 1]. to(device))
  60. train_loss=torch. sqrt( loss_function(batch[ 2]. float(). to(device),prediction))
  61. train_loss. backward()
  62. optimizer. step()
  63. train_result. append(train_loss. item())
  64. model. eval()
  65. for data in test_loader:
  66. prediction= model(data[ 0]. to(device),data[ 1]. to(device))
  67. test_loss=torch. sqrt( loss_function(data[ 2]. float(). to(device),prediction))
  68. test_loss=test_loss. item()
  69. if best_loss>test_loss:
  70. best_loss=test_loss
  71. torch. save(model. state_dict(), './model/bestModeParms-{}.pth'. format(start_time))
  72. test_result. append(test_loss)
  73. logger. info( "Epoch{:d}:trainLoss{:.4f},testLoss{:.4f}". format(i,train_loss,test_loss))
  # NOTE(review): with the indentation lost it is unclear which statement this
  # `else:` pairs with; the checkpoint file name below is hard-coded.
  74. else:
  75. model. load_state_dict(torch. load( "./model/bestModeParms-11-18-19-47.pth"))
  76. user_id= input( "请输入用户id")
  # NOTE(review): this reads rating['itemId'], while the model and dataset
  # listings read rating['item_id'] — confirm the actual column name.
  77. item_num=rating[ 'itemId']. max()+ 1
  78. u=torch. tensor([ int(user_id) for i in range(item_num)],dtype= float)
  # NOTE(review): the next line starts in the middle of a string literal, so
  # one or more lines of the original script appear to be missing here.
  79. 气ID ".format(user_id))
  80. print(i[0]for i in result)
  81. # Plot the loss curves
  82. draw_loss_pic(train_result, test_result)

Logger类


  
  1. import sys
  2. import os
  3. import logging
  class Logger(object):
      """Small logging wrapper that sends every message to a file and to stdout.

      Args:
          filename: Path of the log file. It is also used as the name of the
              underlying ``logging`` logger. Missing parent directories are
              created before the file is opened.
      """

      def __init__(self, filename):
          self.logger = logging.getLogger(filename)
          self.logger.setLevel(logging.DEBUG)
          # logging.getLogger returns the same object for the same name, so a
          # second Logger(filename) would otherwise attach a second pair of
          # handlers and every message would be written twice.
          if self.logger.handlers:
              return

          formatter = logging.Formatter('%(asctime)s: %(message)s',
                                        datefmt='%Y-%m-%d %H-%M-%S')

          # FileHandler does not create directories; do it here so that a
          # fresh checkout without the log directory still works.
          log_dir = os.path.dirname(filename)
          if log_dir:
              os.makedirs(log_dir, exist_ok=True)

          # write into file
          fh = logging.FileHandler(filename)
          fh.setLevel(logging.DEBUG)
          fh.setFormatter(formatter)

          # show on console
          ch = logging.StreamHandler(sys.stdout)
          ch.setLevel(logging.DEBUG)
          ch.setFormatter(formatter)

          self.logger.addHandler(fh)
          self.logger.addHandler(ch)

      def _flush(self):
          """Flush every handler attached to the underlying logger."""
          for handler in self.logger.handlers:
              handler.flush()

      def info(self, message):
          """Log ``message`` at INFO level and flush the handlers immediately."""
          self.logger.info(message)
          self._flush()

model类


  
  1. import numpy as np
  2. import torch.nn
  3. import torch.nn as nn
  4. from utils import *
  5. from torch.nn import Module
  6. import scipy.sparse as sp
  class GCN_Layer(Module):
      """One propagation layer with a linear term and an interaction term.

      Args:
          inF: Width of the incoming node features.
          outF: Width of the features produced by the layer.
      """

      def __init__(self, inF, outF):
          super().__init__()
          # W1 transforms the aggregated features, W2 the interaction term.
          self.W1 = torch.nn.Linear(inF, outF)
          self.W2 = torch.nn.Linear(inF, outF)

      def forward(self, graph, selfLoop, features):
          """Propagate ``features`` over ``graph`` once.

          Args:
              graph: Sparse adjacency matrix multiplied with ``features``.
              selfLoop: Sparse matrix added to ``graph`` for the linear term.
              features: Dense node-feature matrix.

          Returns:
              LeakyReLU of the sum of the two transformed terms.
          """
          with_self = torch.sparse.mm(graph + selfLoop, features)
          neighbours = torch.sparse.mm(graph, features)
          linear_term = self.W1(with_self)
          # Element-wise product of each node's aggregated neighbourhood with
          # the node's own features.
          interaction_term = self.W2(torch.mul(neighbours, features))
          activation = nn.LeakyReLU()
          return activation(linear_term + interaction_term)
  16. ######################
  17. # 请你补充代码 #
  18. ######################
  class GCN(Module):
      """Rating predictor that propagates user/weather embeddings over a graph.

      Users and items are embedded from their id and attribute columns, the
      embeddings are propagated through ``args.gcn_layers`` ``GCN_Layer``
      modules over a normalized user-item graph, and a rating is predicted as
      the dot product of the transformed user and item representations.

      Args:
          args: Namespace providing ``device``, ``gcn_layers`` and ``embedSize``.
          user_feature: DataFrame with the columns ``id``, ``age``, ``gender``,
              ``occupation`` and ``location``.
          item_feature: DataFrame with the columns ``id``, ``type``,
              ``temperature``, ``humidity`` and ``windSpeed``.
          rating: DataFrame with the columns ``user_id`` and ``item_id``;
              ``buildGraph`` reads its first three columns positionally
              (presumably user id, item id, rating — confirm the column order).

      Raises:
          ValueError: If ``args.embedSize`` differs from the concatenated
              feature width produced by ``getFeature``.
      """

      # Width of the concatenated embeddings: 32 + 4 + 2 + 8 + 18 for users
      # and 32 + 8 + 8 + 8 + 8 for items.
      _FEATURE_DIM = 64

      def __init__(self, args, user_feature, item_feature, rating):
          super().__init__()
          # The GCN layers and the output projection are sized from embedSize
          # while the input features have a fixed width; any other value would
          # only fail later with an opaque shape error inside forward().
          if args.embedSize != self._FEATURE_DIM:
              raise ValueError(
                  "embedSize must be {} to match the concatenated feature "
                  "embeddings, got {}".format(self._FEATURE_DIM, args.embedSize))
          self.args = args
          self.device = args.device
          self.user_feature = user_feature
          self.item_feature = item_feature
          self.rating = rating
          # Plain ints so they can be used directly as sizes and split lengths.
          self.num_user = int(rating['user_id'].max()) + 1
          self.num_item = int(rating['item_id'].max()) + 1
          # user embedding
          self.user_id_embedding = nn.Embedding(user_feature['id'].max() + 1, 32)
          self.user_age_embedding = nn.Embedding(user_feature['age'].max() + 1, 4)
          self.user_gender_embedding = nn.Embedding(user_feature['gender'].max() + 1, 2)
          self.user_occupation_embedding = nn.Embedding(user_feature['occupation'].max() + 1, 8)
          self.user_location_embedding = nn.Embedding(user_feature['location'].max() + 1, 18)
          # item embedding
          self.item_id_embedding = nn.Embedding(item_feature['id'].max() + 1, 32)
          self.item_type_embedding = nn.Embedding(item_feature['type'].max() + 1, 8)
          self.item_temperature_embedding = nn.Embedding(item_feature['temperature'].max() + 1, 8)
          self.item_humidity_embedding = nn.Embedding(item_feature['humidity'].max() + 1, 8)
          self.item_windSpeed_embedding = nn.Embedding(item_feature['windSpeed'].max() + 1, 8)
          # self loop over all user and item nodes
          self.selfLoop = self.getSelfLoop(self.num_user + self.num_item)
          # stacked GCN layers
          self.GCN_Layers = torch.nn.ModuleList()
          for _ in range(self.args.gcn_layers):
              self.GCN_Layers.append(GCN_Layer(self.args.embedSize, self.args.embedSize))
          self.graph = self.buildGraph()
          # The input features and every layer output are concatenated, hence
          # (gcn_layers + 1) blocks on the input side.
          self.transForm = nn.Linear(in_features=self.args.embedSize * (self.args.gcn_layers + 1),
                                     out_features=self.args.embedSize)

      def getSelfLoop(self, num):
          """Return a sparse ``num`` x ``num`` identity matrix on ``self.device``."""
          num = int(num)
          diagonal = torch.arange(num, dtype=torch.long)
          indices = torch.stack((diagonal, diagonal))
          values = torch.ones(num, dtype=torch.float32)
          return torch.sparse_coo_tensor(indices, values, (num, num)).to(self.device)

      def buildGraph(self):
          """Build the normalized user-item adjacency as a sparse tensor.

          The first three columns of ``self.rating`` are used as row index,
          column index and edge weight of a ``num_user`` x ``num_item`` matrix,
          which is placed in the off-diagonal blocks of a square matrix over
          all nodes and scaled by the inverse square roots of its row and
          column sums.

          Returns:
              A sparse COO float tensor of shape
              ``(num_user + num_item, num_user + num_item)`` on ``self.device``.
          """
          triples = self.rating.values
          interactions = sp.coo_matrix(
              (triples[:, 2], (triples[:, 0], triples[:, 1])),
              shape=(self.num_user, self.num_item)).tocsr()
          adjacency = sp.bmat([
              [sp.csr_matrix((interactions.shape[0], interactions.shape[0])), interactions],
              [interactions.T, sp.csr_matrix((interactions.shape[1], interactions.shape[1]))],
          ])
          # np.asarray handles both the matrix and the ndarray result of sum();
          # the epsilon keeps isolated nodes (zero sum) from dividing by zero.
          row_sums = np.asarray(adjacency.sum(axis=1)).ravel()
          col_sums = np.asarray(adjacency.sum(axis=0)).ravel()
          row_scale = sp.diags(1 / (np.sqrt(row_sums) + 1e-8))
          col_scale = sp.diags(1 / (np.sqrt(col_sums) + 1e-8))
          normalized = (row_scale @ adjacency @ col_scale).tocoo()
          indices = torch.as_tensor(np.vstack((normalized.row, normalized.col)), dtype=torch.long)
          values = torch.as_tensor(normalized.data, dtype=torch.float32)
          graph = torch.sparse_coo_tensor(indices, values, tuple(normalized.shape))
          return graph.to(self.device)

      def _embed_column(self, embedding, frame, column):
          """Look up ``embedding`` for every value of ``frame[column]``."""
          index = torch.as_tensor(frame[column].to_numpy(), dtype=torch.long)
          return embedding(index.to(self.device))

      def getFeature(self):
          """Return the stacked feature matrix: user rows first, then item rows.

          Each row is the concatenation of the id embedding and the attribute
          embeddings of one user or one item.
          """
          users = self.user_feature
          items = self.item_feature
          # embeddings looked up from the user columns
          user_emb = torch.cat((
              self._embed_column(self.user_id_embedding, users, 'id'),
              self._embed_column(self.user_age_embedding, users, 'age'),
              self._embed_column(self.user_gender_embedding, users, 'gender'),
              self._embed_column(self.user_occupation_embedding, users, 'occupation'),
              self._embed_column(self.user_location_embedding, users, 'location'),
          ), dim=1)
          # embeddings looked up from the weather columns
          item_emb = torch.cat((
              self._embed_column(self.item_id_embedding, items, 'id'),
              self._embed_column(self.item_type_embedding, items, 'type'),
              self._embed_column(self.item_temperature_embedding, items, 'temperature'),
              self._embed_column(self.item_humidity_embedding, items, 'humidity'),
              self._embed_column(self.item_windSpeed_embedding, items, 'windSpeed'),
          ), dim=1)
          concat_emb = torch.cat([user_emb, item_emb], dim=0)
          return concat_emb.to(self.device)

      def forward(self, users, items):
          """Predict one score per ``(users[k], items[k])`` pair.

          Args:
              users: Index tensor selecting rows of the user block
                  (presumably user ids — confirm against the caller).
              items: Index tensor selecting rows of the item block.

          Returns:
              Tensor with the dot product of the transformed user and item
              representations for every pair.
          """
          features = self.getFeature()
          layer_outputs = [features]
          # `layer` instead of `GCN_Layer`: the original loop variable shadowed
          # the GCN_Layer class inside this method.
          for layer in self.GCN_Layers:
              features = layer(self.graph, self.selfLoop, features)
              layer_outputs.append(features)
          final_emb = torch.cat(layer_outputs, dim=1)
          user_emb, item_emb = torch.split(final_emb, [self.num_user, self.num_item])
          user_emb = self.transForm(user_emb[users])
          item_emb = self.transForm(item_emb[items])
          prediction = torch.mul(user_emb, item_emb).sum(1)
          return prediction
  103. ######################
  104. # 请你补充代码 #
  105. ######################

mydataset类


  
  1. from torch.utils.data import Dataset
  2. import pandas as pd
  class MyDataset(Dataset):
      """Dataset of ``(user_id, item_id, rating)`` triples from a ratings frame.

      Args:
          rating: DataFrame with the columns ``user_id``, ``item_id`` and
              ``rating``.
      """

      def __init__(self, rating):
          super().__init__()
          self.user = rating['user_id']
          self.weather = rating['item_id']
          self.rating = rating['rating']

      def __len__(self):
          """Return the number of rating rows."""
          return len(self.rating)

      def __getitem__(self, item):
          """Return the ``item``-th ``(user_id, item_id, rating)`` triple.

          Rows are addressed by position: samplers hand out positions in
          ``range(len(self))``, and label-based lookup would fail for a frame
          whose index is not 0..n-1 (for example after filtering rows).
          """
          return self.user.iloc[item], self.weather.iloc[item], self.rating.iloc[item]

utils类(注:下方贴出的代码与上面的mydataset类完全相同,疑似原文误贴;train中导入的fix_seed_torch和draw_loss_pic应定义在utils中,但本文未给出)


  
  1. from torch.utils.data import Dataset
  2. import pandas as pd
  class MyDataset(Dataset):
      """Expose a ratings DataFrame as ``(user_id, item_id, rating)`` samples.

      Args:
          rating: DataFrame providing the ``user_id``, ``item_id`` and
              ``rating`` columns.
      """

      def __init__(self, rating):
          super().__init__()
          self.user = rating['user_id']
          self.weather = rating['item_id']
          self.rating = rating['rating']

      def __len__(self):
          """Return how many rating rows the dataset holds."""
          return len(self.rating)

      def __getitem__(self, item):
          """Return the sample stored at position ``item``.

          Positional access (``.iloc``) is used because the requested value is
          a position in ``range(len(self))``; looking it up as an index label
          raises ``KeyError`` when the frame does not carry a 0..n-1 index.
          """
          return self.user.iloc[item], self.weather.iloc[item], self.rating.iloc[item]

创作不易 觉得有帮助请点赞关注收藏~~~


转载:https://blog.csdn.net/jiebaoshayebuhui/article/details/127929909
查看评论
* 以上用户言论只代表其个人观点,不代表本网站的观点或立场