1、摘要
本文主要内容:使用百度情绪分析接口评估股票近半年的新闻,评估新闻属于利好还是利空,最终统计利好和利空的比例,供选股做参考
本文福利:赠送百度云账号信息——AppID:应用的唯一标识;AppKey:公钥(相当于账号);AppSecret:私钥(相当于密码)
2、主要思路
- 选择自己要评估的股票代码数组
- 从金融界行情中心获取股票新闻信息
- 得到页面的内容并保存
- 调用百度云自然语言处理接口,进行情感倾向分析
- 统计利好和利空的比例
3、代码
import os
import re
import lxml # 一个Python库,使用它可以轻松处理XML和HTML文件,还可以用于web爬取
import requests
from aip import AipNlp
from lxml import etree
from matplotlib import pyplot
from utils.read_write import readTxt, writeOneCsv, readToStr
# Working directory where the per-stock .txt files and the summary CSV are written.
os.chdir(r'D:\项目\量化交易')
pyplot.rcParams['font.sans-serif'] = ['SimHei']  # use SimHei so Chinese labels render correctly
pyplot.rcParams['axes.unicode_minus'] = False  # render the minus sign properly with a CJK font
# 得到最大页码的数值
def getallpage(url):
    """Return the highest page number of a stock-news listing as a
    one-element list of strings, e.g. ['12'], or ['1'] when the page is
    not paginated.

    url: news-listing URL on stock.jrj.com.cn (GBK-encoded HTML).
    """
    pagedata = requests.get(url).content.decode("gbk")
    mytree = lxml.etree.HTML(pagedata)
    # str.find returns -1 when the marker is absent (truthy!) and 0 when it
    # matches at position 0 (falsy), so test against -1 explicitly instead
    # of relying on truthiness.
    if pagedata.find("page_newslib") != -1:
        # The last-but-one <a> in the pager holds the final page number.
        data = mytree.xpath("//*[@class=\"page_newslib\"]//a[last()-1]/text()")
        # Guard against an empty xpath result so callers can safely index [0].
        return data if data else ['1']
    return ['1']
# 得到页面的内容并保存
def everypagecontent(url, number):
    """Fetch one news-listing page, append each headline to '<number>.txt'
    (one per line, CRLF-terminated), and return the list of headlines.

    Returns None when the request fails or the body cannot be decoded,
    so callers can keep iterating over the remaining pages.
    """
    try:
        pagedata = requests.get(url).content.decode("gbk")
    except (requests.RequestException, UnicodeDecodeError):
        # Narrowed from a bare except: only network / decoding failures
        # are expected here ("server timeout").
        print("服务器超时")
        return None
    mytree = lxml.etree.HTML(pagedata)
    # Headline texts live in <li><span><a> under the "newlist" container.
    datas = mytree.xpath("//*[@class = \"newlist\"]//li/span/a/text()")
    # Open the file once and batch the writes instead of reopening it for
    # every single headline.
    with open(number + ".txt", "a") as file:
        file.writelines(data + "\r\n" for data in datas)
    return datas
# 调用百度云自然语言处理接口,进行情感倾向分析
def analyze(number):
    """Run Baidu NLP sentiment classification over the saved headlines of
    one stock and append [code, company, positive-ratio, negative-ratio]
    to the summary CSV.

    number: stock code string; reads the '<number>.txt' file produced by
    everypagecontent().
    """
    # Baidu Cloud credentials: APP_ID uniquely identifies the application,
    # API_KEY is the public key (acts like an account name) and SECRET_KEY
    # is the private key (acts like a password).
    APP_ID = '22793513'
    API_KEY = '0veui3PVDnoXSc3ZmGNI3Et5'
    SECRET_KEY = 'BGvXXlPQKigySi0MqG26i17bfnRPl8wy'
    aip_nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
    lines = readTxt(number + ".txt")
    # Renamed from `str` — the original shadowed the builtin.
    content = readToStr(number + ".txt")
    # Headlines are prefixed with the company name in brackets, e.g.
    # "[贵州茅台]…"; the most frequent tag is taken as the company name.
    companys = re.findall(r"\[(.+?)\]", content)
    company = max(companys, key=companys.count) if companys else ''
    pos = 0
    nav = 0
    total = 0
    for line in lines:
        aline = line.replace("\r\n", "").strip()
        if not aline:
            continue
        try:
            result = aip_nlp.sentimentClassify(aline)  # Baidu sentiment API
            positive = result['items'][0]['positive_prob']
            nagative = result['items'][0]['negative_prob']
        except Exception:
            # Best-effort: skip headlines the API rejects (rate limits,
            # unsupported characters, malformed responses).
            continue
        total += 1
        if positive >= nagative:
            pos += 1
        else:
            nav += 1
    # Compute the ratios once after the loop instead of on every iteration;
    # both default to 0 when no headline was classified.
    avgpos = pos / total if total else 0
    navavg = nav / total if total else 0
    print(company)
    print(avgpos)
    print(navavg)
    writeOneCsv([number, company, avgpos, navavg], '各种股票的情绪.csv')
def getpageurl(url):
    """Download every page of a stock's news listing and save the headlines.

    url: first-page URL of the form
    'http://stock.jrj.com.cn/share,<code>,ggxw.shtml'.  The stock code is
    parsed out of the URL itself so the function no longer depends on the
    module-level `number` variable.
    """
    match = re.search(r"share,(\d+),", url)
    # Fall back to the module-level `number` for any caller that passes a
    # URL in an unexpected format (preserves the original behavior).
    code = match.group(1) if match else number
    pagenumber = getallpage(url)[0]
    for page in range(1, int(pagenumber) + 1):
        # Page 1 has no "_<n>" suffix in its URL.
        if page == 1:
            page_url = "http://stock.jrj.com.cn/share," + code + ",ggxw.shtml"
        else:
            page_url = ("http://stock.jrj.com.cn/share," + code +
                        ",ggxw_" + str(page) + ".shtml")
        everypagecontent(page_url, code)
# Stock codes (Shanghai/Shenzhen A-share) to evaluate.
numbers = ['600519', '000661', '002241', '300677', '000860', '300750', '600436', '603939', '300347', '603429']
# Phase 1: for each stock, fetch all news-listing pages and save the
# headlines into '<code>.txt'.
for number in numbers:
    print(number)
    url = "http://stock.jrj.com.cn/share," + number + ",ggxw.shtml"
    getpageurl(url)
# Phase 2: read each saved file, run sentiment analysis and write the
# positive/negative ratios to the summary CSV.
for number in numbers:
    print(number)
    analyze(number)
转载:https://blog.csdn.net/qq_30803353/article/details/114791758
查看评论