一、简介
"时间序列+金融"是一个很有"钱"景的话题,若想开始DeepLearning+TimeSeries+Stock,首先得拿到数据。本文提供了一种股票数据获取的源代码。
二、代码
1、首先,将要获取数据的股票按照图中xlsx的格式整理成一个文件(如 stock_names.xlsx),sheet名设置为"stock_names"。表格共四列,依次为:股票代码、起始时间、终止时间、股票名称;其中日期两列的表头必须为"起始时间"和"终止时间"(代码按这两个列名解析日期)。如下图:
2、然后直接运行下列代码,配置好输入(stock_names.xlsx)和输出路径,就可以实现数据获取。
import os
import baostock as bs
import pandas as pd
from datetime import datetime
from datetime import timedelta# 自定义的日期解析函数,仅保留日期部分
def custom_date_parser(x):
    """Parse a ``'%Y-%m-%d %H:%M:%S'`` string and keep only the date part."""
    return datetime.strptime(x, '%Y-%m-%d %H:%M:%S').date()


class stock_data():
    """Fetch daily K-line data through baostock and append MACD / KDJ columns."""

    def __init__(self):
        # Flat list of indicator periods, see init_baostock() for the layout.
        self.params = self.init_baostock()

    def init_baostock(self):
        """Return the indicator periods as a flat list of nine integers.

        Layout of the returned list:
          * [0:3] -> 12, 26, 9  : short EMA span, long EMA span and DEA span
                                  used for the MACD columns.
          * [3:6] -> 6, 12, 24  : not read anywhere in this class
                                  (presumably RSI periods - TODO confirm).
          * [6:9] -> 9, 3, 3    : KDJ look-back window and K / D smoothing
                                  periods.

        An earlier variant read these values from 'configs/input_params.txt'
        (one comma-separated row per indicator); they are hard-coded here.
        """
        params = [12, 26, 9, 6, 12, 24, 9, 3, 3]
        return params

    def _add_indicators(self, frame):
        """Append DIFF / DEA / MACD and k / d / j columns to ``frame`` in place.

        ``frame`` must contain numeric 'close', 'high' and 'low' columns.
        The same DataFrame object is returned for convenience.
        """
        short_span, long_span, dea_span = self.params[0:3]
        kdj_window, k_period, d_period = self.params[6:9]

        # MACD: difference of two EMAs, its own EMA, and twice their gap.
        close = frame['close']
        short_ema = close.ewm(span=short_span).mean()
        long_ema = close.ewm(span=long_span).mean()
        frame['DIFF'] = short_ema - long_ema
        frame['DEA'] = frame['DIFF'].ewm(span=dea_span).mean()
        frame['MACD'] = 2 * (frame['DIFF'] - frame['DEA'])

        # KDJ: rolling extremes over the window; the first rows (fewer than
        # `kdj_window` samples) fall back to the expanding min / max.
        low_list = frame['low'].rolling(kdj_window, min_periods=kdj_window).min()
        low_list = low_list.fillna(frame['low'].expanding().min())
        high_list = frame['high'].rolling(kdj_window, min_periods=kdj_window).max()
        high_list = high_list.fillna(frame['high'].expanding().max())
        rsv = (close - low_list) / (high_list - low_list) * 100

        # A smoothing period of M corresponds to ewm(com=M - 1), i.e. alpha = 1/M
        # (the default 3 gives com=2).
        frame['k'] = rsv.ewm(com=k_period - 1).mean()
        frame['d'] = frame['k'].ewm(com=d_period - 1).mean()
        frame['j'] = 3 * frame['k'] - 2 * frame['d']
        return frame

    def get_stock_basedata(self, code, start_date, end_date):
        """Download daily bars for ``code`` and return them with MACD / KDJ.

        The result holds the baostock fields date (renamed to 'datetime'),
        code, open, high, low, close, preclose, volume, amount and turn, plus
        the computed DIFF, DEA, MACD, k, d and j columns. Only open / high /
        low / close / volume are cast to float64. The frame is indexed by a
        DatetimeIndex built from the 'datetime' column and is also stored on
        ``self.stock_pd``.

        Args:
            code: Stock code passed straight to baostock.
            start_date: First day of the range (anything ``pd.to_datetime``
                accepts).
            end_date: Last day of the range (same accepted types).

        Raises:
            RuntimeError: If the baostock login or the history query reports
                a non-zero error code.
        """
        lg = bs.login()
        if lg.error_code != '0':
            raise RuntimeError(f"baostock login failed: {lg.error_msg}")
        try:
            # Normalise both bounds to 'YYYY-MM-DD' strings.
            # NOTE(review): both bounds are also shifted forward by one day,
            # which the original comment ("strip the time part") does not
            # explain - kept as-is, confirm whether the shift is intended.
            start_date = (pd.to_datetime(start_date) + timedelta(days=1)).strftime("%Y-%m-%d")
            end_date = (pd.to_datetime(end_date) + timedelta(days=1)).strftime("%Y-%m-%d")

            rs = bs.query_history_k_data_plus(
                code,
                "date,code,open,high,low,close,preclose,volume,amount,turn",
                start_date=start_date,
                end_date=end_date,
                frequency="d",
                adjustflag='2',  # forward-adjusted prices
            )
            data_list = []
            # Short-circuit `and` so next() is not called once an error is set.
            while rs.error_code == '0' and rs.next():
                data_list.append(rs.get_row_data())
            if rs.error_code != '0':
                raise RuntimeError(
                    f"baostock query failed for {code}: {rs.error_msg}"
                )
        finally:
            # Always release the session opened above.
            bs.logout()

        numeric_cols = ['open', 'high', 'low', 'close', 'volume']
        self.stock_pd = pd.DataFrame(data_list, columns=rs.fields)
        self.stock_pd[numeric_cols] = self.stock_pd[numeric_cols].astype('float64')
        self.stock_pd = self.stock_pd.rename(columns={'date': 'datetime'})
        self.stock_pd.index = pd.DatetimeIndex(self.stock_pd['datetime'])

        # Step 2: compute MACD / KDJ with pandas.
        self._add_indicators(self.stock_pd)
        return self.stock_pd


def get_data_workflow(input_stock_names, output_stock_datas):
    """Download every stock listed in the workbook and save one xlsx per stock.

    Args:
        input_stock_names: Path of the workbook; its 'stock_names' sheet must
            have four columns in this order: stock code, '起始时间' (start),
            '终止时间' (end) and stock name.
        output_stock_datas: Directory that receives '<stock name>.xlsx' files;
            it is created when missing.
    """
    # 1. Read the list of stocks to download.
    stock_class = stock_data()
    # NOTE(review): `date_parser` is deprecated in recent pandas releases;
    # kept here to preserve the existing parsing behaviour.
    stocks_df = pd.read_excel(
        input_stock_names,
        sheet_name='stock_names',
        parse_dates=['起始时间', '终止时间'],
        date_parser=custom_date_parser,
    )
    os.makedirs(output_stock_datas, exist_ok=True)

    # 2. Fetch each stock, then 3. write it to its own workbook.
    for _, code, start_date, end_date, stock_name in stocks_df.itertuples():
        tmp_stock_pds = stock_class.get_stock_basedata(code, start_date, end_date)
        tmp_stock_pds.to_excel(
            os.path.join(output_stock_datas, stock_name + '.xlsx'),
            index=False,
        )


if __name__ == '__main__':
    get_data_workflow(
        input_stock_names='input_datas/stock_names.xlsx',
        output_stock_datas='output_datas/stock_datas',
    )