forked from troyyxk/gcgru_stock_prediction
-
Notifications
You must be signed in to change notification settings - Fork 0
/
input_data.py
41 lines (34 loc) · 1.47 KB
/
input_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
import pandas as pd
import pickle as pkl
def load_dow_price_data(data_addr, adj_addr):
    """Load the price table and the adjacency table from two CSV sources.

    Both sources are parsed with ``header=None``, so the first row is
    treated as data rather than as column names.

    Args:
        data_addr: Path or file-like object accepted by ``pandas.read_csv``
            holding the price data.
        adj_addr: Path or file-like object accepted by ``pandas.read_csv``
            holding the companion matrix (presumably the graph adjacency
            matrix, judging by the name -- confirm against the caller).

    Returns:
        A ``(dow_price, dow_adj)`` tuple of NumPy arrays, in that order.
    """
    dow_adj = pd.read_csv(adj_addr, header=None).values
    dow_price = pd.read_csv(data_addr, header=None).values
    return dow_price, dow_adj
def preprocess_data(data, labels, time_len, rate, seq_len, pre_len):
    """Split ``data``/``labels`` into train and test sets of sliding windows.

    The first ``int(time_len * rate)`` rows form the training split and the
    rows from there up to ``time_len`` form the test split. Each split is
    then cut into overlapping windows that advance one row at a time.

    For a window starting at row ``i`` of a split:

    * the input is ``data[i : i + seq_len]``;
    * the target is ``labels[i + pre_len : i + pre_len + seq_len]``, i.e. the
      label sequence shifted ``pre_len`` rows ahead of the input;
    * for the test split only, a "previous" target is taken from the labels
      shifted one row back, covering the last ``pre_len`` rows of the window.

    Args:
        data: Sliceable sequence of feature rows (e.g. a NumPy array).
        labels: Sliceable sequence of label rows, indexed like ``data``.
        time_len: Number of leading rows of ``data``/``labels`` to use.
        rate: Fraction of ``time_len`` assigned to the training split.
        seq_len: Number of rows in each input window.
        pre_len: Prediction horizon, in rows. ``seq_len`` and ``pre_len`` are
            assumed to be non-negative integers.

    Returns:
        A tuple ``(trainX, trainY, testX, testY, pre_testY)`` of NumPy arrays.
        A split too short to produce any window yields an empty array of
        shape ``(0,)``.
    """
    train_size = int(time_len * rate)
    window = seq_len + pre_len

    train_data = data[0:train_size]
    test_data = data[train_size:time_len]
    train_label = labels[0:train_size]
    test_label = labels[train_size:time_len]
    # For getting the trend: the test labels shifted one row back, so each
    # test target can be compared with the value that preceded it.
    # NOTE(review): when train_size is 0 the start index becomes -1 and wraps
    # to the end of ``labels`` -- confirm callers never pass rate == 0.
    pre_test_label = labels[train_size - 1:time_len - 1]

    # NOTE: ``len(split) - window`` start positions are used, which is one
    # fewer than the number of full windows that fit in the split. This is
    # kept as-is because downstream code may rely on the resulting shapes.
    train_starts = range(len(train_data) - window)
    test_starts = range(len(test_data) - window)

    trainX = [train_data[i:i + seq_len] for i in train_starts]
    trainY = [train_label[i + pre_len:i + window] for i in train_starts]

    testX = [test_data[i:i + seq_len] for i in test_starts]
    testY = [test_label[i + pre_len:i + window] for i in test_starts]
    pre_testY = [pre_test_label[i + seq_len:i + window] for i in test_starts]

    return (
        np.array(trainX),
        np.array(trainY),
        np.array(testX),
        np.array(testY),
        np.array(pre_testY),
    )