# Expect this program to run for 30 minutes or more
# Load packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from scipy.stats import norm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from IPython.display import clear_output
# Fix the NumPy random seed for reproducibility
from numpy.random import seed
seed(100)
# Short aliases for the standard normal pdf and cdf used in the Black-Scholes formula
n = norm.pdf
N = norm.cdf
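# For reference, a minimal sketch of the Black-Scholes call-price formula built from these
# aliases: this is the analytical price the network is trained to approximate. The helper
# name bs_call_price is illustrative and not part of the original script.
def bs_call_price(S, K, r, sigma, T, q=0.0):
    """European call price under Black-Scholes with continuous dividend yield q."""
    d1 = (np.log(S / K) + (r - q + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return S * np.exp(-q * T) * N(d1) - K * np.exp(-r * T) * N(d2)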
# Load option data
option_dataset = pd.read_csv('Option_Data.csv')
option_dataset.head()
# Keep both the noisy and the analytical option prices as targets, so the Black-Scholes
# error on the test set can be computed later
y = option_dataset[['Option Price with Noise','Option Price']]
X = option_dataset[['Spot price', 'Strike Price', 'Risk Free Rate','Volatility','Maturity','Dividend']]
# Divide the data into a training set and a test set (note that the random seed is fixed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
# Divide the training set into training and validation sets
# (0.25 of the remaining 80% yields a 60/20/20 train/validation/test split)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=100)
# Scale features to Z-scores (the scaler is fit on the training set only, to avoid leaking
# information from the validation and test sets)
scaler = StandardScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_val = scaler.transform(X_val)
X_scaled_test = scaler.transform(X_test)
# Convert the target DataFrames to arrays so their columns can be indexed positionally below
y_train = np.asarray(y_train)
y_val = np.asarray(y_val)
y_test = np.asarray(y_test)
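# Quick optional check (an added sketch, not in the original script): after Z-scoring,
# each training feature should have mean ~0 and standard deviation ~1
print('Scaled train feature means:', X_scaled_train.mean(axis=0).round(3))
print('Scaled train feature stds: ', X_scaled_train.std(axis=0).round(3))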
# Create the ML model
# Sequential lets you define a neural network layer by layer, in order
# Within Sequential, each Dense layer specifies the number of nodes, the activation function
# and other related parameters
# For more information regarding activation functions, please refer to https://keras.io/activations/
model = keras.models.Sequential([
    Dense(20, activation="sigmoid", input_shape=(6,)),
    Dense(20, activation="sigmoid"),
    Dense(20, activation="sigmoid"),
    Dense(1)
])
# model.summary() prints the architecture of the model you just defined
model.summary()
# compile() lets you choose the measure of loss and the optimizer
# For other optimizers, please refer to https://keras.io/optimizers/
model.compile(loss="mae", optimizer="Adam")
# The ModelCheckpoint callback periodically saves a copy of the model during training
# Here it is set to keep only the best-performing model seen so far
checkpoint_cb = keras.callbacks.ModelCheckpoint("bs_pricing_model_vFinal.h5", save_best_only=True)
# Early stopping halts training if no improvement is seen within a certain number of epochs
# Here, training stops after 5000 epochs without improvement, and the model then reverts
# to the best weights found so far
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5000, restore_best_weights=True)
# Remark: the checkpoint may be redundant here, since early stopping also restores the best weights
# We include both simply to illustrate two different ways to keep the best model
# Train the model
# fit() trains the network; here it takes the training data, the number of epochs,
# the validation data, and the callbacks as input
# callbacks is an optional parameter that enables training tricks such as early stopping
# and checkpointing
# Remark: although we allow up to 50000 epochs, training stops once the early stopping
# criterion is triggered
# Also, select the first column of the target arrays, which holds the option price with noise
history = model.fit(X_scaled_train, y_train[:, 0], epochs=50000, verbose=0,
                    validation_data=(X_scaled_val, y_val[:, 0]),
                    callbacks=[checkpoint_cb, early_stopping_cb])
# Load the best model you saved and calculate the MAE on the test set
model = keras.models.load_model("bs_pricing_model_vFinal.h5")
mae_test = model.evaluate(X_scaled_test, y_test[:, 0], verbose=0)
print('Neural network mean absolute error on test set:', mae_test)
model_prediction = model.predict(X_scaled_test).flatten()
# Errors of the network vs. the noisy price and vs. the Black-Scholes analytical price
mean_error = np.average(model_prediction - y_test[:, 0])
std_error = np.std(model_prediction - y_test[:, 0])
mean_error_vs_BS_price = np.average(model_prediction - y_test[:, 1])
std_error_vs_BS_price = np.std(model_prediction - y_test[:, 1])
# Benchmark: error of the noisy price itself relative to the Black-Scholes analytical price
BS_mean_error = np.average(y_test[:, 0] - y_test[:, 1])
BS_std_error = np.std(y_test[:, 0] - y_test[:, 1])
print('Black-Scholes Statistics:')
print('Mean error on test set:',BS_mean_error)
print('Standard deviation of error on test set:',BS_std_error)
print(" ")
print('Neural Network Statistics:')
print('Mean error on test set vs. option price with noise:',mean_error)
print('Standard deviation of error on test set vs. option price with noise:',std_error)
print('Mean error on test set vs. BS analytical formula price:',mean_error_vs_BS_price)
print('Standard deviation of error on test set vs. BS analytical formula price:',std_error_vs_BS_price)
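# Optional visual comparison (an added sketch, not part of the original output): histograms
# of the network's pricing errors against the noise already present in the data, using the
# error arrays computed above
plt.hist(model_prediction - y_test[:, 0], bins=50, alpha=0.5, label='NN error vs. noisy price')
plt.hist(y_test[:, 0] - y_test[:, 1], bins=50, alpha=0.5, label='Noise (noisy price - BS price)')
plt.xlabel('Pricing error')
plt.legend()
plt.show()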
# Plot the training history (training and validation loss per epoch)
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0.1, 0.2)  # zoom the y-axis in on the late-training loss range
plt.show()
# Export the training history (MAE per epoch)
output = pd.DataFrame(history.history)
output.to_csv("mae_history.csv")