import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
# Build a 1-D toy dataset: every sample is labelled +1 except those
# falling in the band 20 < x < 40, which are labelled -1.
N = 200
X = 100 * np.random.rand(N)
Y = np.where((X > 20) & (X < 40), -1.0, 1.0)
# Hold out half of the samples as a test set.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
# Visualise the full dataset, then just the training split.
plt.scatter(X, Y)
plt.title('Samples')
plt.show()
plt.scatter(X_train, Y_train)
plt.show()
# Fit a degree-(D-1) polynomial to the training labels by least squares,
# solving the Vandermonde system M @ A = Y_train through the SVD
# pseudo-inverse: A = V @ diag(1/S) @ U^T @ y (np.linalg.svd returns
# U, S, Vh with M = U @ diag(S) @ Vh, so V.transpose() below is V).
D = 3
M = np.array([np.power(X_train, i) for i in range(D)]).transpose()  # (n, D)
U, S, V = np.linalg.svd(M, full_matrices=False)
# Bug fix: the original expanded Y_train to a (n,1) column, which made A
# a (D,1) column and Yscore an (n,1) column; np.equal(Y_train, Ypred)
# then broadcast (n,) against (n,1) into an (n,n) matrix, corrupting the
# accuracy. Keeping everything 1-D avoids that. Dividing by S also
# replaces the needless explicit inverse of diag(S) (still assumes M has
# full column rank, i.e. no singular value is ~0).
A = np.matmul(V.transpose(), np.matmul(U.transpose(), Y_train) / S)  # (D,)
Yscore = np.matmul(M, A)   # (n,) real-valued scores
Ypred = np.sign(Yscore)    # (n,) hard +1/-1 predictions
Accuracy = np.mean(np.equal(Y_train, Ypred))
ROC_AUC = roc_auc_score(y_true=Y_train, y_score=Yscore)
print('Accuracy = %f, AUC = %f' % (Accuracy, ROC_AUC))
# Evaluate the least-squares polynomial on the held-out test set.
M_test = np.array([np.power(X_test, i) for i in range(D)]).transpose()  # (n_test, D)
# Bug fix: np.ravel guards against A being a (D,1) column, in which case
# the scores would be (n,1) and np.equal(Y_test, test_pred) would
# broadcast (n,) against (n,1) into an (n,n) matrix, corrupting the
# accuracy. Flattening keeps every comparison 1-D either way.
test_scores = np.ravel(np.matmul(M_test, A))
test_pred = np.sign(test_scores)
Test_Accuracy = np.mean(np.equal(Y_test, test_pred))
Test_ROC_AUC = roc_auc_score(y_true=Y_test, y_score=test_scores)
print('Accuracy = %f, AUC = %f' % (Test_Accuracy, Test_ROC_AUC))
# Visualise the least-squares polynomial over a dense grid of inputs.
T = np.arange(0, 100, .01)
# Vandermonde matrix with columns [1, t, t^2, ...] (powers 0..D-1),
# equivalent to stacking np.power(T, i) for i in range(D).
MT = np.vander(T, D, increasing=True)
YT = MT.dot(A)
plt.plot(T, YT, label='Predicted y')
plt.plot(T, np.sign(YT), label='Sign of Predicted y')
plt.title('MSE fitted function')
plt.legend()
plt.show()
def cartesian(arrays, out=None):
    """Return the cartesian product of the input 1-D arrays.

    The result is a 2-D array of shape (prod(sizes), len(arrays)) whose
    rows enumerate every combination, varying the last array fastest.
    If ``out`` is given, the product is written into it in place.
    """
    arrays = [np.asarray(a) for a in arrays]
    total = np.prod([a.size for a in arrays])
    if out is None:
        out = np.zeros([total, len(arrays)], dtype=arrays[0].dtype)
    # Iterative fill: column j repeats each element of arrays[j] once per
    # combination of the later arrays, tiled once per combination of the
    # earlier ones.
    reps = total
    tiles = 1
    for col, a in enumerate(arrays):
        reps //= a.size
        out[:, col] = np.tile(np.repeat(a, reps), tiles)
        tiles *= a.size
    return out
# Brute-force grid search over coefficient vectors, keeping the one with
# the best training accuracy (direct misclassification-error minimisation).
train_acc = 0
best_A = None  # best coefficient vector found so far (lowest degree first)
grid_rad = 100
grid_res = .1
# NOTE(review): this grid has (2*grid_rad/grid_res)**D = 8e9 points for
# D=3, and cartesian() materialises all of them (~192 GB as float64) —
# confirm the intended radius/resolution before running at this scale.
for A_ in cartesian([np.arange(-grid_rad, grid_rad, grid_res) for _ in range(D)]):
    scores = np.matmul(M, A_)
    preds = np.sign(scores)
    acc = np.mean(np.equal(Y_train, preds))
    if acc > train_acc:
        train_acc = acc
        best_A = A_
# Bug fix: ndarray has no .reverse() (and list.reverse() would return
# None anyway); reverse a copy and unpack it as a tuple so the %-format
# receives its three scalars highest-degree first.
print('f\' = %f x^2 + %f x + %f' % tuple(best_A[::-1]))
print('Train accuracy = %f' % train_acc)
# Score the grid-search coefficients on the training set...
Yscore = M.dot(best_A)
Ypred = np.sign(Yscore)
Accuracy = (Y_train == Ypred).mean()
ROC_AUC = roc_auc_score(y_true=Y_train, y_score=Yscore)
print('Accuracy = %f, AUC = %f' % (Accuracy, ROC_AUC))
# ...and on the held-out test set.
test_scores = M_test.dot(best_A)
test_pred = np.sign(test_scores)
Test_Accuracy = (Y_test == test_pred).mean()
Test_ROC_AUC = roc_auc_score(y_true=Y_test, y_score=test_scores)
print('Accuracy = %f, AUC = %f' % (Test_Accuracy, Test_ROC_AUC))
# Plot the grid-search polynomial and its sign over the dense grid.
YT = np.matmul(MT, best_A)
signYT = np.sign(YT)
plt.plot(T, signYT, label='Sign of Predicted y')
plt.plot(T, YT, label='Predicted y')
plt.title('Misclassification Error fitted function')
plt.ylim([-2, 10])
plt.legend()
plt.show()
# Removed the trailing bare `best_A` expression — a leftover notebook
# cell-output statement with no effect when run as a script.