# -*- coding: utf-8 -*-

# 1. Generate 100 points x uniformly distributed between 0 and 1, and let y = 2+3x+ζ,
#    where ζ is a Gaussian random variable with a standard deviation of 0.5. Use an
#    SVD to fit y = a + bx to this data set, finding a and b.

import numpy as np

# Draw the abscissae: 100 samples, uniformly distributed on [0, 1).
x = np.random.random(100)

# One independent Gaussian noise term (mean 0, sd 0.5) per data point.
# Drawing a single scalar and adding it to the whole x vector would shift
# every point by the same amount instead of scattering them.
zeta = np.random.normal(0, 0.5, size=x.shape)
y = 2 + 3*x + zeta

# Design matrix for the model y = a + b*x: one column of ones (intercept)
# and one column holding x, giving shape (100, 2).
ones = np.ones_like(x)
array = np.transpose(np.array([ones,x]))

# Full SVD of the design matrix. With numpy's defaults U is (100, 100),
# s holds the 2 singular values, and V is returned already transposed (2, 2).
U,s,V = np.linalg.svd(array)

print("U:")
print(U.shape)
print(U)
print("s: ")
print(s.shape)
print(s)
print("V: ")
print(V.shape)
print(V)

# Least-squares coefficients via the pseudo-inverse V^T diag(1/s) U^T y.
# Only the first len(s) columns of the full U pair with a singular value;
# multiplying by all 100 columns is what makes the shapes mismatch.
a, b = np.dot(V.T, np.dot(U[:, :s.size].T, y) / s)
print("a, b:")
print(a, b)
#
# S = np.diag(s)
#
# # this raises ValueError: matrices are not aligned ?!?!
# pinv_svd = np.dot(np.dot(V.T,np.linalg.inv(np.diag(s))),U.T)
# a = np.linalg.inv(S)
# print(a.shape)
# print(V.T.shape)
# b = np.dot(V.T,a)

# print(pinv_svd)
# r = np.dot(a, V.T)
# print(r.shape)

# 1a. Evaluate the errors in a and b with equation (12.34)


# 1b. By bootstrap sampling to generate 100 data sets (with replacement)
def make_bootstrap_sample(orig):
  """Return a bootstrap resample of ``orig``.

  Draws as many entries as ``orig`` contains, uniformly at random and with
  replacement, using numpy's global random state.

  Args:
    orig: 1-D numpy array (or array-like) of observations.

  Returns:
    A numpy array with the same number of entries as ``orig``, each one
    picked from ``orig``. An empty input yields an empty array.
  """
  data = np.asarray(orig)
  n = data.size
  if n == 0:
    # Nothing to draw from; randint(0) would be an invalid range.
    return data.copy()
  # One random index per output slot; repeats are allowed (with replacement).
  picks = np.random.randint(n, size=n)
  return data[picks]

# Mean of y for each of 100 bootstrap resamples.
means = []
for _ in range(100):
  new_sample = make_bootstrap_sample(y)
  # Average over however many points the resample holds rather than
  # assuming exactly 100.
  means.append(np.mean(new_sample))

# Convert to str before concatenating; str + float raises TypeError.
print("bootstrap mean: " + str(np.mean(means)))

# 1c. From fitting an ensemble of 100 independent data sets

#
#
# # 2. Generate 100 points x uniformly distributed between 0 and 1, and let y = sin(2 +
# #    3x) + ζ, where ζ is a Gaussian random variable with a standard deviation of 0.1.
# #    Write a Levenberg-Marquardt routine to fit y = sin(a+bx) to this data set starting
# #    from a = b = 1, and investigate the convergence for both fixed and adaptively
# #    adjusted λ values.
#
# array = []
# for i in range(100):
#   x = random.random_sample()
#   zeta = random.normal(0.0, 0.1)
#   y = sin(2 + 3*x) + zeta
#   array.append([x, y])
#
# A = matrix(array)