# backprop example from the notes: a linear model with
# no activation function and no hidden layers,
# 3 inputs, 1 output, and 1 sample (x = (1, -1, 2), target y = 1.5)

def loss(w1, w2, w3, b):
    # squared error on the single sample x = (1, -1, 2) with target 1.5
    yhat = 1 * w1 + (-1) * w2 + 2 * w3 + b
    return (yhat - 1.5)**2

def one_step(w1, w2, w3, b):
    # forward pass
    yhat = 1 * w1 + (-1) * w2 + 2 * w3 + b
    # backward pass: loss = (yhat - 1.5)**2, so dl/dyhat = 2 * (yhat - 1.5)
    dl_dyhat = 2 * (yhat - 1.5)
    # chain rule: dl/dw_i = dl/dyhat * dyhat/dw_i = dl/dyhat * x_i
    dl_dw1 = dl_dyhat        # x1 = 1
    dl_dw2 = -dl_dyhat       # x2 = -1
    dl_dw3 = 2 * dl_dyhat    # x3 = 2
    dl_db = dl_dyhat         # dyhat/db = 1
    # gradient-descent update with learning rate 1/100 = 0.01
    return (w1 - dl_dw1/100, w2 - dl_dw2/100, w3 - dl_dw3/100, b - dl_db/100)
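
# A quick sanity check (my addition, not from the notes): compare the analytic
# gradients computed in one_step against central finite differences of loss.
# grad_check is a hypothetical helper, not something the notes define.
def grad_check(w1, w2, w3, b, eps=1e-6):
    params = [w1, w2, w3, b]
    numeric = []
    for i in range(len(params)):
        plus, minus = params.copy(), params.copy()
        plus[i] += eps
        minus[i] -= eps
        numeric.append((loss(*plus) - loss(*minus)) / (2 * eps))
    return numeric  # should match (dl_dw1, dl_dw2, dl_dw3, dl_db)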

def hundred_steps(w1, w2, w3, b):
    for i in range(100):
        (w1, w2, w3, b) = one_step(w1, w2, w3, b)
    return (w1, w2, w3, b)
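
# Why this converges (my derivation, not in the notes): each step changes yhat
# by -0.01 * dl_dyhat * (1**2 + (-1)**2 + 2**2 + 1**2) = -0.14 * (yhat - 1.5),
# so the residual (yhat - 1.5) shrinks by a factor of 0.86 per step; after
# 100 steps it is about 0.86**100 ≈ 3e-7 of its starting value.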

# initial parameters
(w1, w2, w3, b) = (0.1, -0.2, 0.4, 0.5)
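
# A minimal usage sketch (my addition, not from the notes): train from this
# starting point and compare the loss before and after; the loss should
# shrink essentially to 0.
print("loss before:", loss(w1, w2, w3, b))
(w1, w2, w3, b) = hundred_steps(w1, w2, w3, b)
print("loss after:", loss(w1, w2, w3, b))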

