# File-viewer metadata (not part of the program): 60 lines, 1.4 KiB, Python.
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
def predict(x, w, b):
    """Evaluate the linear model ``x * w + b``.

    Args:
        x: Input value.
        w: Weight multiplied with the input.
        b: Bias added after the multiplication.

    Returns:
        The product of ``x`` and ``w`` with ``b`` added.
    """
    scaled = x * w
    return scaled + b
|
|
|
|
def loss(x, y):
    """Return the squared difference between ``y`` and ``x``.

    Args:
        x: First value (``mse`` passes the prediction here).
        y: Second value (``mse`` passes the target here).

    Returns:
        ``(y - x) ** 2``.
    """
    residual = y - x
    return residual ** 2
|
|
|
|
def mse(predicted, actual):
    """Return the mean squared error between predictions and targets.

    Each prediction is paired with the target at the same position and
    scored with ``loss``; the scores are averaged over ``len(predicted)``.

    Args:
        predicted: Sequence of predicted values.
        actual: Sequence of target values, at least as long as
            ``predicted``. Entries beyond ``len(predicted)`` are ignored.

    Returns:
        The average per-sample loss (a float for numeric inputs).

    Raises:
        IndexError: If ``actual`` has fewer elements than ``predicted``.
        ZeroDivisionError: If ``predicted`` is empty.
    """
    count = len(predicted)
    # zip() would silently stop at the shorter sequence; fail loudly instead
    # so a truncated target list cannot produce a plausible-looking average.
    if len(actual) < count:
        raise IndexError("actual has fewer elements than predicted")
    # The float start value keeps the result a float even for integer inputs.
    total = sum((loss(p, a) for p, a in zip(predicted, actual)), 0.0)
    return total / count
|
|
|
|
def slope_weight(input, predicted, actual):
    """Return the update direction for the weight of a linear model.

    Computes ``2 * mean((actual - predicted) * input)``. For predictions of
    the form ``input * w + b`` this is the negative of the derivative of the
    mean squared error with respect to ``w``, so adding a small positive
    multiple of it to the weight reduces the error.

    Args:
        input: Sequence of input values. The name shadows the ``input``
            builtin but is kept so keyword callers continue to work.
        predicted: Sequence of model predictions.
        actual: Sequence of target values.

    Returns:
        The averaged, doubled sum of ``(actual - predicted) * input`` over
        the first ``len(predicted)`` positions (a float for numeric inputs).

    Raises:
        IndexError: If ``input`` or ``actual`` is shorter than ``predicted``.
        ZeroDivisionError: If ``predicted`` is empty.
    """
    count = len(predicted)
    # zip() would silently stop at the shortest sequence; fail loudly instead.
    if len(input) < count or len(actual) < count:
        raise IndexError("input and actual must be at least as long as predicted")
    # The float start value keeps the result a float even for integer inputs.
    total = sum(
        ((a - p) * x for x, p, a in zip(input, predicted, actual)),
        0.0,
    )
    return total / count * 2
|
|
|
|
def slope_bias(predicted, actual):
    """Return the update direction for the bias of a linear model.

    Computes ``2 * mean(actual - predicted)``. For predictions of the form
    ``x * w + b`` this is the negative of the derivative of the mean squared
    error with respect to ``b``, so adding a small positive multiple of it
    to the bias reduces the error.

    Args:
        predicted: Sequence of model predictions.
        actual: Sequence of target values, at least as long as
            ``predicted``. Entries beyond ``len(predicted)`` are ignored.

    Returns:
        The averaged, doubled sum of ``actual - predicted`` (a float for
        numeric inputs).

    Raises:
        IndexError: If ``actual`` has fewer elements than ``predicted``.
        ZeroDivisionError: If ``predicted`` is empty.
    """
    count = len(predicted)
    # zip() would silently stop at the shorter sequence; fail loudly instead.
    if len(actual) < count:
        raise IndexError("actual has fewer elements than predicted")
    # The float start value keeps the result a float even for integer inputs.
    total = sum((a - p for p, a in zip(predicted, actual)), 0.0)
    return total / count * 2
|
|
|
|
# Fit `median_house_value ~ median_income * w + b` by batch gradient descent.
# The CSV is read relative to the current working directory.
data = pd.read_csv("california_housing_train.csv")

# Model parameters, both starting from zero.
w = 0
b = 0

# `prev` holds the MSE of the previous iteration and `delta` the absolute
# change in MSE between iterations. `delta` starts above the stopping
# tolerance so the loop body runs at least once.
prev = 0
delta = 10

# Step size applied to every parameter update.
beta = 0.01

median_income = data['median_income']
median_house_value = data['median_house_value']

mu_input = median_income.mean()
sigma_input = median_income.std()

mu_actual = median_house_value.mean()
sigma_actual = median_house_value.std()

# Standardise both columns (subtract the mean, divide by the standard
# deviation) so the feature and the target are on comparable scales.
median_income = (median_income - mu_input) / sigma_input
median_house_value = (median_house_value - mu_actual) / sigma_actual

# Neither column changes during training, so convert to plain lists once
# instead of on every iteration.
inputs = median_income.tolist()
actual = median_house_value.tolist()

# Iterate until the MSE changes by no more than 1e-6 between iterations.
while delta > 1e-6:
    predicted = [predict(sample, w, b) for sample in inputs]

    # Evaluate the error once and reuse it for both the convergence check
    # and the next iteration's reference value.
    current = mse(predicted, actual)
    delta = abs(current - prev)

    # slope_weight / slope_bias are built from (actual - predicted), so the
    # scaled values are added to the parameters rather than subtracted.
    w += slope_weight(inputs, predicted, actual) * beta
    b += slope_bias(predicted, actual) * beta
    prev = current

# `w` and `b` now are our weight and bias. They apply to the standardised
# data; use mu_input/sigma_input and mu_actual/sigma_actual to convert
# between standardised and original units.
|