"""Fit a one-feature linear regression with batch gradient descent.

The model ``median_house_value ~ w * median_income + b`` is trained on
z-score standardized columns of ``california_housing_train.csv``.
Training runs at import time and leaves the fitted parameters in the
module-level names ``w`` and ``b``.
"""

import pandas as pd
import matplotlib.pyplot as plt


def predict(x, w, b):
    """Return the linear model output ``x * w + b``."""
    return x * w + b


def loss(x, y):
    """Return the squared difference ``(y - x) ** 2``."""
    return (y - x) ** 2


def mse(predicted, actual):
    """Return the mean squared error between two equal-length sequences.

    ``actual`` is indexed by position for every element of ``predicted``.
    Raises ``ZeroDivisionError`` when ``predicted`` is empty.
    """
    total = 0.0
    for i, value in enumerate(predicted):
        total += loss(value, actual[i])
    return total / len(predicted)


def slope_weight(input, predicted, actual):
    """Return the descent direction of the MSE with respect to the weight.

    The value is ``2 / n * sum((actual[i] - predicted[i]) * input[i])``,
    i.e. the negated partial derivative of the MSE with respect to ``w``,
    so the caller *adds* it (scaled by the learning rate) to the weight.
    Raises ``ZeroDivisionError`` when ``predicted`` is empty.
    """
    # NOTE: the parameter name `input` shadows the builtin; it is kept
    # unchanged so that keyword callers keep working.
    total = 0.0
    for i, value in enumerate(predicted):
        total += (actual[i] - value) * input[i]
    return total / len(predicted) * 2


def slope_bias(predicted, actual):
    """Return the descent direction of the MSE with respect to the bias.

    The value is ``2 / n * sum(actual[i] - predicted[i])``, i.e. the
    negated partial derivative of the MSE with respect to ``b``.
    Raises ``ZeroDivisionError`` when ``predicted`` is empty.
    """
    total = 0.0
    for i, value in enumerate(predicted):
        total += actual[i] - value
    return total / len(predicted) * 2


data = pd.read_csv("california_housing_train.csv")

w = 0
b = 0
prev = 0      # MSE of the previous iteration
delta = 10    # absolute change in MSE; starts above the tolerance
beta = 0.01   # learning rate (step size applied to each slope)

median_income = data['median_income']
median_house_value = data['median_house_value']

mu_input = median_income.mean()
sigma_input = median_income.std()

mu_actual = median_house_value.mean()
sigma_actual = median_house_value.std()

# Standardize both columns to z-scores before fitting.
median_income = (median_income - mu_input) / sigma_input
median_house_value = (median_house_value - mu_actual) / sigma_actual

# The data never changes during training, so convert to lists once
# instead of on every iteration.
inputs = median_income.tolist()
actual = median_house_value.tolist()

# Iterate until the MSE changes by no more than 1e-6 between steps.
while delta > 1e-6:
    predicted = [predict(sample, w, b) for sample in inputs]

    # Computed once per iteration and reused for both `delta` and `prev`.
    current = mse(predicted, actual)
    delta = abs(current - prev)

    w += slope_weight(inputs, predicted, actual) * beta
    b += slope_bias(predicted, actual) * beta

    prev = current

# `w` and `b` now are our weight and bias for the *standardized* data.
# In the original units the equivalent line is:
#   weight = w * sigma_actual / sigma_input
#   bias   = mu_actual + sigma_actual * b - weight * mu_input