ml/linear_regression.py
2025-12-19 10:59:59 +01:00

60 lines
1.4 KiB
Python

import pandas as pd
import matplotlib.pyplot as plt
def predict(x, w, b):
    """Evaluate the linear model for one input.

    Args:
        x: Input feature value.
        w: Weight (slope) of the model.
        b: Bias (intercept) of the model.

    Returns:
        The model output ``x * w + b``.
    """
    weighted = x * w
    return weighted + b
def loss(x, y):
    """Return the squared error between two values.

    Args:
        x: First value (the prediction, as used by ``mse``).
        y: Second value (the target, as used by ``mse``).

    Returns:
        ``(y - x) ** 2``.
    """
    residual = y - x
    return residual ** 2
def mse(predicted, actual):
    """Return the mean squared error of the predictions.

    Args:
        predicted: Sequence of model outputs.
        actual: Target values, indexed by the same positions as
            ``predicted``.

    Returns:
        The sum of ``loss(predicted[i], actual[i])`` over every position
        of ``predicted``, divided by ``len(predicted)``.
    """
    total = 0.0
    for position, estimate in enumerate(predicted):
        total = total + loss(estimate, actual[position])
    count = len(predicted)
    return total / count
def slope_weight(input, predicted, actual):
    """Return the descent direction of the MSE with respect to the weight.

    The value is ``2 / n * sum((actual[i] - predicted[i]) * input[i])``,
    i.e. the *negative* of the partial derivative of the mean squared
    error with respect to the weight of a model ``x * w + b``. Adding a
    positive multiple of it to the weight therefore lowers the error.

    Args:
        input: Feature values, indexed like ``predicted``.
        predicted: Model outputs for those feature values.
        actual: Target values, indexed like ``predicted``.

    Returns:
        The negative gradient component for the weight.
    """
    total = 0.0
    for position, estimate in enumerate(predicted):
        residual = actual[position] - estimate
        total = total + residual * input[position]
    mean_term = total / len(predicted)
    return mean_term * 2
def slope_bias(predicted, actual):
    """Return the descent direction of the MSE with respect to the bias.

    The value is ``2 / n * sum(actual[i] - predicted[i])``, i.e. the
    *negative* of the partial derivative of the mean squared error with
    respect to the bias of a model ``x * w + b``. Adding a positive
    multiple of it to the bias therefore lowers the error.

    Args:
        predicted: Model outputs.
        actual: Target values, indexed like ``predicted``.

    Returns:
        The negative gradient component for the bias.
    """
    total = 0.0
    for position, estimate in enumerate(predicted):
        residual = actual[position] - estimate
        total = total + residual
    mean_term = total / len(predicted)
    return mean_term * 2
# Fit `median_house_value` as a linear function of `median_income` using
# batch gradient descent on the mean squared error.
data = pd.read_csv("california_housing_train.csv")

w = 0         # model weight (slope), updated every iteration
b = 0         # model bias (intercept), updated every iteration
prev = 0      # MSE measured in the previous iteration
delta = 10    # |change in MSE|; starts above the threshold so the loop runs
beta = 0.01   # step size applied to both descent directions

median_income = data['median_income']
median_house_value = data['median_house_value']

# Standardise both columns (zero mean, unit standard deviation) so a single
# step size works for the weight and the bias alike.
mu_input = median_income.mean()
sigma_input = median_income.std()
mu_actual = median_house_value.mean()
sigma_actual = median_house_value.std()
median_income = (median_income - mu_input) / sigma_input
median_house_value = (median_house_value - mu_actual) / sigma_actual

# The training data never changes inside the loop, so convert it to plain
# lists once instead of on every iteration.
income_samples = median_income.tolist()
actual = median_house_value.tolist()

# Iterate until the MSE changes by no more than 1e-6 between iterations.
while delta > 1e-6:
    predicted = [predict(sample, w, b) for sample in income_samples]
    # Evaluate the error once; it serves both as the convergence measure
    # for this iteration and as the reference value for the next one.
    current_mse = mse(predicted, actual)
    delta = abs(current_mse - prev)
    # slope_weight / slope_bias return the negative gradient, hence `+=`.
    w += slope_weight(income_samples, predicted, actual) * beta
    b += slope_bias(predicted, actual) * beta
    prev = current_mse

# `w` and `b` are now the fitted weight and bias. They apply to the
# standardised columns; in the original units the line is
#   slope     = w * sigma_actual / sigma_input
#   intercept = mu_actual + sigma_actual * b - slope * mu_input