作者 by 超米 / 2024-05-31 / 暂无评论 / 139 个足迹
import pandas as pd
# Load the credit-overdue data set; header=0 takes the column names from the
# first row of the file. The path is machine-specific (a Windows desktop).
df = pd.read_csv(r"C:\Users\Administrator\Desktop\credit-overdue.csv", header=0)
# Preview the first rows; the return value is discarded when this runs as a
# plain script rather than in a notebook/REPL.
df.head()
from matplotlib import pyplot as plt
# Scatter debt against income, with marker size and colour both driven by
# the 'overdue' flag (0 -> small marker, 1 -> large marker).
plt.figure(figsize=(10, 6))
map_size = {0: 20, 1: 100}
size = [map_size[flag] for flag in df['overdue']]
plt.scatter(
    df['debt'],
    df['income'],
    s=size,
    c=df['overdue'],
    marker='v',
)
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    Args:
        z: Scalar or NumPy array of real-valued scores.

    Returns:
        The element-wise sigmoid of ``z``.
    """
    # exp(-log(1 + exp(-z))) is algebraically 1 / (1 + exp(-z)), but
    # logaddexp evaluates log(exp(0) + exp(-z)) without overflowing, so very
    # negative z no longer triggers an overflow warning from np.exp(-z).
    return np.exp(-np.logaddexp(0.0, -z))
def loss(h, y, eps=1e-15):
    """Return the mean binary cross-entropy between ``h`` and ``y``.

    Args:
        h: Predicted probabilities (scalar or NumPy array).
        y: Targets, broadcastable against ``h``.
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` before the
            logarithm is taken.

    Returns:
        The mean of ``-y * log(h) - (1 - y) * log(1 - h)``.
    """
    # A saturated prediction (h == 0 or h == 1) would otherwise evaluate
    # log(0) = -inf, and 0 * -inf turns the whole mean into nan.
    h = np.clip(h, eps, 1.0 - eps)
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
def gradient(X, h, y):
    """Return ``X.T @ (h - y)`` divided by the number of entries in ``y``.

    Args:
        X: 2-D array with one row per sample.
        h: Predictions, one per row of ``X``.
        y: Targets, one per row of ``X``.

    Returns:
        An array with one entry per column of ``X``.
    """
    residual = h - y
    return np.dot(X.T, residual) / y.shape[0]
def Logistic_Regression(x, y, lr, num_iter):
    """Fit logistic-regression weights by batch gradient descent.

    Args:
        x: 2-D feature array with one row per sample.
        y: Array of targets, one per row of ``x``.
        lr: Step size multiplying each gradient.
        num_iter: Number of gradient-descent steps to take.

    Returns:
        A ``(loss, w)`` tuple: the loss at the final weights, and the weight
        vector whose first entry multiplies the prepended column of ones.
    """
    # Prepend a column of ones so that w[0] acts as the intercept.
    bias_column = np.ones((x.shape[0], 1))
    x = np.concatenate((bias_column, x), axis=1)
    w = np.zeros(x.shape[1])

    # h always holds the predictions for the current w, so each step needs
    # only one forward pass instead of recomputing it at the top of the loop.
    h = sigmoid(np.dot(x, w))
    for _ in range(num_iter):
        w -= lr * gradient(x, h, y)
        h = sigmoid(np.dot(x, w))

    # Evaluated once after the loop; this also gives a defined result when
    # num_iter == 0 (the loss at the all-zero starting weights).
    return loss(h, y), w
import numpy as np
# Features (debt, income) and the overdue column as NumPy arrays.
x = df[['debt', 'income']].values
y = df['overdue'].values

# Gradient-descent settings for the first training run.
lr = 1e-3
num_iter = 10000

# L is the value returned by Logistic_Regression; the bare expression below
# only displays it when run in a notebook/REPL.
L = Logistic_Regression(x, y, lr=lr, num_iter=num_iter)
L
# Redraw the sample scatter, then overlay the fitted decision boundary.
plt.figure(figsize=(10, 6))
map_size = {0: 20, 1: 100}
size = [map_size[flag] for flag in df['overdue']]
plt.scatter(df['debt'], df['income'], s=size, c=df['overdue'], marker='v')

# Grid covering the observed range of both features.
x1_min, x1_max = df['debt'].min(), df['debt'].max()
x2_min, x2_max = df['income'].min(), df['income'].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
grid = np.c_[xx1.ravel(), xx2.ravel()]

# L[1] is the weight vector: L[1][0] is the intercept and L[1][1:3] the two
# feature weights. Despite its name, `probs` holds the linear score w.x + b,
# not a probability; its zero level set is where the sigmoid equals 0.5.
probs = (np.dot(grid, L[1][1:3]) + L[1][0]).reshape(xx1.shape)
# `linewidths` was misspelled `linewideths`, so the width was never applied.
# The trailing semicolon is kept from the original notebook-style source.
plt.contour(xx1, xx2, probs, levels=[0], linewidths=1, colors='red');
def Logistic_Regression(x, y, lr, num_iter):
    """Run batch gradient descent and record the loss after every step.

    This definition rebinds the name of the earlier ``Logistic_Regression``
    in this file, which returns ``(loss, w)`` instead of a loss history.

    Args:
        x: 2-D feature array with one row per sample.
        y: Array of targets, one per row of ``x``.
        lr: Step size multiplying each gradient.
        num_iter: Number of gradient-descent steps to take.

    Returns:
        A list with ``num_iter`` entries: the loss after each update.
    """
    # Prepend a column of ones so that w[0] acts as the intercept.
    bias_column = np.ones((x.shape[0], 1))
    x = np.concatenate((bias_column, x), axis=1)
    w = np.zeros(x.shape[1])

    losses = []
    # h always holds the predictions for the current w, so each step needs
    # only one forward pass instead of recomputing it at the top of the loop.
    h = sigmoid(np.dot(x, w))
    for _ in range(num_iter):
        w -= lr * gradient(x, h, y)
        h = sigmoid(np.dot(x, w))
        losses.append(loss(h, y))
    return losses
# Second run: collect the per-iteration loss history and plot it against
# the iteration index.
lr = 0.01
num_iter = 30000
l_y = Logistic_Regression(x, y, lr=lr, num_iter=num_iter)

plt.figure(figsize=(10, 6))
plt.plot(range(len(l_y)), l_y)
plt.xlabel("Number of iterations")
plt.ylabel("Loss function")
独特见解