
Machine Learning Programming Assignment 2: Logistic Regression (Python)

高速蜗牛

Preface:


This programming assignment was implemented with Python 3, Anaconda3 (64-bit), and Jupyter Notebook. It is based on the assignments from the 深度之眼 "Machine Learning Training Camp"; some of the code has been modified. It is shared for learning and exchange purposes.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  # built on top of matplotlib; supports visualization of numpy and pandas data structures

plt.style.use('fivethirtyeight')  # nicer plot style
# import tensorflow as tf
from sklearn.metrics import classification_report  # classification evaluation report

Prepare the data

data = pd.read_csv('ex2data1.txt', names=['exam1', 'exam2', 'admitted'])
data.head()  # look at the first five rows

Take a look at what the data looks like:

sns.set(color_codes=False, context="notebook", style="darkgrid",
        palette=sns.color_palette("RdBu", 2))
sns.lmplot('exam1', 'exam2', hue='admitted', data=data,
           size=6,
           fit_reg=False,
           scatter_kws={"s": 50})
plt.show()  # take a look at the data
def get_X(df):  # read the features
    ones = pd.DataFrame({'ones': np.ones(len(df))})  # ones is an m-row, 1-column dataframe
    data = pd.concat([ones, df], axis=1)  # merge the data column-wise
    # return data.iloc[:, :-1].as_matrix()  # returns an ndarray, not a matrix; raises a deprecation warning, so changed to .values
    return data.iloc[:, :-1].values  # returns an ndarray, not a matrix

def get_y(df):  # read the label
    '''assume the last column is the target'''
    return np.array(df.iloc[:, -1])  # df.iloc[:, -1] is the last column of df

def normalize_feature(df):
    """Applies function along input axis (default 0) of DataFrame."""
    return df.apply(lambda column: (column - column.mean()) / column.std())  # feature scaling

X = get_X(data)
print(X.shape)
y = get_y(data)
print(y.shape)
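A quick sanity check (my own addition, not part of the assignment): the first column of X should be the intercept column of ones that get_X prepends.

print(X[:5])  # first five rows; the first entry of each row should be 1.0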
sigmoid function

g denotes the logistic function, a commonly used S-shaped (sigmoid) function: g(z) = 1 / (1 + e^(-z)).

def sigmoid(z):
    # your code here (appro ~ 1 lines)
    gz = 1 / (1 + np.exp(-z))
    return gz
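As a quick sanity check (my addition, not part of the assignment), sigmoid(0) should be exactly 0.5, and the function should saturate towards 0 and 1 for large negative and positive inputs:

print(sigmoid(0))                    # 0.5
print(sigmoid(np.array([-10, 10])))  # [4.53978687e-05 9.99954602e-01]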

The program below calls the function you just wrote and plots the sigmoid function. If your code is correct, you should see the plot appear below.

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(np.arange(-10, 10, step=0.01),
        sigmoid(np.arange(-10, 10, step=0.01)))
# ax.set_ylim((-0.1, 1.1))
ax.set_xlabel('z', fontsize=18)
ax.set_ylabel('g(z)', fontsize=18)
ax.set_title('sigmoid function', fontsize=18)
plt.show()

cost function
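For reference, the cost implemented below is the standard (unregularized) logistic regression cost, with hypothesis h_theta(x) = g(theta^T x):

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big]$$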

theta = np.zeros(3)  # X is (m, n), so theta is n*1
# np.transpose(theta)
theta.shape

def cost(theta, X, y):
    '''cost fn is -l(theta) for you to minimize'''
    # your code here (appro ~ 2 lines)
    X = np.matrix(X)          # (100, 3)
    y = np.matrix(y)          # (1, 100)
    theta = np.matrix(theta)  # (1, 3)
    costf = - (y * np.log(sigmoid(X * theta.T))
               + (1 - y) * np.log(1 - sigmoid(X * theta.T))) / len(X)
    # costf = np.mean(-y * np.log(sigmoid(X @ theta)) - (1 - y) * np.log(1 - sigmoid(X @ theta)))
    return costf

# Hint: X @ theta is equivalent to X.dot(theta)

cost(theta, X, y)

Out[102]:

matrix([[0.69314718]])
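This matches the expected initial value: with theta = 0, every hypothesis outputs sigmoid(0) = 0.5, so the cost is -ln(0.5) = ln 2, approximately 0.6931.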

gradient descent
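The partial derivatives implemented below are the standard logistic regression gradient; note that only the gradient is computed here, while the actual optimization is delegated to scipy further down:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)}$$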

def gradient(theta, X, y):
    # your code here (appro ~ 2 lines)
    grad1 = []
    X = np.matrix(X)          # (100, 3)
    y = np.matrix(y)          # (1, 100)
    theta = np.matrix(theta)  # (1, 3)
    for i in range(theta.shape[1]):
        temp = (X[:, i].T) * (sigmoid(X * theta.T) - y.T) / len(X)
        grad1.append(temp.tolist()[0][0])
    grad = np.array(grad1)
    # grad = (1 / len(X)) * X.T @ (sigmoid(X @ theta) - y)
    return grad

In [185]:

gradient(theta, X, y)

Out[185]:

array([ -0.1 , -12.00921659, -11.26284221])
Fit the parameters
import scipy.optimize as opt

res = opt.minimize(fun=cost, x0=theta, args=(X, y), method='Newton-CG', jac=gradient)
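opt.minimize returns a scipy OptimizeResult object; if you want to verify convergence (my addition, not in the original notebook), you can inspect it:

print(res.success)  # True if the optimizer converged
print(res.fun)      # final value of the cost function
print(res.x)        # the fitted theta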
Predict and validate on the training set
def predict(x, theta):
    # your code here (appro ~ 2 lines)
    y_pred = sigmoid(x @ theta)  # use the x argument, not the global X
    return (y_pred >= 0.5).astype(int)

In [220]:

final_theta = res.x
y_pred = predict(X, final_theta)
print(classification_report(y, y_pred))
Find the decision boundary
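The decision boundary is the set of points where theta^T x = 0, i.e. where the predicted probability is exactly 0.5. Solving for x_2 gives

$$\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \quad\Longrightarrow\quad x_2 = -\frac{\theta_0 + \theta_1 x_1}{\theta_2},$$

which is why the code below divides res.x by res.x[2] and negates it.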
print(res.x)  # this is the final theta

[-25.15896284   0.20621275   0.20145242]

In [223]:

coef = -(res.x / res.x[2])  # find the equation
print(coef)

x = np.arange(130, step=0.1)
y = coef[0] + coef[1]*x

[124.88786723 -1.02363005 -1. ]

sns.set(context="notebook", style="ticks", font_scale=1.5)sns.lmplot('exam1', 'exam2', hue='admitted', data=data,  size=6,  fit_reg=False,  scatter_kws={"s": 25} )plt.plot(x, y, 'grey')plt.xlim(0, 130)plt.ylim(0, 130)plt.title('Decision Boundary')plt.show()
3 - Regularized logistic regression
df = pd.read_csv('ex2data2.txt', names=['test1', 'test2', 'accepted'])
df.head()

In [227]:

sns.set(context="notebook", style="ticks", font_scale=1.5)​sns.lmplot('test1', 'test2', hue='accepted', data=df,  size=6,  fit_reg=False,  scatter_kws={"s": 50} )​plt.title('Regularized Logistic Regression')plt.show()
feature mapping
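The mapping below builds every monomial x1^(i-p) * x2^p up to total degree 6:

$$\text{mapFeature}(x_1, x_2) = \big[\,1,\; x_1,\; x_2,\; x_1^2,\; x_1 x_2,\; x_2^2,\; \ldots,\; x_1 x_2^5,\; x_2^6\,\big]$$

For degree 6 that is 1 + 2 + ... + 7 = 28 features, which matches the (118, 28) shape printed below.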
def feature_mapping(x, y, power, as_ndarray=False):
    """return mapped features as ndarray or dataframe"""
    data = {"f{}{}".format(i - p, p): np.power(x, i - p) * np.power(y, p)
            for i in np.arange(power + 1)
            for p in np.arange(i + 1)
            }
    if as_ndarray:
        return pd.DataFrame(data).values  # .as_matrix() is deprecated; use .values
    else:
        return pd.DataFrame(data)

x1 = np.array(df.test1)  # ndarray, shape (118,)
x2 = np.array(df.test2)  # ndarray, shape (118,)

data = feature_mapping(x1, x2, power=6)
print(data.shape)  # (118, 28)
data.head()
regularized cost
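The regularized cost adds an L2 penalty on theta_1 .. theta_n to the unregularized cost; by convention theta_0 is not regularized:

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$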
theta = np.zeros(data.shape[1])
X = feature_mapping(x1, x2, power=6, as_ndarray=True)
print(X.shape)

y = get_y(df)
print(y.shape)

(118, 28)
(118,)

In [276]:

def regularized_cost(theta, X, y, l=1):
    # your code here (appro ~ 3 lines)
    theta_jton = theta[1:]  # theta_1 .. theta_n (theta_0 is not regularized)
    cost1 = cost(theta, X, y)
    regu_cost = l / (2 * len(X)) * sum(np.power(theta_jton, 2))
    return regu_cost + cost1


regularized_cost(theta, X, y, l=1)

Out[277]:

matrix([[0.69314718]])
regularized gradient
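Correspondingly, the gradient gains a (lambda/m) * theta_j term for every j >= 1, while the j = 0 component is unchanged; this is what the np.concatenate with a leading 0 implements below:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \geq 1)$$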
def regularized_gradient(theta, X, y, l=1):
    # your code here (appro ~ 3 lines)
    theta_jton = theta[1:]
    regularized_term = np.concatenate([np.array([0]), l / len(X) * theta_jton])
    return gradient(theta, X, y) + regularized_term


regularized_gradient(theta, X, y)

Out[287]:

array([8.47457627e-03, 1.87880932e-02, 7.77711864e-05, 5.03446395e-02,
       1.15013308e-02, 3.76648474e-02, 1.83559872e-02, 7.32393391e-03,
       8.19244468e-03, 2.34764889e-02, 3.93486234e-02, 2.23923907e-03,
       1.28600503e-02, 3.09593720e-03, 3.93028171e-02, 1.99707467e-02,
       4.32983232e-03, 3.38643902e-03, 5.83822078e-03, 4.47629067e-03,
       3.10079849e-02, 3.10312442e-02, 1.09740238e-03, 6.31570797e-03,
       4.08503006e-04, 7.26504316e-03, 1.37646175e-03, 3.87936363e-02])
Fit the parameters
import scipy.optimize as opt

print('init cost = {}'.format(regularized_cost(theta, X, y)))

res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y), method='Newton-CG', jac=regularized_gradient)
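As an optional experiment (my own sketch, not part of the original assignment), you can refit with different values of the regularization strength l; a larger l shrinks the high-order coefficients and yields a smoother decision boundary, at the risk of underfitting:

for l_try in (0, 1, 100):  # hypothetical values to compare
    r = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y, l_try),
                     method='Newton-CG', jac=regularized_gradient)
    print(l_try, r.fun)  # final regularized cost for each choice of l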
Predict
final_theta = res.x
y_pred = predict(X, final_theta)

print(classification_report(y, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.75      0.81        60
           1       0.78      0.90      0.83        58

   micro avg       0.82      0.82      0.82       118
   macro avg       0.83      0.82      0.82       118
weighted avg       0.83      0.82      0.82       118
