龙空技术网

python机器学习案例-支持向量机建模及评估(完整代码+实现效果)

不再依然 254

前言:

如今大家对“python句向量”可能比较关注,不少读者都想了解一些“python句向量”的相关知识。小编在网上搜集了一些有关“python句向量”的相关内容,希望大家喜欢,我们一起来了解一下吧!

实现功能:

python机器学习案例-支持向量机建模及评估。

实现代码:

1

# 导入需要的库

2

from warnings import simplefilter

3

simplefilter(action='ignore', category=FutureWarning)

4

import pandas as pd

5

from sklearn.model_selection import train_test_split

6

import seaborn as sns

7

import matplotlib.pyplot as plt

8

from sklearn import metrics

9

from sklearn.metrics import roc_curve, auc

10

from sklearn.svm import SVC

11

12

# ============= Load data =============
def Read_data(file):
    """Load the heart-disease CSV and give its columns descriptive names.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with its 14 columns renamed, in file order,
        to ``age`` ... ``target``.

    Raises:
        ValueError: If the file does not have exactly 14 columns.
    """
    column_names = [
        'age', 'sex', 'chest_pain_type', 'resting_blood_pressure',
        'cholesterol', 'fasting_blood_sugar', 'rest_ecg',
        'max_heart_rate_achieved', 'exercise_induced_angina',
        'st_depression', 'st_slope', 'num_major_vessels', 'thalassemia',
        'target',
    ]
    data = pd.read_csv(file)
    # Renaming is positional, so fail early with a readable message when the
    # file layout does not match the expected schema.
    if len(data.columns) != len(column_names):
        raise ValueError(
            "expected {} columns, found {}".format(
                len(column_names), len(data.columns)
            )
        )
    data.columns = column_names
    return data

18

19

# =========== Data cleaning ==============
def data_clean(data):
    """Remove duplicates and missing rows, then cap blood-pressure outliers.

    The steps are applied in order:

    1. Report and drop duplicated rows.
    2. Report and drop rows containing any missing value.
    3. Report outliers of ``resting_blood_pressure`` using both the
       mean +/- 2 standard deviations rule and the 1.5 * IQR box-plot rule,
       then cap values outside the box-plot fences to the most extreme
       value that lies inside them.

    Args:
        data: DataFrame containing at least a ``resting_blood_pressure``
            column.

    Returns:
        A new, cleaned DataFrame; the input frame is not modified.
    """
    # --- Duplicates -------------------------------------------------------
    print('存在' if data.duplicated().any() else '不存在', '重复观测值')
    # drop_duplicates returns a new frame, so the result must be kept.
    data = data.drop_duplicates()

    # --- Missing values ---------------------------------------------------
    # isnull() yields a frame of booleans; iterating it directly would walk
    # the column labels, so reduce the underlying values instead.
    print('存在' if data.isnull().values.any() else '不存在', '缺失值')
    # Strategy used here: drop incomplete records. Forward/backward fill,
    # a constant, or a column statistic via fillna are alternatives; any of
    # them must likewise be assigned back to take effect.
    data = data.dropna()

    # --- Outliers ---------------------------------------------------------
    pressure = data['resting_blood_pressure']

    # Standard-deviation rule (report only).
    center = pressure.mean()
    spread = pressure.std()
    print('存在' if (pressure > center + 2 * spread).any() else '不存在', '上限异常值')
    print('存在' if (pressure < center - 2 * spread).any() else '不存在', '下限异常值')

    # Box-plot (IQR) rule: report, then cap.
    q1 = pressure.quantile(0.25)
    q3 = pressure.quantile(0.75)
    iqr = q3 - q1
    up = q3 + 1.5 * iqr
    dw = q1 - 1.5 * iqr
    print('存在' if (pressure > up).any() else '不存在', '上限异常值')
    print('存在' if (pressure < dw).any() else '不存在', '下限异常值')

    # Cap to the largest/smallest observation that is still inside the
    # fences. assign() builds a new column instead of writing through a
    # slice, which avoids chained-assignment pitfalls.
    upper_cap = pressure[pressure <= up].max()
    lower_cap = pressure[pressure >= dw].min()
    data = data.assign(
        resting_blood_pressure=pressure.clip(lower=lower_cap, upper=upper_cap)
    )
    return data

50

51

# ============== Feature encoding =============
def data_encoding(data):
    """One-hot encode categorical features and z-score continuous ones.

    Args:
        data: DataFrame holding the 14 named heart-disease columns.

    Returns:
        A new DataFrame in which each categorical column is replaced by
        indicator columns (``<name>_<value>``), each continuous column is
        standardised to zero mean and unit sample standard deviation, and
        ``target`` is left unchanged.
    """
    selected_columns = [
        "age", 'sex', "chest_pain_type", "resting_blood_pressure",
        "cholesterol", "fasting_blood_sugar", "rest_ecg",
        "max_heart_rate_achieved", "exercise_induced_angina",
        "st_depression", "st_slope", "num_major_vessels", "thalassemia",
        "target",
    ]
    discrete_features = [
        'sex', "chest_pain_type", "fasting_blood_sugar", "rest_ecg",
        "exercise_induced_angina", "st_slope", "thalassemia",
    ]
    continuous_features = [
        "age", "resting_blood_pressure", "cholesterol",
        "max_heart_rate_achieved", "st_depression", "num_major_vessels",
    ]

    data = data[selected_columns]
    # Columns not listed in `columns=` (including "target") pass through
    # get_dummies untouched, so no separate re-attachment is needed.
    df = pd.get_dummies(data, columns=discrete_features)

    centered = df[continuous_features] - df[continuous_features].mean()
    scale = df[continuous_features].std()
    # A constant column has std 0 (and a single-row frame has std NaN);
    # dividing by that would fill the feature with NaN/inf. Use 1 so such
    # columns simply become all zeros.
    scale = scale.replace(0, 1).fillna(1)
    df[continuous_features] = centered / scale
    return df

62

63

# ============= Train/test split ==============
def data_partition(data, *, test_size=.2, random_state=10):
    """Split the encoded data set into training and test partitions.

    Prints the class counts of ``target`` first so that class imbalance is
    visible before modelling.

    Args:
        data: DataFrame containing the feature columns and a ``target``
            column.
        test_size: Passed to ``train_test_split`` as the test-set size.
            Defaults to 0.2.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible. Defaults to 10.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, feature)``. Note the
        order differs from ``train_test_split``'s own return order;
        ``feature`` is the list of feature column names.
    """
    # Class balance check.
    print(data["target"].value_counts())

    # X holds the predictors, y the label.
    X = data.drop('target', axis=1)
    y = data['target']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    feature = list(X.columns)
    return X_train, y_train, X_test, y_test, feature

73

74

# =========== Plot the ROC curve ================
def Draw_ROC(list1, list2):
    """Plot the ROC curve and show its AUC in the legend.

    Args:
        list1: True labels; label ``1`` is treated as the positive class.
        list2: Scores or probabilities for the positive class, in the same
            order as ``list1``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fpr_model, tpr_model, _ = roc_curve(list1, list2, pos_label=1)
    roc_auc_model = auc(fpr_model, tpr_model)

    sns.set(font_scale=1.2)
    plt.rc('font', family='Times New Roman')

    plt.plot(fpr_model, tpr_model, 'blue', label='AUC = %0.2f' % roc_auc_model)
    plt.legend(loc='lower right', fontsize=12)
    # Diagonal = performance of a random classifier, for reference.
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('True Positive Rate', fontsize=14)
    plt.xlabel('False Positive Rate', fontsize=14)
    plt.show()

90

91

# ============ Support vector machine ===================
def SVM(X_train, y_train, X_test, y_test, feature):
    """Fit an SVC, print train/test diagnostics, and return test scores.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features.
        y_test: Test labels.
        feature: List of feature names. Not used by this function; kept so
            existing callers do not need to change.

    Returns:
        A tuple ``(labels, probabilities)``: ``y_test`` as a list and the
        predicted probability of class ``1`` for each test row as a list.

    Raises:
        ValueError: If label ``1`` is not among the classes seen in training.
    """
    # probability=True is required for predict_proba.
    svc = SVC(probability=True)
    svc.fit(X_train, y_train)

    print("\nFinally results of SVM fitting:")
    print("Accuracy on training set: {:.3f}".format(svc.score(X_train, y_train)))
    print("Accuracy on test set: {:.3f}".format(svc.score(X_test, y_test)))

    predict_target = svc.predict(X_test)

    # predict_proba columns follow svc.classes_; look the positive class up
    # by label instead of assuming it sits in column 1.
    positive_column = list(svc.classes_).index(1)
    predict_target_prob_svc = svc.predict_proba(X_test)[:, positive_column]

    print('正确预测数量:')
    print(sum(predict_target == y_test))

    # Test-set report.
    print('SVM验证集:')
    print(metrics.classification_report(y_test, predict_target))
    print(metrics.confusion_matrix(y_test, predict_target))

    # Training-set report.
    print('SVM训练集:')
    train_prediction = svc.predict(X_train)
    print(metrics.classification_report(y_train, train_prediction))
    print(metrics.confusion_matrix(y_train, train_prediction))

    return list(y_test), list(predict_target_prob_svc)

116

117

# ============ Script entry point ==============
if __name__ == "__main__":
    # Raw string so the Windows backslashes are taken literally; sequences
    # such as "\h" or "\H" are invalid escapes in a normal string literal.
    data_path = r"F:\数据杂坛\0504\heartdisease\Heart-Disease-Data-Set-main\UCI Heart Disease Dataset.csv"

    # Pipeline: load -> clean -> encode -> split -> fit/evaluate -> ROC plot.
    data1 = Read_data(data_path)
    data1 = data_clean(data1)
    data2 = data_encoding(data1)
    X_train, y_train, X_test, y_test, feature = data_partition(data2)

    y_test, predict_target_prob_svc = SVM(X_train, y_train, X_test, y_test, feature)
    Draw_ROC(y_test, predict_target_prob_svc)

实现效果:

喜欢记得点赞,在看,收藏,

关注V订阅号:数据杂坛,获取完整代码和效果,将持续更新!

标签: #python句向量