前言:
今天大家可能比较关心“python总结500字”,也想找一些与“python总结500字”相关的文章来看。小编在网上搜集了一些关于“python总结500字”的相关资讯,希望大家喜欢,咱们一起来学习一下吧!
Python数据分析学习总结
●概述
● 数据分析的含义与目标
方法:统计分析方法
目标:提取有用信息
手段:研究、概括、总结
● Python与数据分析
Python特点:简洁、开发效率高、运算速度慢、胶水特性(集成C语言)
Python数据分析:numpy、scipy、matplotlib、pandas、scikit-learn、keras…
● Python数据分析大家族
numpy:数据结构基础
scipy:强大的科学计算方法(矩阵分析、信号分析、数理分析…)
matplotlib:丰富的可视化套件
pandas:基础数据分析套件
scikit-learn:强大的数据分析建模库
keras:人工神经网络
● Python数据分析环境搭建
平台:Windows、Linux
科学计算工具:Anaconda
●Python数据分析基础
● numpy
开源、数据计算扩展;ndarray、多维操作、线性代数
● numpy使用程序
import numpy as np


def main():
    """Print the basic attributes of an ndarray built from a nested list."""
    lst = [[1, 3, 5], [2, 4, 6]]
    print(type(lst))
    np_lst = np.array(lst)
    print(type(np_lst))
    # The np.float alias was removed in NumPy 1.24; the builtin float
    # selects the same float64 dtype.
    np_lst = np.array(lst, dtype=float)
    print(np_lst.shape)
    print(np_lst.ndim)
    print(np_lst.dtype)
    print(np_lst.itemsize)
    print(np_lst.size)


if __name__ == "__main__":
    main()

# Expected output:
# <class 'list'>
# <class 'numpy.ndarray'>
# (2, 3)
# 2
# float64
# 8
# 6
● numpy常用数组
# Commonly used array constructors and random generators.
import numpy as np

print(np.zeros([2, 4]))
print(np.ones([3, 5]))
print(np.random.rand(2, 4))
print(np.random.rand())
print("RandInt:")
print(np.random.randint(1, 10, 3))
print("Randn:")  # standard normal distribution
print(np.random.randn(2, 4))
print("Choice")
print(np.random.choice([10, 20, 30]))
print("Distribute:")  # Beta distribution
print(np.random.beta(1, 10, 100))

# Sample output (random values differ on every run):
# [[ 0.  0.  0.  0.]
#  [ 0.  0.  0.  0.]]
# [[ 1.  1.  1.  1.  1.]
#  [ 1.  1.  1.  1.  1.]
#  [ 1.  1.  1.  1.  1.]]
# [[ 0.80307088  0.25491367  0.54381007  0.10159737]
#  [ 0.71565024  0.62473538  0.66892166  0.41078071]]
# 0.16467244260637237
# RandInt:
# [5 3 2]
# Randn:
# [[-0.51707383 -1.46091351 -0.78197086  0.44640286]
#  [-0.0998081   0.40701679  0.07750661  0.66041753]]
# Choice
# 10
# Distribute:
# [ 0.03897375  0.09804991  0.1617222  ...,  0.12878516  0.11699157
#   0.05681225]
● numpy常用操作
# Commonly used element-wise, reduction and array-combination operations.
import numpy as np

# The nested list from the first numpy example; the printed results below
# correspond to these values.
lst = [[1, 3, 5], [2, 4, 6]]

print("Arange:")
print(np.arange(1, 11))
print("Exp:")
print(np.exp(lst))
print("Exp2:")
print(np.exp2(lst))
print("Sqrt:")
print(np.sqrt(lst))
print("Sin:")
print(np.sin(lst))
print("Log:")
print(np.log(lst))

# Expected output:
# Arange:
# [ 1  2  3  4  5  6  7  8  9 10]
# Exp:
# [[   2.71828183   20.08553692  148.4131591 ]
#  [   7.3890561    54.59815003  403.42879349]]
# Exp2:
# [[  2.   8.  32.]
#  [  4.  16.  64.]]
# Sqrt:
# [[ 1.          1.73205081  2.23606798]
#  [ 1.41421356  2.          2.44948974]]
# Sin:
# [[ 0.84147098  0.14112001 -0.95892427]
#  [ 0.90929743 -0.7568025  -0.2794155 ]]
# Log:
# [[ 0.          1.09861229  1.60943791]
#  [ 0.69314718  1.38629436  1.79175947]]

# Reductions along each axis of a 3 x 2 x 4 array.
lst = np.array([
    [[1, 2, 3, 4], [4, 5, 6, 7]],
    [[7, 8, 9, 10], [10, 11, 12, 13]],
    [[14, 15, 16, 17], [18, 19, 20, 11]],
])
print(lst.sum(axis=2))
print(lst.sum(axis=1))
print(lst.sum(axis=0))
print("Max:")
print(lst.max(axis=1))
print("Min:")
print(lst.min(axis=0))

# Expected output:
# [[10 22]
#  [34 46]
#  [62 68]]
# [[ 5  7  9 11]
#  [17 19 21 23]
#  [32 34 36 28]]
# [[22 25 28 31]
#  [32 35 38 31]]
# Max:
# [[ 4  5  6  7]
#  [10 11 12 13]
#  [18 19 20 17]]
# Min:
# [[1 2 3 4]
#  [4 5 6 7]]

# Arithmetic between arrays, plus joining, splitting and copying.
lst1 = np.array([10, 20, 30, 40])
lst2 = np.array([4, 3, 2, 1])
print("Add:")
print(lst1 + lst2)
print("Sub:")
print(lst1 - lst2)
print("Mul:")
print(lst1 * lst2)
print("Div:")
print(lst1 / lst2)
print("Square:")
print(lst1 ** 2)
print("Dot:")
print(np.dot(lst1.reshape([2, 2]), lst2.reshape([2, 2])))
print("Concatenate:")
print(np.concatenate((lst1, lst2), axis=0))
print("vstack:")
print(np.vstack((lst1, lst2)))
print("hstack:")
print(np.hstack((lst1, lst2)))
print("Split:")
print(np.split(lst1, 2))
print(np.split(lst1, 4))
print("Copy:")
print(np.copy(lst1))

# Expected output:
# Add:
# [14 23 32 41]
# Sub:
# [ 6 17 28 39]
# Mul:
# [40 60 60 40]
# Div:
# [  2.5          6.66666667  15.          40.        ]
# Square:
# [ 100  400  900 1600]
# Dot:
# [[ 80  50]
#  [200 130]]
# Concatenate:
# [10 20 30 40  4  3  2  1]
# vstack:
# [[10 20 30 40]
#  [ 4  3  2  1]]
# hstack:
# [10 20 30 40  4  3  2  1]
# Split:
# [array([10, 20]), array([30, 40])]
# [array([10]), array([20]), array([30]), array([40])]
# Copy:
# [10 20 30 40]
● 线性方程组
import numpy as np
from numpy.linalg import det, eig, inv


def main():
    """Print the identity matrix and the inverse, transpose, determinant
    and eigen-decomposition of a 2x2 matrix."""
    print(np.eye(3))
    lst = np.array([[1, 2], [3, 4]])
    print("Inv:")
    print(inv(lst))
    print("T:")
    print(lst.transpose())
    print("Det:")
    print(det(lst))
    print("Eig:")
    # eig returns the eigenvalues followed by the matrix of eigenvectors.
    print(eig(lst))


if __name__ == "__main__":
    main()

# Expected output:
# [[ 1.  0.  0.]
#  [ 0.  1.  0.]
#  [ 0.  0.  1.]]
# Inv:
# [[-2.   1. ]
#  [ 1.5 -0.5]]
# T:
# [[1 3]
#  [2 4]]
# Det:
# -2.0
# Eig:
# (array([-0.37228132,  5.37228132]), array([[-0.82456484, -0.41597356],
#        [ 0.56576746, -0.90937671]]))
● numpy其他方面应用
import numpy as np


def main():
    """Print an FFT, a correlation-coefficient matrix and a polynomial."""
    print("FFT:")
    print(np.fft.fft(np.array([1, 1, 1, 1, 1, 1, 1, 1])))
    print("Coef:")
    print(np.corrcoef([1, 0, 1], [0, 2, 1]))
    print("Poly:")
    # poly1d takes coefficients from the highest power down: 2x^2 + x + 3.
    print(np.poly1d([2, 1, 3]))


if __name__ == "__main__":
    main()

# Expected output:
# FFT:
# [ 8.+0.j  0.+0.j  0.+0.j  0.+0.j  0.+0.j  0.+0.j  0.+0.j  0.+0.j]
# Coef:
# [[ 1.        -0.8660254]
#  [-0.8660254  1.       ]]
# Poly:
#    2
# 2 x + 1 x + 3
● matplotlib
● 概述
matplotlib是关键的绘图库。
● 实现
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm


def main():
    """Draw a cos/sin line chart, then a grid of common chart types.

    The first figure holds the line chart. The second figure holds the
    scatter, bar, pie, polar, heat-map, contour and 3D examples. Every
    subplot is drawn through its own Axes object and the figure is shown
    once, so each chart lands in the grid cell created for it.
    """
    # line
    x = np.linspace(-np.pi, np.pi, 256, endpoint=True)
    c, s = np.cos(x), np.sin(x)
    plt.figure(1)
    plt.plot(x, c, color="blue", linewidth=1.0, linestyle="-", label="COS", alpha=0.5)
    plt.plot(x, s, "r*", label="SIN")
    plt.title("COS & SIN")
    # Without a legend the label= arguments above are never displayed.
    plt.legend()
    ax = plt.gca()
    # Hide the top/right borders and move the remaining two through the origin.
    ax.spines["right"].set_color("none")
    ax.spines["top"].set_color("none")
    ax.spines["left"].set_position(("data", 0))
    ax.spines["bottom"].set_position(("data", 0))
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")
    plt.show()

    fig = plt.figure()

    # scatter
    ax = fig.add_subplot(3, 3, 1)
    n = 128
    X = np.random.normal(0, 1, n)
    Y = np.random.normal(0, 1, n)
    T = np.arctan2(Y, X)  # colour each point by its angle
    ax.scatter(X, Y, s=75, c=T, alpha=0.5)
    ax.set_xlim(-1.5, 1.5)
    ax.set_xticks([])
    ax.set_ylim(-1.5, 1.5)
    ax.set_yticks([])
    ax.set_title("scatter")
    ax.set_xlabel("x")
    ax.set_ylabel("y")

    # bar
    ax = fig.add_subplot(3, 3, 2)
    n = 10
    X = np.arange(n)
    Y1 = (1 - X / float(n)) * np.random.uniform(0.5, 1.0, n)
    Y2 = (1 - X / float(n)) * np.random.uniform(0.5, 1.0, n)
    ax.bar(X, +Y1, facecolor='#9999ff', edgecolor='white')
    ax.bar(X, -Y2, facecolor='#9999ff', edgecolor='white')
    for xi, yi in zip(X, Y1):
        ax.text(xi + 0.4, yi + 0.05, '%.2f' % yi, ha='center', va='bottom')
    for xi, yi in zip(X, Y2):
        # Labels for the downward bars hang below the bar end.
        ax.text(xi + 0.4, -yi - 0.05, '%.2f' % yi, ha='center', va='top')

    # pie
    ax = fig.add_subplot(3, 3, 3)
    n = 20
    Z = np.ones(n)
    Z[-1] *= 2
    ax.pie(Z, explode=Z * .05,
           colors=['%s' % (i / float(n)) for i in range(n)],
           labels=['%.2f' % (i / float(n)) for i in range(n)])
    ax.set_aspect('equal')
    ax.set_xticks([])
    ax.set_yticks([])

    # polar
    ax = fig.add_subplot(3, 3, 4, polar=True)
    n = 20
    theta = np.arange(0.0, 2 * np.pi, 2 * np.pi / n)
    radii = 10 * np.random.rand(n)
    ax.plot(theta, radii)

    # heat map
    ax = fig.add_subplot(3, 3, 5)
    data = np.random.rand(3, 3)
    ax.imshow(data, interpolation='nearest', cmap=cm.Blues, aspect='auto', vmin=0, vmax=1)

    # hot map (filled contours), spanning the whole bottom row
    ax = fig.add_subplot(3, 1, 3)

    def f(x, y):
        return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)

    n = 256
    x = np.linspace(-3, 3, n)
    y = np.linspace(-3, 3, n)
    X, Y = np.meshgrid(x, y)
    ax.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap=plt.cm.hot)

    # 3D
    ax = fig.add_subplot(3, 3, 6, projection="3d")
    ax.scatter(1, 1, 3, s=100)

    plt.show()


if __name__ == "__main__":
    main()
● scipy
● 简介
数值计算库
● 积分
# Program: numerical integration with scipy.integrate.
import numpy as np
from scipy.integrate import quad, dblquad, nquad


def main():
    """Print a single, a double and an n-dimensional numerical integral.

    Each result is printed as an (estimate, error-bound) pair.
    """
    # Integral of exp(-x) over [0, inf).
    print(quad(lambda x: np.exp(-x), 0, np.inf))
    # Outer variable x runs over [0, inf), inner variable t over [1, inf).
    print(dblquad(lambda t, x: np.exp(-x * t) / t ** 3, 0, np.inf,
                  lambda x: 1, lambda x: np.inf))

    def f(x, y):
        return x * y

    def bound_y():
        return [0, 0.5]

    def bound_x(y):
        # The x range depends on the current y value.
        return [0, 1 - 2 * y]

    print(nquad(f, [bound_x, bound_y]))


if __name__ == "__main__":
    main()

# Expected output:
# (1.0000000000000002, 5.842607038578007e-11)
# (0.3333333333366853, 1.3888461883425516e-08)
# (0.010416666666666668, 4.101620128472366e-16)
● 优化器
import numpy as np
from scipy.optimize import minimize


def main():
    """Minimise the Rosenbrock function with Nelder-Mead and print the result."""
    # Optimizer
    def rosen(x):
        return sum(100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0)

    x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
    # The Nelder-Mead tolerance on x is called "xatol"; the old "xtol" name
    # is reported as an unknown option and ignored by current SciPy.
    res = minimize(rosen, x0, method="nelder-mead",
                   options={"xatol": 1e-8, "disp": True})
    print("ROSE MINI:", res)


if __name__ == "__main__":
    main()

# Expected output:
# Optimization terminated successfully.
#          Current function value: 0.000000
#          Iterations: 339
#          Function evaluations: 571
# ROSE MINI:  final_simplex: (array([[ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
#        [ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
#        [ 1.        ,  1.        ,  1.        ,  1.00000001,  1.00000001],
#        [ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
#        [ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
#        [ 1.        ,  1.        ,  1.        ,  1.        ,  0.99999999]]),
#        array([  4.86115343e-17,   7.65182843e-17,   8.11395684e-17,
#          8.63263255e-17,   8.64080682e-17,   2.17927418e-16]))
#            fun: 4.8611534334221152e-17
#        message: 'Optimization terminated successfully.'
#           nfev: 571
#            nit: 339
#         status: 0
#        success: True
#              x: array([ 1.,  1.,  1.,  1.,  1.])
● 插值
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from scipy.optimize import root


def main():
    """Solve x + 2*cos(x) = 0, then cubic-interpolate a sampled sine wave.

    The coarse samples are plotted in red and the interpolated curve in
    black; the interpolated values are printed afterwards.
    """
    # Root finding
    def fun(x):
        return x + 2 * np.cos(x)

    sol = root(fun, 0.1)  # 0.1 is the initial guess
    print("ROOT:", sol.x, sol.fun)

    # Interpolation
    x = np.linspace(0, 1, 10)
    y = np.sin(2 * np.pi * x)
    # The function is interp1d (digit one), not "interpld".
    li = interp1d(x, y, kind="cubic")
    x_new = np.linspace(0, 1, 50)
    y_new = li(x_new)
    plt.figure()
    plt.plot(x, y, "r")
    plt.plot(x_new, y_new, "k")
    plt.show()
    print(y_new)


if __name__ == "__main__":
    main()
● 线性计算与矩阵分解
# Program: linear algebra and matrix decompositions with scipy.linalg.
import numpy as np
from scipy import linalg as lg


def main():
    """Print the determinant, inverse, linear-system solution and several
    decompositions (eigen, LU, QR, SVD, Schur) of a 2x2 matrix."""
    arr = np.array([[1, 2], [3, 4]])
    print("Det:", lg.det(arr))
    print("Inv:", lg.inv(arr))
    b = np.array([6, 14])
    # Solves arr @ x = b.
    print("Sol:", lg.solve(arr, b))
    print("Eig:", lg.eig(arr))
    print("LU:", lg.lu(arr))
    print("QR:", lg.qr(arr))
    print("SVD:", lg.svd(arr))
    print("Schur:", lg.schur(arr))


if __name__ == "__main__":
    main()

# Expected output:
# Det: -2.0
# Inv: [[-2.   1. ]
#  [ 1.5 -0.5]]
# Sol: [ 2.  2.]
# Eig: (array([-0.37228132+0.j,  5.37228132+0.j]), array([[-0.82456484, -0.41597356],
#        [ 0.56576746, -0.90937671]]))
# LU: (array([[ 0.,  1.],
#        [ 1.,  0.]]), array([[ 1.        ,  0.        ],
#        [ 0.33333333,  1.        ]]), array([[ 3.        ,  4.        ],
#        [ 0.        ,  0.66666667]]))
# QR: (array([[-0.31622777, -0.9486833 ],
#        [-0.9486833 ,  0.31622777]]), array([[-3.16227766, -4.42718872],
#        [ 0.        , -0.63245553]]))
# SVD: (array([[-0.40455358, -0.9145143 ],
#        [-0.9145143 ,  0.40455358]]), array([ 5.4649857 ,  0.36596619]), array([[-0.57604844, -0.81741556],
#        [ 0.81741556, -0.57604844]]))
# Schur: (array([[-0.37228132, -1.        ],
#        [ 0.        ,  5.37228132]]), array([[-0.82456484, -0.56576746],
#        [ 0.56576746, -0.82456484]]))
● pandas
● 简介
数据分析库
● 基础数据分析技术
import numpy as np
import pandas as pd


def main():
    """Walk through basic pandas usage, printing the result of each step.

    Covers building Series/DataFrame objects, inspection, selection,
    assignment, missing values, and concat/merge/groupby.
    """
    # Data Structure
    s = pd.Series([i * 2 for i in range(1, 11)])
    print(type(s))
    dates = pd.date_range("20170301", periods=8)
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
    print(df)

    # Basic
    print(df.head(3))
    print(df.tail(3))
    print(df.index)
    print(df.values)
    print(df.T)
    # DataFrame.sort(columns=...) was removed; sort_values(by=...) replaces it.
    print(df.sort_values(by="C"))
    print(df.sort_index(axis=1, ascending=False))
    print(df.describe())

    # Select
    print(type(df["A"]))
    print(df[:3])
    print(df["20170301":"20170304"])
    print(df.loc[dates[0]])
    print(df.loc["20170301":"20170304", ["B", "D"]])
    print(df.iloc[1:2, 2:4])
    print(df.iloc[1, 4])
    # One combined mask instead of chained df[...][...], which applies the
    # second mask to an already filtered frame and triggers a reindex warning.
    print(df[(df.B > 0) & (df.A < 0)])
    print(df[df > 0])
    print(df[df["E"].isin([1, 2])])

    # Set
    s1 = pd.Series(list(range(10, 18)), index=pd.date_range("20170301", periods=8))
    df["F"] = s1
    print(df)
    df.at[dates[0], "A"] = 0
    print(df)
    df.iat[1, 1] = 1
    df.loc[:, "D"] = np.array([4] * len(df))
    df2 = df.copy()
    # Flip the sign of every positive entry. Assigning df2 back to itself,
    # as the text originally did, changes nothing.
    df2[df2 > 0] = -df2
    print(df2)

    # Missing Value
    df1 = df.reindex(index=dates[:4], columns=list("ABCD") + ["G"])
    df1.loc[dates[0]:dates[1], "G"] = 1
    print(df1)
    print(df1.dropna())
    print(df1.fillna(value=2))

    # Concat
    pieces = [df[:3], df[-3:]]
    print(pd.concat(pieces))
    left = pd.DataFrame({"key": ["x", "y"], "value": [1, 2]})
    right = pd.DataFrame({"key": ["x", "z"], "value": [3, 4]})
    print("LEFT:", left)
    print("RIGHT:", right)
    print(pd.merge(left, right, on="key", how="left"))
    df3 = pd.DataFrame({"A": ["a", "b", "c", "b"], "B": list(range(4))})
    print(df3.groupby("A").sum())


if __name__ == "__main__":
    main()
● 时间、绘图
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def main():
    """Print a second-resolution date range and plot a cumulative random walk."""
    # Time Series
    # Lower-case "s" is the seconds alias; upper-case "S" is deprecated.
    t_exam = pd.date_range("20170301", periods=10, freq="s")
    print(t_exam)

    # Graph
    ts = pd.Series(np.random.randn(1000),
                   index=pd.date_range("20170301", periods=1000))
    ts = ts.cumsum()
    ts.plot()
    plt.show()


if __name__ == "__main__":
    main()
● scikit-learn
● 简介
数据挖掘建模、机器学习
● 机器学习与决策树
机器学习:因子–>结果
结果:
不带标记–>无监督学习(聚类);带标记–>监督学习
有限离散–>分类;连续–>回归
决策树:监督学习;树形结构
● Iris数据集
● 花萼长度
● 花萼宽度
● 花瓣长度
● 花瓣宽度
● 种类:Iris Setosa(山鸢尾)、Iris Versicolour(杂色鸢尾)、Iris Virginica(维吉尼亚鸢尾)
● 实现
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
# sklearn.cross_validation was removed; train_test_split now lives in
# sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn import metrics


def main():
    """Train an entropy-based decision tree on Iris and print its accuracy
    and confusion matrix on a held-out 20% split."""
    # Pre-processing
    iris = load_iris()
    print(iris)
    print(len(iris["data"]))
    train_data, test_data, train_target, test_target = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=1)

    # Model
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf.fit(train_data, train_target)
    y_pred = clf.predict(test_data)

    # Verify
    print(metrics.accuracy_score(y_true=test_target, y_pred=y_pred))
    print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))


if __name__ == "__main__":
    main()
● keras
● 简介
人工神经网络
● 简单神经网络实现
Keras安装步骤:Anaconda CMD;conda install mingw libpython;pip install keras;pip install np_utils
● 实例
注意:需要修改C:/user/username/.keras/keras.json,具体改后内容如下:{"backend": "theano", "image_data_format": "th", "epsilon": 1e-07, "floatx": "float32"}。
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD
from keras.optimizers.schedules import InverseTimeDecay
from sklearn.datasets import load_iris
from sklearn.preprocessing import LabelBinarizer
# sklearn.cross_validation was removed; train_test_split now lives in
# sklearn.model_selection.
from sklearn.model_selection import train_test_split


def main():
    """Train a small dense network on Iris and print the predicted class
    index for every sample in a held-out 20% split."""
    iris = load_iris()
    print(iris["target"])
    train_data, test_data, train_target, test_target = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=1)
    # One-hot encode the integer labels for categorical cross-entropy.
    labels_train = LabelBinarizer().fit_transform(train_target)

    model = Sequential(
        [
            Dense(5, input_dim=4),
            Activation("relu"),
            Dense(3),
            Activation("sigmoid"),
        ]
    )
    # Optimizer
    # The lr= and decay= arguments are gone from current Keras. A schedule
    # with decay_steps=1 gives the same lr / (1 + decay * step) behaviour.
    schedule = InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = SGD(learning_rate=schedule, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss="categorical_crossentropy")
    # nb_epoch was renamed to epochs.
    model.fit(train_data, labels_train, epochs=200, batch_size=40)
    # predict_classes() was removed; take the arg-max of the predicted scores.
    print(np.argmax(model.predict(test_data), axis=-1))
    # To persist / restore the trained weights:
    #   model.save_weights("D:/w")
    #   model.load_weights("D:/w")


if __name__ == "__main__":
    main()
(欢迎私信小编有干货分享哦!)
标签: #python总结500字