python预热第十课(Anaconda Navigator—Jupyter)

阿里云双11来了!从本博客参与阿里云,服务器最低只要86元/年!

import numpy as np
x=np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
#reshape
X=np.arange(15).reshape(5,3)
X
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

访问

x[2]
2
X[3]
array([ 9, 10, 11])
X[1,1]
4
X[1][1]#不建议使用
4

切片

x[5:]
array([5, 6, 7, 8, 9])
#行切片,列切片
X[2:4,1:]
array([[ 7,  8],
       [10, 11]])
X[2:4,:2]
array([[ 6,  7],
       [ 9, 10]])
X[4,:2]
array([12, 13])
#如果 不关心reshape的另外一个参数,我们可以写成-1,numpy自动推导出这个参数
X.reshape(5,-1)
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

numpy的运算

numpy’s universal function

X
   array([[ 0,  1,  2],
           [ 3,  4,  5],
           [ 6,  7,  8],
           [ 9, 10, 11],
           [12, 13, 14]])
X+1
    array([[ 1,  2,  3],
           [ 4,  5,  6],
           [ 7,  8,  9],
           [10, 11, 12],
           [13, 14, 15]])
X*2
array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22],
       [24, 26, 28]])
np.sin(X)
array([[ 0.        ,  0.84147098,  0.90929743],
       [ 0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825],
       [ 0.41211849, -0.54402111, -0.99999021],
       [-0.53657292,  0.42016704,  0.99060736]])

numpy中的argsort

x = np.arange(16)
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
# 随机打乱
# from random import shuffle
np.random.shuffle(x)
x
array([10,  2, 14, 12,  7,  3,  0,  5,  9,  1, 15, 11,  6, 13,  8,  4])
np.argsort(x)#返回将数组从小到大排序所需的索引(即排序后各元素在打乱后数组中的位置)
array([ 6,  9,  1,  5, 15,  7, 12,  4, 14,  8,  0, 11,  3, 13,  2, 10],
      dtype=int64)

Numpy中的布尔索引

names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
names
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')
#使用np.random模块的randn生成一些正态分布的随机数据
data = np.random.randn(7,4)
data
array([[ 2.64666692,  0.43832261, -0.56481495, -0.48259482],
       [ 1.36386359,  1.18923864, -2.50651769,  0.1124804 ],
       [-0.45201577,  0.67006344,  0.1701233 , -0.46859946],
       [ 0.21783159, -0.62597811, -0.13269659, -0.67377082],
       [-1.40902121,  1.35656025, -2.58419   , -1.73861427],
       [ 0.62707218, -0.41343801,  1.31311074, -0.66913306],
       [-0.92292776,  0.90768608, -0.35496417,  1.51008486]])
#假设每个名字对应data数组的一行
names=='Bob'
array([ True, False, False,  True, False, False, False])
data[4,]
array([-0.78962714,  0.38283768,  0.42142592,  1.19701245])
# 布尔型索引可以应用于数据的筛选
data[names=='Bob']
array([[-0.4374545 ,  0.28913484,  0.16863966,  1.18365619],
       [ 1.19645079,  0.91440211,  0.03827419,  0.12495232]])
# 布尔索引应用于修改值
#选取所有Joe的行并且将全部值赋值为0
data[names=='Joe']=0
data
array([[-0.4374545 ,  0.28913484,  0.16863966,  1.18365619],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 1.50409921, -1.51568977,  0.69259129,  1.64054882],
       [ 1.19645079,  0.91440211,  0.03827419,  0.12495232],
       [-0.78962714,  0.38283768,  0.42142592,  1.19701245],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])
# 选取所有的Will的行,并且将选取的数据的后两列赋值为0
data_will=data[names=='Will',2:]=0
data
array([[ 2.64666692,  0.43832261, -0.56481495, -0.48259482],
       [ 1.36386359,  1.18923864, -2.50651769,  0.1124804 ],
       [-0.45201577,  0.67006344,  0.        ,  0.        ],
       [ 0.21783159, -0.62597811, -0.13269659, -0.67377082],
       [-1.40902121,  1.35656025,  0.        ,  0.        ],
       [ 0.62707218, -0.41343801,  1.31311074, -0.66913306],
       [-0.92292776,  0.90768608, -0.35496417,  1.51008486]])
data_will=data[names=='Will']
data_will[0:3,2:]=0
data
array([[ 2.64666692,  0.43832261, -0.56481495, -0.48259482],
       [ 1.36386359,  1.18923864, -2.50651769,  0.1124804 ],
       [-0.45201577,  0.67006344,  0.        ,  0.        ],
       [ 0.21783159, -0.62597811, -0.13269659, -0.67377082],
       [-1.40902121,  1.35656025,  0.        ,  0.        ],
       [ 0.62707218, -0.41343801,  1.31311074, -0.66913306],
       [-0.92292776,  0.90768608, -0.35496417,  1.51008486]])

绘制散点图

from matplotlib import pyplot as plt
import numpy as np
x = [i for i in range(10)]
x
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
y= [i for i in range(10)]
y
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
plt.scatter(x,y)
plt.show()


x,y散点图.png

z =np.arange(10)
w =np.arange(10)
plt.scatter(z,w)
plt.show()


z,w散点图.png

# 绘制正态分布的散点图
#均值为0 标准差为1 的100个数据
x = np.random.normal(0,1,100)
y = np.random.normal(0,1,100)
plt.scatter(x,y)
plt.show()
# 绘制正态分布的散点图
#均值为0 标准差为0.8 的10000个数据
x = np.random.normal(0,0.8,10000)
y = np.random.normal(0,0.8,10000)
plt.scatter(x,y)
plt.show()


正太分布散点图.png

鸢尾花数据集散点图绘制

import warnings    #libraries to deal with warnings
warnings.filterwarnings("ignore")#将警告设置为忽略
from sklearn import datasets
from matplotlib import pyplot as plt
iris = datasets.load_iris()
type(iris) 
iris.keys()
#iris['DESCR']
iris.DESCR
print(iris.DESCR)
X = iris.data
X
X.ndim
X.shape
X.size
iris.feature_names
y=iris.target
y
iris.target_names
# 绘制萼片维度
X = iris.data[:,:2]
X

1、

y==0#setosa
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
X[y==0][:,0] #setosa 第一列
array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. ])
# 绘制萼片散点图
plt.scatter(X[y==0][:,0],X[y==0][:,1],color='r')#setosa
plt.scatter(X[y==1][:,0],X[y==1][:,1],color='b')#versicolor
plt.scatter(X[y==2][:,0],X[y==2][:,1],color='g')#virginica
plt.show()

2、

X[y==0,0]
array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. ])
# 绘制萼片散点图
plt.scatter(X[y==0,0],X[y==0,1],color='r')#setosa
plt.scatter(X[y==1,0],X[y==1,1],color='b')#versicolor
plt.scatter(X[y==2,0],X[y==2,1],color='g')#virginica
plt.show()


萼片散点图.png

# 绘制花瓣维度
X = iris.data[:, 2:]
X
# 绘制花瓣散点图
plt.scatter(X[y==0,0],X[y==0,1],color='r')#setosa
plt.scatter(X[y==1,0],X[y==1,1],color='b')#versicolor
plt.scatter(X[y==2,0],X[y==2,1],color='g')#virginica
plt.show()


花瓣散点图.png


自己实现KNN算法

import numpy as np
from matplotlib import pyplot as plt
raw_data_X = [[3.393533211, 2.331273381],
              [3.110073483, 1.781539638],
              [1.343808831, 3.368360954],
              [3.582294042, 4.679179110],
              [2.280362439, 2.866990263],
              [7.423436942, 4.696522875],
              [5.745051997, 3.533989803],
              [9.172168622, 2.511101045],
              [7.792783481, 3.424088941],
              [7.939820817, 0.791637231]
             ]
raw_data_X
[[3.393533211, 2.331273381],
 [3.110073483, 1.781539638],
 [1.343808831, 3.368360954],
 [3.582294042, 4.67917911],
 [2.280362439, 2.866990263],
 [7.423436942, 4.696522875],
 [5.745051997, 3.533989803],
 [9.172168622, 2.511101045],
 [7.792783481, 3.424088941],
 [7.939820817, 0.791637231]]
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
# 转化成ndarray类型
X_train = np.array(raw_data_X)
X_train
array([[3.39353321, 2.33127338],
       [3.11007348, 1.78153964],
       [1.34380883, 3.36836095],
       [3.58229404, 4.67917911],
       [2.28036244, 2.86699026],
       [7.42343694, 4.69652288],
       [5.745052  , 3.5339898 ],
       [9.17216862, 2.51110105],
       [7.79278348, 3.42408894],
       [7.93982082, 0.79163723]])
y_train=np.array(raw_data_y)
y_train
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
### 对数据进行可视化
plt.scatter(X_train[y_train==0,0],X_train[y_train==0,1],color='g')
plt.scatter(X_train[y_train==1,0],X_train[y_train==1,1],color='r')
plt.show()


散点图.png

预测

# 设新来一个样本数据判断x是恶性还是良性
x = np.array([8.093607318, 3.365731514])
plt.scatter(X_train[y_train==0,0],X_train[y_train==0,1],color='g')
plt.scatter(X_train[y_train==1,0],X_train[y_train==1,1],color='r')
plt.scatter(x[0],x[1],color='b')
plt.show()


散点图.png

通过knn算法来预测

from math import sqrt
# 计算x到所有十个点的距离,然后选出距离最近的前k个
# distances = []
# for x_train in X_train:
#     d = sqrt(np.sum((x_train-x)**2))
#     distances.append(d)
distances = [sqrt(np.sum((x_train-x)**2)) for x_train in X_train]
distances
[4.812566907609877,
 5.229270827235305,
 6.749798999160064,
 4.6986266144110695,
 5.83460014556857,
 1.4900114024329525,
 2.354574897431513,
 1.3761132675144652,
 0.3064319992975,
 2.5786840957478887]
nearst = np.argsort(distances)
nearst
array([8, 7, 5, 6, 9, 3, 0, 1, 4, 2], dtype=int64)
# 假设我们指定K的值是6
k =6 
top_k_y =[y_train[i] for i in  nearst[:6]]
top_k_y
[1, 1, 1, 1, 1, 0]
# 数据统计量大的话使用的统计办法
from collections import Counter
votes = Counter(top_k_y)
votes
Counter({1: 5, 0: 1})
# 返回出现次数最多的前 n 个元素及其出现次数
votes.most_common(1)
[(1, 5)]
predict_y = votes.most_common(1)[0][0]
predict_y
1
#x患者是恶性肿瘤的可能性大

https://www.jianshu.com/p/d5e5a5eb582d

Python量化投资网携手4326手游为资深游戏玩家推荐:《《消零世界》:七夕快乐!夏米尔表情包已上架微信,官方贴吧也已经开通啦~

「点点赞赏,手留余香」

    还没有人赞赏,快来当第一个赞赏的人吧!
0 条回复 A 作者 M 管理员
    所有的伟大,都源于一个勇敢的开始!
欢迎您,新朋友,感谢参与互动!欢迎您 {{author}},您在本站有{{commentsCount}}条评论