numpy基础 – Python量化投资

numpy基础

numpy.array 基础

import numpy
numpy.__version__
'1.12.1'
import numpy as np
np.__version__
'1.12.1'

Python List的特点

L = [i for i in range(10)]
L
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
L[5]
5
L[5] = 100
L
[0, 1, 2, 3, 4, 100, 6, 7, 8, 9]
L[5] = "Machine Learning"
L
[0, 1, 2, 3, 4, 'Machine Learning', 6, 7, 8, 9]

Python的List不要求所有元素的类型相同,使用起来很灵活,但每个元素都要单独记录和检查类型,因此带来效率问题。

import array
arr = array.array('i', [i for i in range(10)])
arr
array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[5]
5
arr[5] = 100
arr
array('i', [0, 1, 2, 3, 4, 100, 6, 7, 8, 9])
arr[5] = "Machine Learning"
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-14-e74bffddd7b6> in <module>()
----> 1 arr[5] = "Machine Learning"
TypeError: an integer is required (got type str)
arr[5] = 5.0
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-f30bba6fbd5a> in <module>()
----> 1 arr[5] = 5.0
TypeError: integer argument expected, got float

array.array 限定了元素类型,效率更高;但它的缺点是只把数据当作普通序列,没有将数据当做向量或者矩阵,因此不支持向量、矩阵的基本运算。

numpy.array

nparr = np.array([i for i in range(10)])
nparr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
nparr[5] = 100
nparr
array([  0,   1,   2,   3,   4, 100,   6,   7,   8,   9])
nparr[5] = "Machine Learning"
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-21-df6cd071861b> in <module>()
----> 1 nparr[5] = "Machine Learning"
ValueError: invalid literal for int() with base 10: 'Machine Learning'
nparr.dtype
dtype('int64')
nparr[5] = 5.0
nparr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
nparr.dtype
dtype('int64')
nparr[5] = 3.14
nparr
array([0, 1, 2, 3, 4, 3, 6, 7, 8, 9])
nparr2 = np.array([1, 2, 3.0])
nparr2.dtype
dtype('float64')

创建 numpy.array

import numpy as np

numpy.array

nparr = np.array([i for i in range(10)])
nparr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

其他创建 numpy.array 的方法

zeros

np.zeros(10)
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
np.zeros(10, dtype=float)
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
np.zeros((3, 5))
array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])
np.zeros(shape=(3, 5), dtype=int)
array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

ones

np.ones(10)
array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])
np.ones((3, 5))
array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

full

np.full((3, 5), 666)
array([[666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666]])
np.full(fill_value=666, shape=(3, 5))
array([[666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666]])

arange

[i for i in range(0, 20, 2)]
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
np.arange(0, 20, 2)
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])
[i for i in range(0, 1, 0.2)]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-d0579096bf02> in <module>()
----> 1 [i for i in range(0, 1, 0.2)]
TypeError: 'float' object cannot be interpreted as an integer
np.arange(0, 1, 0.2)
array([ 0. ,  0.2,  0.4,  0.6,  0.8])
[i for i in range(0, 10)]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
np.arange(0, 10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
[i for i in range(10)]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

linspace

np.linspace(0, 20, 10)
array([  0.        ,   2.22222222,   4.44444444,   6.66666667,
         8.88888889,  11.11111111,  13.33333333,  15.55555556,
        17.77777778,  20.        ])
np.linspace(0, 20, 11)
array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.,  20.])
np.linspace(0, 1, 5)
array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

random

randint
np.random.randint(0, 10)    # [0, 10)之间的随机数
5
np.random.randint(0, 10, 10)
array([2, 6, 1, 8, 1, 6, 8, 0, 1, 4])
np.random.randint(0, 1, 10)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
np.random.randint(0, 10, size=10)
array([3, 4, 9, 9, 5, 2, 3, 3, 2, 1])
np.random.randint(0, 10, size=(3,5))
array([[1, 5, 3, 8, 5],
       [2, 7, 9, 6, 0],
       [0, 9, 9, 9, 7]])
np.random.randint(10, size=(3,5))
array([[4, 8, 3, 7, 2],
       [9, 9, 2, 4, 4],
       [1, 5, 1, 7, 7]])
seed
np.random.seed(666)
np.random.randint(0, 10, size=(3, 5))
array([[2, 6, 9, 4, 3],
       [1, 0, 8, 7, 5],
       [2, 5, 5, 4, 8]])
np.random.seed(666)
np.random.randint(0, 10, size=(3,5))
array([[2, 6, 9, 4, 3],
       [1, 0, 8, 7, 5],
       [2, 5, 5, 4, 8]])
random
np.random.random()
0.7315955468480113
np.random.random((3,5))
array([[ 0.8578588 ,  0.76741234,  0.95323137,  0.29097383,  0.84778197],
       [ 0.3497619 ,  0.92389692,  0.29489453,  0.52438061,  0.94253896],
       [ 0.07473949,  0.27646251,  0.4675855 ,  0.31581532,  0.39016259]])
normal
np.random.normal()
0.9047266176428719
np.random.normal(10, 100)
-72.62832650185376
np.random.normal(0, 1, (3, 5))
array([[ 0.82101369,  0.36712592,  1.65399586,  0.13946473, -1.21715355],
       [-0.99494737, -1.56448586, -1.62879004,  1.23174866, -0.91360034],
       [-0.27084407,  1.42024914, -0.98226439,  0.80976498,  1.85205227]])

np.random.<TAB> 查看random中的更多方法

np.random?
np.random.normal?
help(np.random.normal)
Help on built-in function normal:
normal(...) method of mtrand.RandomState instance
    normal(loc=0.0, scale=1.0, size=None)
    
    Draw random samples from a normal (Gaussian) distribution.
    
    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).
    
    The normal distributions occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.
    
    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : float or array_like of floats
        Standard deviation (spread or "width") of the distribution.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
        a single value is returned if ``loc`` and ``scale`` are both scalars.
        Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn.
    
    Returns
    -------
    out : ndarray or scalar
        Drawn samples from the parameterized normal distribution.
    
    See Also
    --------
    scipy.stats.norm : probability density function, distribution or
        cumulative density function, etc.
    
    Notes
    -----
    The probability density for the Gaussian distribution is
    
    .. math:: p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }}
                     e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} },
    
    where :math:`\mu` is the mean and :math:`\sigma` the standard
    deviation. The square of the standard deviation, :math:`\sigma^2`,
    is called the variance.
    
    The function has its peak at the mean, and its "spread" increases with
    the standard deviation (the function reaches 0.607 times its maximum at
    :math:`x + \sigma` and :math:`x - \sigma` [2]_).  This implies that
    `numpy.random.normal` is more likely to return samples lying close to
    the mean, rather than those far away.
    
    References
    ----------
    .. [1] Wikipedia, "Normal distribution",
           http://en.wikipedia.org/wiki/Normal_distribution
    .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability,
           Random Variables and Random Signal Principles", 4th ed., 2001,
           pp. 51, 51, 125.
    
    Examples
    --------
    Draw samples from the distribution:
    
    >>> mu, sigma = 0, 0.1 # mean and standard deviation
    >>> s = np.random.normal(mu, sigma, 1000)
    
    Verify the mean and the variance:
    
    >>> abs(mu - np.mean(s)) < 0.01
    True
    
    >>> abs(sigma - np.std(s, ddof=1)) < 0.01
    True
    
    Display the histogram of the samples, along with
    the probability density function:
    
    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 30, normed=True)
    >>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *
    ...                np.exp( - (bins - mu)**2 / (2 * sigma**2) ),
    ...          linewidth=2, color='r')
    >>> plt.show()

numpy.array 基本操作

import numpy as np
np.random.seed(0)
x = np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
X = np.arange(15).reshape((3, 5))
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

numpy.array 的基本属性

x.ndim
1
X.ndim
2
x.shape
(10,)
X.shape
(3, 5)
x.size
10
X.size
15

numpy.array 的数据访问

x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[0]
0
x[-1]
9
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
X[0][0] # 不建议!
0
X[0, 0]
0
X[0, -1]
4
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[0:5]
array([0, 1, 2, 3, 4])
x[:5]
array([0, 1, 2, 3, 4])
x[5:]
array([5, 6, 7, 8, 9])
x[4:7]
array([4, 5, 6])
x[::2]
array([0, 2, 4, 6, 8])
x[1::2]
array([1, 3, 5, 7, 9])
x[::-1]
array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
X[:2, :3]
array([[0, 1, 2],
       [5, 6, 7]])
X[:2][:3] # 结果不一样,在numpy中使用","做多维索引
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
X[:2, ::2]
array([[0, 2, 4],
       [5, 7, 9]])
X[::-1, ::-1]
array([[14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5],
       [ 4,  3,  2,  1,  0]])
X[0, :]
array([0, 1, 2, 3, 4])
X[:, 0]
array([ 0,  5, 10])

Subarray of numpy.array

subX = X[:2, :3]
subX
array([[0, 1, 2],
       [5, 6, 7]])
subX[0, 0] = 100
subX
array([[100,   1,   2],
       [  5,   6,   7]])
X
array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])
X[0, 0] = 0
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
subX
array([[0, 1, 2],
       [5, 6, 7]])
subX = X[:2, :3].copy()
subX[0, 0] = 100
subX
array([[100,   1,   2],
       [  5,   6,   7]])
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

Reshape

x.shape
(10,)
x.ndim
1
x.reshape(2, 5)
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
A = x.reshape(2, 5)
A
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
B = x.reshape(1, 10)
B
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
B.ndim
2
B.shape
(1, 10)
x.reshape(-1, 10)
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
x.reshape(10, -1)
array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])
x.reshape(2, -1)
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
x.reshape(3, -1)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-53-12a588b09f7f> in <module>()
----> 1 x.reshape(3, -1)
ValueError: cannot reshape array of size 10 into shape (3,newaxis)

numpy.array 合并和分割

import numpy as np

numpy.array 的合并

x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])
array([1, 2, 3, 3, 2, 1])
z = np.array([666, 666, 666])
np.concatenate([x, y, z])
array([  1,   2,   3,   3,   2,   1, 666, 666, 666])
A = np.array([[1, 2, 3],
              [4, 5, 6]])
np.concatenate([A, A])
array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])
np.concatenate([A, A], axis=1)
array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])
np.concatenate([A, z])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-148a822297cf> in <module>()
----> 1 np.concatenate([A, z])
ValueError: all the input arrays must have same number of dimensions
np.concatenate([A, z.reshape(1, -1)])
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])
np.vstack([A, z])
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 666, 666]])
B = np.full((2,2), 100)
np.hstack([A, B])
array([[  1,   2,   3, 100, 100],
       [  4,   5,   6, 100, 100]])
np.hstack([A, z])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-d5b9fc6fb0a8> in <module>()
----> 1 np.hstack([A, z])
/Users/yuanzhang/anaconda/lib/python3.6/site-packages/numpy/core/shape_base.py in hstack(tup)
    286         return _nx.concatenate(arrs, 0)
    287     else:
--> 288         return _nx.concatenate(arrs, 1)
    289 
    290 def stack(arrays, axis=0):
ValueError: all the input arrays must have same number of dimensions

numpy.array 的分割

x = np.arange(10)
x    
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x1, x2, x3 = np.split(x, [3, 7])
x1
array([0, 1, 2])
x2
array([3, 4, 5, 6])
x3
array([7, 8, 9])
x1, x2 = np.split(x, [5])
x1
array([0, 1, 2, 3, 4])
x2
array([5, 6, 7, 8, 9])
A = np.arange(16).reshape((4, 4))
A
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
A1, A2 = np.split(A, [2])
A1
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
A2
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])
A1, A2 = np.split(A, [2], axis=1)
A1
array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])
A2
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])
upper, lower = np.vsplit(A, [2])
upper
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
lower
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])
left, right = np.hsplit(A, [2])
left
array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])
right
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])
data = np.arange(16).reshape((4, 4))
data
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
X, y = np.hsplit(data, [-1])
X
array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])
y
array([[ 3],
       [ 7],
       [11],
       [15]])
y[:, 0]
array([ 3,  7, 11, 15])

numpy.array 中的运算

给定一个数组,让数组中每一个数乘以2

n = 10
L = [i for i in range(n)]
2 * L
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
A = []
for e in L:
    A.append(2*e)
n = 1000000
L = [i for i in range(n)]
%%time
A = []
for e in L:
    A.append(2*e)
CPU times: user 253 ms, sys: 30 ms, total: 283 ms
Wall time: 303 ms
%%time
A = [2*e for e in L]
CPU times: user 93.6 ms, sys: 25.8 ms, total: 119 ms
Wall time: 128 ms
import numpy as np
L = np.arange(n)
%%time
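# 注意:这里传给 np.array 的是生成器表达式,numpy 不会逐个展开其中的元素,
# 得到的只是一个包着生成器对象的 0 维 object 数组,所以下面的用时并不代表真正的逐元素计算。
# 若要逐元素计算,应写成 np.array([2*e for e in L])。
A = np.array(2*e for e in L)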
CPU times: user 15.1 ms, sys: 8.97 ms, total: 24.1 ms
Wall time: 24.8 ms
%%time
A = 2 * L
CPU times: user 3.79 ms, sys: 4.36 ms, total: 8.14 ms
Wall time: 8.03 ms
n = 10
L = np.arange(n)
2 * L
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

NumPy’s UFuncs (Universal Functions)

X = np.arange(1, 16).reshape((3, 5))
X
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
X + 1
array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16]])
X - 1
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
X * 2
array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30]])
X / 2
array([[ 0.5,  1. ,  1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ,  4.5,  5. ],
       [ 5.5,  6. ,  6.5,  7. ,  7.5]])
X // 2
array([[0, 1, 1, 2, 2],
       [3, 3, 4, 4, 5],
       [5, 6, 6, 7, 7]])
X ** 2
array([[  1,   4,   9,  16,  25],
       [ 36,  49,  64,  81, 100],
       [121, 144, 169, 196, 225]])
X % 2
array([[1, 0, 1, 0, 1],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1]])
1 / X
array([[ 1.        ,  0.5       ,  0.33333333,  0.25      ,  0.2       ],
       [ 0.16666667,  0.14285714,  0.125     ,  0.11111111,  0.1       ],
       [ 0.09090909,  0.08333333,  0.07692308,  0.07142857,  0.06666667]])
np.abs(X)
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
np.sin(X)
array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111],
       [-0.99999021, -0.53657292,  0.42016704,  0.99060736,  0.65028784]])
np.cos(X)
array([[ 0.54030231, -0.41614684, -0.9899925 , -0.65364362,  0.28366219],
       [ 0.96017029,  0.75390225, -0.14550003, -0.91113026, -0.83907153],
       [ 0.0044257 ,  0.84385396,  0.90744678,  0.13673722, -0.75968791]])
np.tan(X)
array([[  1.55740772e+00,  -2.18503986e+00,  -1.42546543e-01,
          1.15782128e+00,  -3.38051501e+00],
       [ -2.91006191e-01,   8.71447983e-01,  -6.79971146e+00,
         -4.52315659e-01,   6.48360827e-01],
       [ -2.25950846e+02,  -6.35859929e-01,   4.63021133e-01,
          7.24460662e+00,  -8.55993401e-01]])
np.arctan(X)
array([[ 0.78539816,  1.10714872,  1.24904577,  1.32581766,  1.37340077],
       [ 1.40564765,  1.42889927,  1.44644133,  1.46013911,  1.47112767],
       [ 1.48013644,  1.48765509,  1.49402444,  1.49948886,  1.50422816]])
np.exp(X)
array([[  2.71828183e+00,   7.38905610e+00,   2.00855369e+01,
          5.45981500e+01,   1.48413159e+02],
       [  4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
          8.10308393e+03,   2.20264658e+04],
       [  5.98741417e+04,   1.62754791e+05,   4.42413392e+05,
          1.20260428e+06,   3.26901737e+06]])
np.exp2(X)
array([[  2.00000000e+00,   4.00000000e+00,   8.00000000e+00,
          1.60000000e+01,   3.20000000e+01],
       [  6.40000000e+01,   1.28000000e+02,   2.56000000e+02,
          5.12000000e+02,   1.02400000e+03],
       [  2.04800000e+03,   4.09600000e+03,   8.19200000e+03,
          1.63840000e+04,   3.27680000e+04]])
np.power(3, X)
array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]])
np.log(X)
array([[ 0.        ,  0.69314718,  1.09861229,  1.38629436,  1.60943791],
       [ 1.79175947,  1.94591015,  2.07944154,  2.19722458,  2.30258509],
       [ 2.39789527,  2.48490665,  2.56494936,  2.63905733,  2.7080502 ]])
np.log2(X)
array([[ 0.        ,  1.        ,  1.5849625 ,  2.        ,  2.32192809],
       [ 2.5849625 ,  2.80735492,  3.        ,  3.169925  ,  3.32192809],
       [ 3.45943162,  3.5849625 ,  3.70043972,  3.80735492,  3.9068906 ]])
np.log10(X)
array([[ 0.        ,  0.30103   ,  0.47712125,  0.60205999,  0.69897   ],
       [ 0.77815125,  0.84509804,  0.90308999,  0.95424251,  1.        ],
       [ 1.04139269,  1.07918125,  1.11394335,  1.14612804,  1.17609126]])

矩阵运算

A = np.arange(4).reshape(2, 2)
A
array([[0, 1],
       [2, 3]])
B = np.full((2, 2), 10)
B
array([[10, 10],
       [10, 10]])
A + B
array([[10, 11],
       [12, 13]])
A - B
array([[-10,  -9],
       [ -8,  -7]])
A * B
array([[ 0, 10],
       [20, 30]])
A.dot(B)
array([[10, 10],
       [50, 50]])
A.T
array([[0, 2],
       [1, 3]])
C = np.full((3, 3), 666)
A + C
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-145-cb7c4a36a7ba> in <module>()
----> 1 A + C
ValueError: operands could not be broadcast together with shapes (2,2) (3,3) 

向量和矩阵的运算

加法

v = np.array([1, 2])
v + A
array([[1, 3],
       [3, 5]])

v + A 是可以的,但是在这个课程中,我们不研究其中的计算法则。有兴趣的同学可以查询资料,自学 numpy.array 的广播(broadcast)机制。

np.vstack([v] * A.shape[0])
array([[1, 2],
       [1, 2]])
np.vstack([v] * A.shape[0]) + A
array([[1, 3],
       [3, 5]])
np.tile(v, (2, 1))
array([[1, 2],
       [1, 2]])
np.tile(v, (2, 1)) + A
array([[1, 3],
       [3, 5]])
np.tile(v, (2, 2))
array([[1, 2, 1, 2],
       [1, 2, 1, 2]])

乘法

v * A
array([[0, 2],
       [2, 6]])
v.dot(A)
array([4, 7])
A.dot(v)
array([2, 8])

矩阵的逆

np.linalg.inv(A)
array([[-1.5,  0.5],
       [ 1. ,  0. ]])
invA = np.linalg.inv(A)
A.dot(invA)
array([[ 1.,  0.],
       [ 0.,  1.]])
invA.dot(A)
array([[ 1.,  0.],
       [ 0.,  1.]])
X = np.arange(16).reshape((2, 8))
invX = np.linalg.inv(X)
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-207-60b1a25f4891> in <module>()
----> 1 invX = np.linalg.inv(X)
/Users/yuanzhang/anaconda/lib/python3.6/site-packages/numpy/linalg/linalg.py in inv(a)
    515     a, wrap = _makearray(a)
    516     _assertRankAtLeast2(a)
--> 517     _assertNdSquareness(a)
    518     t, result_t = _commonType(a)
    519 
/Users/yuanzhang/anaconda/lib/python3.6/site-packages/numpy/linalg/linalg.py in _assertNdSquareness(*arrays)
    210     for a in arrays:
    211         if max(a.shape[-2:]) != min(a.shape[-2:]):
--> 212             raise LinAlgError('Last 2 dimensions of the array must be square')
    213 
    214 def _assertFinite(*arrays):
LinAlgError: Last 2 dimensions of the array must be square

矩阵的伪逆

pinvX = np.linalg.pinv(X)
pinvX
array([[ -1.35416667e-01,   5.20833333e-02],
       [ -1.01190476e-01,   4.16666667e-02],
       [ -6.69642857e-02,   3.12500000e-02],
       [ -3.27380952e-02,   2.08333333e-02],
       [  1.48809524e-03,   1.04166667e-02],
       [  3.57142857e-02,   8.67361738e-18],
       [  6.99404762e-02,  -1.04166667e-02],
       [  1.04166667e-01,  -2.08333333e-02]])
X.dot(pinvX)
array([[  1.00000000e+00,  -9.71445147e-17],
       [ -1.33226763e-15,   1.00000000e+00]])

矩阵的伪逆又被称为“广义逆矩阵”,有兴趣的同学可以翻看线性代数教材,查看更多广义逆矩阵相关的性质。中文wiki链接: https://zh.wikipedia.org/wiki/%E5%B9%BF%E4%B9%89%E9%80%86%E9%98%B5



Numpy 中的聚合操作

sum

import numpy as np
L = np.random.random(100)
sum(L)
52.675554310672098
np.sum(L)
52.675554310672105
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)
10 loops, best of 3: 173 ms per loop
1000 loops, best of 3: 1.02 ms per loop

min, max

np.min(big_array)
2.2765289564574687e-07
np.max(big_array)
0.99999686126703025
big_array.min()
2.2765289564574687e-07
big_array.max()
0.99999686126703025
big_array.sum()
500454.89231729991

多维度聚合

X = np.arange(16).reshape(4,-1)
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
np.sum(X)
120
np.sum(X, axis=0)
array([24, 28, 32, 36])
np.sum(X, axis=1)
array([ 6, 22, 38, 54])

注意:axis描述的是将要被压缩的维度。例如 axis=0 表示把第 0 维(行)压缩掉,得到每一列的和;axis=1 表示把第 1 维(列)压缩掉,得到每一行的和。

其他聚合操作

np.prod(X)
0
np.prod(X + 1)
20922789888000
np.mean(X)
7.5
np.median(X)
7.5
v = np.array([1, 1, 2, 2, 10])
np.mean(v)
3.2000000000000002
np.median(v)
2.0
np.percentile(big_array, q=50)
0.50056612640031206
np.median(big_array)
0.50056612640031206
np.percentile(big_array, q=100)
0.99999686126703025
np.max(big_array)
0.99999686126703025
for percent in [0, 25, 50, 75, 100]:
    print(np.percentile(big_array, q=percent))
2.27652895646e-07
0.250501365819
0.5005661264
0.750543416185
0.999996861267
np.var(big_array)
0.083379660489048227
np.std(big_array)
0.28875536443336985
x = np.random.normal(0, 1, 1000000)
np.mean(x)
-0.00044876833100538597
np.std(x)
1.0000457010611321

Numpy 中arg运算

import numpy as np
x = np.random.normal(0, 1, 1000000)

索引

np.argmin(x)
886266
x[886266]
-4.8354963762015108
np.min(x)
-4.8354963762015108
np.argmax(x)
4851
x[4851]
4.5860138951376461
np.max(x)
4.5860138951376461

排序和使用索引

x = np.arange(16)
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
np.random.shuffle(x)
x
array([13,  2,  6,  7, 11, 10,  3,  4,  8,  0,  5,  1,  9, 14, 12, 15])
np.sort(x)
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
x
array([13,  2,  6,  7, 11, 10,  3,  4,  8,  0,  5,  1,  9, 14, 12, 15])
x.sort()
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
X = np.random.randint(10, size=(4,4))
X
array([[8, 8, 5, 8],
       [1, 2, 2, 4],
       [5, 5, 9, 9],
       [3, 9, 3, 4]])
np.sort(X, axis=0)
array([[1, 2, 2, 4],
       [3, 5, 3, 4],
       [5, 8, 5, 8],
       [8, 9, 9, 9]])
np.sort(X, axis=1)
array([[5, 8, 8, 8],
       [1, 2, 2, 4],
       [5, 5, 9, 9],
       [3, 3, 4, 9]])

使用索引

x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
np.random.shuffle(x)
x
array([14, 15,  8,  7, 10,  4,  9,  1,  6,  5,  3, 12,  2, 11,  0, 13])
np.argsort(x)
array([14,  7, 12, 10,  5,  9,  8,  3,  2,  6,  4, 13, 11, 15,  0,  1])
np.partition(x, 3)
array([ 1,  0,  2,  3,  4,  5,  7,  8,  6,  9, 10, 12, 11, 13, 15, 14])
np.argpartition(x, 3)
array([ 7, 14, 12, 10,  5,  9,  3,  2,  8,  6,  4, 11, 13, 15,  1,  0])
X
array([[8, 8, 5, 8],
       [1, 2, 2, 4],
       [5, 5, 9, 9],
       [3, 9, 3, 4]])
np.argsort(X, axis=1)
array([[2, 0, 1, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 2, 3, 1]])
np.argpartition(X, 2, axis=1)
array([[2, 1, 0, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 2, 3, 1]])

Numpy 中的比较和Fancy Indexing

Fancy Indexing

import numpy as np
x = np.arange(16)
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
x[3]
3
x[3:9]
array([3, 4, 5, 6, 7, 8])
x[3:9:2]
array([3, 5, 7])
[x[3], x[5], x[7]]
[3, 5, 7]
ind = [3, 5, 7]
x[ind]
array([3, 5, 7])
ind = np.array([[0, 2], [1, 3]])
x[ind]
array([[0, 2],
       [1, 3]])

Fancy Indexing 应用在二维数组

X = x.reshape(4, -1)
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
row = np.array([0, 1, 2])
col = np.array([1, 2, 3])
X[row, col]
array([ 1,  6, 11])
X[0, col]
array([1, 2, 3])
X[:2, col]
array([[1, 2, 3],
       [5, 6, 7]])
col = [True, False, True, True]
X[0, col]
array([0, 2, 3])

numpy.array 的比较

x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
x < 3
array([ True,  True,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False], dtype=bool)
x > 3
array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True], dtype=bool)
x <= 3
array([ True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False], dtype=bool)
x >= 3
array([False, False, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True], dtype=bool)
x == 3
array([False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False], dtype=bool)
x != 3
array([ True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True], dtype=bool)
2 * x == 24 - 4 * x
array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False], dtype=bool)
X < 6
array([[ True,  True,  True,  True],
       [ True,  True, False, False],
       [False, False, False, False],
       [False, False, False, False]], dtype=bool)

使用 numpy.array 的比较结果

np.count_nonzero( x <= 3)
4
np.sum(x <= 3)
4
np.sum(X % 2 == 0, axis=0)
array([4, 0, 4, 0])
np.sum(X % 2 == 0, axis=1)
array([2, 2, 2, 2])
np.any(x == 0)
True
np.any(x < 0)
False
np.all(x > 0)
False
np.all(x >= 0)
True
np.all(X > 0, axis=1)
array([False,  True,  True,  True], dtype=bool)
np.sum((x > 3) & (x < 10))
6
np.sum((x > 3) && (x < 10))
  File "<ipython-input-45-780ca9b7c144>", line 1
    np.sum((x > 3) && (x < 10))
                    ^
SyntaxError: invalid syntax
np.sum((x % 2 == 0) | (x > 10))
11
np.sum(~(x == 0))
15

比较结果和Fancy Indexing

x < 5
array([ True,  True,  True,  True,  True, False, False, False, False,
       False, False, False, False, False, False, False], dtype=bool)
x[x < 5]
array([0, 1, 2, 3, 4])
x[x % 2 == 0]
array([ 0,  2,  4,  6,  8, 10, 12, 14])
X[X[:,3] % 3 == 0, :]
array([[ 0,  1,  2,  3],
       [12, 13, 14, 15]])

https://www.jianshu.com/p/37639e2aaa13

「点点赞赏,手留余香」

    还没有人赞赏,快来当第一个赞赏的人吧!
0 条回复 A 作者 M 管理员
    所有的伟大,都源于一个勇敢的开始!
欢迎您,新朋友,感谢参与互动!欢迎您 {{author}},您在本站有{{commentsCount}}条评论