NumPy基础（二）(新手速来！)_飞道的博客

NumPy基础（二）(新手速来！)

2020-06-04 20:21 525人阅读评论(0)

NumPy 是一个为 Python 提供高性能向量、矩阵和高维数据结构的科学计算库。它通过 C 和 Fortran 实现，因此用向量和矩阵建立方程并实现数值计算有非常好的性能。NumPy 基本上是所有使用 Python 进行数值计算的框架和包的基础，例如 TensorFlow 和 PyTorch，构建机器学习模型最基础的内容就是学会使用 NumPy 搭建计算过程。

基础运算

数组中的算术运算一般是元素级的运算，运算结果会产生一个新的数组。如下所示减法、加法、平方、对应元素乘积和逻辑运算都是元素级的操作。


  
   
    
     
    
    
     
      >>> a = np.array( [
      20,
      30,
      40,
      50] )
     
    
   
    
     
    
    
     
      >>> b = np.arange( 
      4 )
     
    
   
    
     
    
    
     
      >>> b
     
    
   
    
     
    
    
     
      array([
      0, 
      1, 
      2, 
      3])
     
    
   
    
     
    
    
     
      >>> c = a-b
     
    
   
    
     
    
    
     
      >>> c
     
    
   
    
     
    
    
     
      array([
      20, 
      29, 
      38, 
      47])
     
    
   
    
     
    
    
     
      >>> b**
      2
     
    
   
    
     
    
    
     
      array([
      0, 
      1, 
      4, 
      9])
     
    
   
    
     
    
    
     
      >>> 
      10*np.sin(a)
     
    
   
    
     
    
    
     
      array([ 
      9.12945251, 
      -9.88031624,  
      7.4511316 , 
      -2.62374854])
     
    
   
    
     
    
    
     
      >>> a<
      35
     
    
   
    
     
    
    
     
      array([ 
      True, 
      True, 
      False, 
      False])

不同于许多科学计算语言，乘法算子 * 或 multiply 函数在 NumPy 数组中用于元素级的乘法运算，矩阵乘法可用 dot 函数或方法来执行。


  
   
    
     
    
    
     
      >>> A = np.array( [[
      1,
      1],
     
    
   
    
     
    
    
     
      ...             [
      0,
      1]] )
     
    
   
    
     
    
    
     
      >>> B = np.array( [[
      2,
      0],
     
    
   
    
     
    
    
     
      ...             [
      3,
      4]] )
     
    
   
    
     
    
    
     
      >>> A*B                         
      # elementwise product
     
    
   
    
     
    
    
     
      array([[
      2, 
      0],
     
    
   
    
     
    
    
     
             [
      0, 
      4]])
     
    
   
    
     
    
    
     
      >>> A.dot(B)                    
      # matrix product
     
    
   
    
     
    
    
     
      array([[
      5, 
      4],
     
    
   
    
     
    
    
     
             [
      3, 
      4]])
     
    
   
    
     
    
    
     
      >>> np.dot(A, B)                
      # another matrix product
     
    
   
    
     
    
    
     
      array([[
      5, 
      4],
     
    
   
    
     
    
    
     
             [
      3, 
      4]])

有一些操作，如 += 和 *=，其输出结果会改变一个已存在的数组，而不是如上述运算创建一个新数组。


  
   
    
     
    
    
     
      >>> a = np.ones((
      2,
      3), dtype=int)
     
    
   
    
     
    
    
     
      >>> b = np.random.random((
      2,
      3))
     
    
   
    
     
    
    
     
      >>> a *= 
      3
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([[
      3, 
      3, 
      3],
     
    
   
    
     
    
    
     
             [
      3, 
      3, 
      3]])
     
    
   
    
     
    
    
     
      >>> b += a
     
    
   
    
     
    
    
     
      >>> b
     
    
   
    
     
    
    
     
      array([[ 
      3.417022  ,  
      3.72032449,  
      3.00011437],
     
    
   
    
     
    
    
     
             [ 
      3.30233257,  
      3.14675589,  
      3.09233859]])
     
    
   
    
     
    
    
     
      >>> a += b                  
      # b is not automatically converted to integer type
     
    
   
    
     
    
    
     
      Traceback (most recent call last):
     
    
   
    
     
    
    
     
        ...
     
    
   
    
     
    
    
     
      TypeError: Cannot cast ufunc 'add' output 
      from dtype('float64') to dtype('int64') 
      with casting rule 'same_kind'

当操作不同数据类型的数组时，最后输出的数组类型一般会与更普遍或更精准的数组相同（这种行为叫做 Upcasting）。


  
   
    
     
    
    
     
      >>> a = np.ones(
      3, dtype=np.int32)
     
    
   
    
     
    
    
     
      >>> b = np.linspace(
      0,np.pi,
      3)
     
    
   
    
     
    
    
     
      >>> b.dtype.name
     
    
   
    
     
    
    
     
      'float64'
     
    
   
    
     
    
    
     
      >>> c = a+b
     
    
   
    
     
    
    
     
      >>> c
     
    
   
    
     
    
    
     
      array([ 
      1.        ,  
      2.57079633,  
      4.14159265])
     
    
   
    
     
    
    
     
      >>> c.dtype.name
     
    
   
    
     
    
    
     
      'float64'
     
    
   
    
     
    
    
     
      >>> d = np.exp(c*
      1j)
     
    
   
    
     
    
    
     
      >>> d
     
    
   
    
     
    
    
     
      array([ 
      0.54030231+
      0.84147098j, 
      -0.84147098+
      0.54030231j,
     
    
   
    
     
    
    
            
      -0.54030231
      -0.84147098j])
     
    
   
    
     
    
    
     
      >>> d.dtype.name
     
    
   
    
     
    
    
     
      'complex128'

许多一元运算，如计算数组中所有元素的总和，是属于 ndarray 类的方法。


  
   
    
     
    
    
     
      >>> a = np.random.random((
      2,
      3))
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([[ 
      0.18626021,  
      0.34556073,  
      0.39676747],
     
    
   
    
     
    
    
     
             [ 
      0.53881673,  
      0.41919451,  
      0.6852195 ]])
     
    
   
    
     
    
    
     
      >>> a.sum()
     
    
   
    
     
    
    
     
      2.5718191614547998
     
    
   
    
     
    
    
     
      >>> a.min()
     
    
   
    
     
    
    
     
      0.1862602113776709
     
    
   
    
     
    
    
     
      >>> a.max()
     
    
   
    
     
    
    
     
      0.6852195003967595

默认状态下，这些运算会把数组视为一个数列而不论它的 shape。然而，如果在指定 axis 参数下，你可以指定针对哪一个维度进行运算。如下 axis=0 将针对每一个列进行运算，例如 b.sum(axis=0) 将矩阵 b 中每一个列的所有元素都相加为一个标量。


  
   
    
     
    
    
     
      >>> b = np.arange(
      12).reshape(
      3,
      4)
     
    
   
    
     
    
    
     
      >>> b
     
    
   
    
     
    
    
     
      array([[ 
      0,  
      1,  
      2,  
      3],
     
    
   
    
     
    
    
     
             [ 
      4,  
      5,  
      6,  
      7],
     
    
   
    
     
    
    
     
             [ 
      8,  
      9, 
      10, 
      11]])
     
    
   
    
     
    
    
     
      >>>
     
    
   
    
     
    
    
     
      >>> b.sum(axis=
      0)                            
      # sum of each column
     
    
   
    
     
    
    
     
      array([
      12, 
      15, 
      18, 
      21])
     
    
   
    
     
    
    
     
      >>>
     
    
   
    
     
    
    
     
      >>> b.min(axis=
      1)                            
      # min of each row
     
    
   
    
     
    
    
     
      array([
      0, 
      4, 
      8])
     
    
   
    
     
    
    
     
      >>>
     
    
   
    
     
    
    
     
      >>> b.cumsum(axis=
      1)                         
      # cumulative sum along each row
     
    
   
    
     
    
    
     
      array([[ 
      0,  
      1,  
      3,  
      6],
     
    
   
    
     
    
    
     
             [ 
      4,  
      9, 
      15, 
      22],
     
    
   
    
     
    
    
     
             [ 
      8, 
      17, 
      27, 
      38]])

索引、截取和迭代

一维数组可以被索引、截取（Slicing）和迭代，就像 Python 列表和元组一样。注意其中 a[0:6:2] 表示从索引 0 开始、到索引 6（不含）为止，每隔一个元素取一个，即对索引为 0、2、4 的元素进行操作。


  
   
    
     
    
    
     
      >>> a = np.arange(
      10)**
      3
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([  
      0,   
      1,   
      8,  
      27,  
      64, 
      125, 
      216, 
      343, 
      512, 
      729])
     
    
   
    
     
    
    
     
      >>> a[
      2]
     
    
   
    
     
    
    
     
      8
     
    
   
    
     
    
    
     
      >>> a[
      2:
      5]
     
    
   
    
     
    
    
     
      array([ 
      8, 
      27, 
      64])
     
    
   
    
     
    
    
     
      >>> a[:
      6:
      2] = 
      -1000    
      # equivalent to a[0:6:2] = -1000; from start to position 6, exclusive, set every 2nd element to -1000
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([
      -1000,     
      1, 
      -1000,    
      27, 
      -1000,   
      125,   
      216,   
      343,   
      512,   
      729])
     
    
   
    
     
    
    
     
      >>> a[ : :
      -1]                                 
      # reversed a
     
    
   
    
     
    
    
     
      array([  
      729,   
      512,   
      343,   
      216,   
      125, 
      -1000,    
      27, 
      -1000,     
      1, 
      -1000])
     
    
   
    
     
    
    
     
      >>> 
      for i 
      in a:
     
    
   
    
     
    
    
     
      ...     print(i**(
      1/
      3.))
     
    
   
    
     
    
    
     
      ...
     
    
   
    
     
    
    
     
      nan
     
    
   
    
     
    
    
     
      1.0
     
    
   
    
     
    
    
     
      nan
     
    
   
    
     
    
    
     
      3.0
     
    
   
    
     
    
    
     
      nan
     
    
   
    
     
    
    
     
      5.0
     
    
   
    
     
    
    
     
      6.0
     
    
   
    
     
    
    
     
      7.0
     
    
   
    
     
    
    
     
      8.0
     
    
   
    
     
    
    
     
      9.0

多维数组每个轴都可以有一个索引。这些索引在元组中用逗号分隔：


  
   
    
     
    
    
     
      >>> 
      def f(x,y):
     
    
   
    
     
    
    
     
      ...     
      return 
      10*x+y
     
    
   
    
     
    
    
     
      ...
     
    
   
    
     
    
    
     
      >>> b = np.fromfunction(f,(
      5,
      4),dtype=int)
     
    
   
    
     
    
    
     
      >>> b
     
    
   
    
     
    
    
     
      array([[ 
      0,  
      1,  
      2,  
      3],
     
    
   
    
     
    
    
     
             [
      10, 
      11, 
      12, 
      13],
     
    
   
    
     
    
    
     
             [
      20, 
      21, 
      22, 
      23],
     
    
   
    
     
    
    
     
             [
      30, 
      31, 
      32, 
      33],
     
    
   
    
     
    
    
     
             [
      40, 
      41, 
      42, 
      43]])
     
    
   
    
     
    
    
     
      >>> b[
      2,
      3]
     
    
   
    
     
    
    
     
      23
     
    
   
    
     
    
    
     
      >>> b[
      0:
      5, 
      1]                       
      # each row in the second column of b
     
    
   
    
     
    
    
     
      array([ 
      1, 
      11, 
      21, 
      31, 
      41])
     
    
   
    
     
    
    
     
      >>> b[ : ,
      1]                        
      # equivalent to the previous example
     
    
   
    
     
    
    
     
      array([ 
      1, 
      11, 
      21, 
      31, 
      41])
     
    
   
    
     
    
    
     
      >>> b[
      1:
      3, : ]                      
      # each column in the second and third row of b
     
    
   
    
     
    
    
     
      array([[
      10, 
      11, 
      12, 
      13],
     
    
   
    
     
    
    
     
             [
      20, 
      21, 
      22, 
      23]])

当有些维度没有指定索引时，空缺的维度被默认为取所有元素。


  
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      >>> b[
      -1]                                  
      # the last row. Equivalent to b[-1,:]
     
    
   
    
     
    
    
     
      array([
      40, 
      41, 
      42, 
      43])

如上因为省略了第二维，b[i] 表示输出第 i 行。当然我们也可以用「:」表示省略的维度，例如 b[i] 等价于 b[i, :]。此外，NumPy 还允许使用 dots (...) 表示足够多的冒号来构建完整的索引元组。

比如，如果 x 是 5 维数组：

x[1,2,...] 等于 x[1,2,:,:,:],
x[...,3] 等于 x[:,:,:,:,3]
x[4,...,5,:] 等于 x[4,:,:,5,:]


  
   
    
     
    
    
     
      >>> c = np.array( [[[  
      0,  
      1,  
      2],               
      # a 3D array (two stacked 2D arrays)
     
    
   
    
     
    
    
     
      ...                 [ 
      10, 
      12, 
      13]],
     
    
   
    
     
    
    
     
      ...                [[
      100,
      101,
      102],
     
    
   
    
     
    
    
     
      ...                 [
      110,
      112,
      113]]])
     
    
   
    
     
    
    
     
      >>> c.shape
     
    
   
    
     
    
    
     
      (
      2, 
      2, 
      3)
     
    
   
    
     
    
    
     
      >>> c[
      1,...]                                   
      # same as c[1,:,:] or c[1]
     
    
   
    
     
    
    
     
      array([[
      100, 
      101, 
      102],
     
    
   
    
     
    
    
     
             [
      110, 
      112, 
      113]])
     
    
   
    
     
    
    
     
      >>> c[...,
      2]                                   
      # same as c[:,:,2]
     
    
   
    
     
    
    
     
      array([[  
      2,  
      13],
     
    
   
    
     
    
    
     
             [
      102, 
      113]])

多维数组中的迭代以第一条轴为参照完成，如下每一次循环都输出一个 b[i]：


  
   
    
     
    
    
     
      >>> 
      for row 
      in b:
     
    
   
    
     
    
    
     
      ...     print(row)
     
    
   
    
     
    
    
     
      ...
     
    
   
    
     
    
    
     
      [
      0 
      1 
      2 
      3]
     
    
   
    
     
    
    
     
      [
      10 
      11 
      12 
      13]
     
    
   
    
     
    
    
     
      [
      20 
      21 
      22 
      23]
     
    
   
    
     
    
    
     
      [
      30 
      31 
      32 
      33]
     
    
   
    
     
    
    
     
      [
      40 
      41 
      42 
      43]

然而，如果想在数组的每个元素上进行操作，可以用 flat 属性。flat 是一个遍历数组所有元素的迭代器，如下将逐元素地对数组进行操作。


  
   
    
     
    
    
     
      >>> 
      for element 
      in b.flat:
     
    
   
    
     
    
    
     
      ...     print(element)
     
    
   
    
     
    
    
     
      ...

Shape 变换

一个数组的 shape 是由轴及其元素数量决定的，它一般由一个整型元组表示，且元组中的整数表示对应维度的元素数。


  
   
    
     
    
    
     
      >>> a = np.floor(
      10*np.random.random((
      3,
      4)))
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([[ 
      2.,  
      8.,  
      0.,  
      6.],
     
    
   
    
     
    
    
     
             [ 
      4.,  
      5.,  
      1.,  
      1.],
     
    
   
    
     
    
    
     
             [ 
      8.,  
      9.,  
      3.,  
      6.]])
     
    
   
    
     
    
    
     
      >>> a.shape
     
    
   
    
     
    
    
     
      (
      3, 
      4)

一个数组的 shape 可以由许多方法改变。例如以下三种方法都可输出一个改变 shape 后的新数组，它们都不会改变原数组。其中 reshape 方法在实践中会经常用到，因为我们需要改变数组的维度以执行不同的运算。


  
   
    
     
    
    
     
      >>> a.ravel()  
      # returns the array, flattened
     
    
   
    
     
    
    
     
      array([ 
      2.,  
      8.,  
      0.,  
      6.,  
      4.,  
      5.,  
      1.,  
      1.,  
      8.,  
      9.,  
      3.,  
      6.])
     
    
   
    
     
    
    
     
      >>> a.reshape(
      6,
      2)  
      # returns the array with a modified shape
     
    
   
    
     
    
    
     
      array([[ 
      2.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      6.],
     
    
   
    
     
    
    
     
             [ 
      4.,  
      5.],
     
    
   
    
     
    
    
     
             [ 
      1.,  
      1.],
     
    
   
    
     
    
    
     
             [ 
      8.,  
      9.],
     
    
   
    
     
    
    
     
             [ 
      3.,  
      6.]])
     
    
   
    
     
    
    
     
      >>> a.T  
      # returns the array, transposed
     
    
   
    
     
    
    
     
      array([[ 
      2.,  
      4.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      8.,  
      5.,  
      9.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      1.,  
      3.],
     
    
   
    
     
    
    
     
             [ 
      6.,  
      1.,  
      6.]])
     
    
   
    
     
    
    
     
      >>> a.T.shape
     
    
   
    
     
    
    
     
      (
      4, 
      3)
     
    
   
    
     
    
    
     
      >>> a.shape
     
    
   
    
     
    
    
     
      (
      3, 
      4)

ravel() 和 flatten() 都是将多维数组降为一维，flatten() 返回一份新的数组（拷贝），对它所做的修改不会影响原始数组；而 ravel() 在可能的情况下返回的是视图（view），对它的修改会影响原始数组。

在矩阵的转置中，行和列的维度将交换，且矩阵中每一个元素将沿主对角线对称变换。此外，reshape 如下所示返回修改过维度的新数组，而 resize 方法将直接修改原数组本身的维度。


  
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([[ 
      2.,  
      8.,  
      0.,  
      6.],
     
    
   
    
     
    
    
     
             [ 
      4.,  
      5.,  
      1.,  
      1.],
     
    
   
    
     
    
    
     
             [ 
      8.,  
      9.,  
      3.,  
      6.]])
     
    
   
    
     
    
    
     
      >>> a.resize((
      2,
      6))
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([[ 
      2.,  
      8.,  
      0.,  
      6.,  
      4.,  
      5.],
     
    
   
    
     
    
    
     
             [ 
      1.,  
      1.,  
      8.,  
      9.,  
      3.,  
      6.]])

如果在 shape 变换中一个维度设为-1，那么这一个维度包含的元素数将会被自动计算。如下所示，a 一共有 12 个元素，在确定一共有 3 行后，-1 会自动计算出应该需要 4 列才能安排所有的元素。


  
   
    
     
    
    
     
      >>> a.reshape(
      3,
      -1)
     
    
   
    
     
    
    
     
      array([[ 
      2.,  
      8.,  
      0.,  
      6.],
     
    
   
    
     
    
    
     
             [ 
      4.,  
      5.,  
      1.,  
      1.],
     
    
   
    
     
    
    
     
             [ 
      8.,  
      9.,  
      3.,  
      6.]])

数组堆叠

数组可以在不同轴上被堆叠在一起。如下所示 vstack 将在第一个维度（垂直方向）将两个数组拼接在一起，而 hstack 将在第二个维度（水平方向）将数组拼接在一起。


  
   
    
     
    
    
     
      >>> a = np.floor(
      10*np.random.random((
      2,
      2)))
     
    
   
    
     
    
    
     
      >>> a
     
    
   
    
     
    
    
     
      array([[ 
      8.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      0.]])
     
    
   
    
     
    
    
     
      >>> b = np.floor(
      10*np.random.random((
      2,
      2)))
     
    
   
    
     
    
    
     
      >>> b
     
    
   
    
     
    
    
     
      array([[ 
      1.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      4.]])
     
    
   
    
     
    
    
     
      >>> np.vstack((a,b))
     
    
   
    
     
    
    
     
      array([[ 
      8.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      0.],
     
    
   
    
     
    
    
     
             [ 
      1.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      4.]])
     
    
   
    
     
    
    
     
      >>> np.hstack((a,b))
     
    
   
    
     
    
    
     
      array([[ 
      8.,  
      8.,  
      1.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      0.,  
      0.,  
      4.]])

column_stack 函数可将一维数组作为列堆叠成二维数组；仅当输入为二维数组时，它的作用才等同于 hstack 函数。


  
   
    
     
    
    
     
      >>> 
      from numpy 
      import newaxis
     
    
   
    
     
    
    
     
      >>> np.column_stack((a,b))     
      # with 2D arrays
     
    
   
    
     
    
    
     
      array([[ 
      8.,  
      8.,  
      1.,  
      8.],
     
    
   
    
     
    
    
     
             [ 
      0.,  
      0.,  
      0.,  
      4.]])
     
    
   
    
     
    
    
     
      >>> a = np.array([
      4.,
      2.])
     
    
   
    
     
    
    
     
      >>> b = np.array([
      3.,
      8.])
     
    
   
    
     
    
    
     
      >>> np.column_stack((a,b))     
      # returns a 2D array
     
    
   
    
     
    
    
     
      array([[ 
      4., 
      3.],
     
    
   
    
     
    
    
     
             [ 
      2., 
      8.]])
     
    
   
    
     
    
    
     
      >>> np.hstack((a,b))           
      # the result is different
     
    
   
    
     
    
    
     
      array([ 
      4., 
      2., 
      3., 
      8.])
     
    
   
    
     
    
    
     
      >>> a[:,newaxis]               
      # this allows to have a 2D columns vector
     
    
   
    
     
    
    
     
      array([[ 
      4.],
     
    
   
    
     
    
    
     
             [ 
      2.]])
     
    
   
    
     
    
    
     
      >>> np.column_stack((a[:,newaxis],b[:,newaxis]))
     
    
   
    
     
    
    
     
      array([[ 
      4.,  
      3.],
     
    
   
    
     
    
    
     
             [ 
      2.,  
      8.]])
     
    
   
    
     
    
    
     
      >>> np.hstack((a[:,newaxis],b[:,newaxis]))   
      # the result is the same
     
    
   
    
     
    
    
     
      array([[ 
      4.,  
      3.],
     
    
   
    
     
    
    
     
             [ 
      2.,  
      8.]])

另一方面，row_stack 函数对任意输入数组都等价于 vstack。一般在高于二维的情况中，hstack 沿第二个维度堆叠、vstack 沿第一个维度堆叠，而 concatenate 更进一步可以在任意给定的维度上堆叠两个数组，当然这要求其它维度的长度都相等。concatenate 在很多深度模型中都有应用，例如权重矩阵的堆叠或 DenseNet 特征图的堆叠。

在复杂情况中，r_ 和 c_ 可以有效地在创建数组时帮助沿着一条轴堆叠数值，它们同样允许使用范围迭代「:」生成数组。

当用数组为参数时，r_ 和 c_ 在默认行为下与 vstack 和 hstack 相似，但它们如 concatenate 一样允许给定需要堆叠的维度。

转载：https://blog.csdn.net/qq_41823684/article/details/106556838

查看评论

飞道的博客

飞道的博客

个人资料

文章分类

文章存档

阅读排行

评论排行

推荐文章