當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

数据分析学习02-numpy

發布時間：2024/9/15 编程问答 26 豆豆

生活随笔收集整理的這篇文章主要介紹了数据分析学习02-numpy 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

簡介

NumPy是Python中科學計算的基礎包。它是一個Python庫，提供多維數組對象，各種派生對象（如掩碼數組和矩陣），以及用于數組快速操作的各種API，有包括數學、邏輯、形狀操作、排序、選擇、輸入輸出、離散傅立葉變換、基本線性代數，基本統計運算和隨機模擬等等。

使用

我們僅需要簡單的通過import numpy as np就可以使用numpy了。

為什么要用numpy？

如果我們希望兩個列表對應項相加，使用Python列表需要手寫循環或推導式，例如 [a + b for a, b in zip(l1, l2)]，這樣的代碼是冗余的；而使用numpy只需要寫 a1 + a2，大大減少了代碼的冗余。

基本操作

數組的形狀

import numpy as np

# --- Inspecting the shape of an array ---
t1 = np.arange(12)  # 1-D array holding 0..11
print(t1)
# [ 0  1  2  3  4  5  6  7  8  9 10 11]
print(t1.shape)  # shape of the array
# (12,)  -> a single axis with 12 elements

t2 = np.array([[1, 2, 3], [4, 5, 6]])
print(t2)
# [[1 2 3]
#  [4 5 6]]
print(t2.shape)
# (2, 3)  -> 2 rows, 3 columns

t3 = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(t3)
# [[[ 1  2  3]
#   [ 4  5  6]]
#
#  [[ 7  8  9]
#   [10 11 12]]]
print(t3.shape)
# (2, 2, 3)

# --- Reshaping ---
t4 = np.arange(12)
print(t4)
# [ 0  1  2  3  4  5  6  7  8  9 10 11]
# reshape returns a reshaped array; t4 itself keeps its shape (12,).
print(t4.reshape((3, 4)))  # 3 rows x 4 columns
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

t5 = np.arange(24).reshape((2, 3, 4))  # 2 blocks, each 3 rows x 4 columns
print(t5)
# [[[ 0  1  2  3]
#   [ 4  5  6  7]
#   [ 8  9 10 11]]
#
#  [[12 13 14 15]
#   [16 17 18 19]
#   [20 21 22 23]]]
print(t5.reshape(4, 6))  # same data viewed as 4 rows x 6 columns
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]
#  [18 19 20 21 22 23]]
print(t5.flatten())  # collapse to a 1-D copy
# [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]

數組的運算

當兩個數組的形狀并不相同的時候，我們可以通過擴展數組的方法來實現相加、相減、相乘等操作，這種機制叫做廣播（broadcasting）

import numpy as np

t5 = np.arange(24).reshape((2, 3, 4))  # 2 blocks, each 3 rows x 4 columns
# print(t5)
# [[[ 0  1  2  3]
#   [ 4  5  6  7]
#   [ 8  9 10 11]]
#
#  [[12 13 14 15]
#   [16 17 18 19]
#   [20 21 22 23]]]

# A scalar operand is applied to every element of the array.
print(t5 + 2)  # add 2 to every element
# [[[ 2  3  4  5]
#   [ 6  7  8  9]
#   [10 11 12 13]]
#
#  [[14 15 16 17]
#   [18 19 20 21]
#   [22 23 24 25]]]

print(t5 * 2)  # multiply every element by 2
# [[[ 0  2  4  6]
#   [ 8 10 12 14]
#   [16 18 20 22]]
#
#  [[24 26 28 30]
#   [32 34 36 38]
#   [40 42 44 46]]]

print(t5 / 2)  # divide every element by 2 (result is float)
# [[[ 0.   0.5  1.   1.5]
#   [ 2.   2.5  3.   3.5]
#   [ 4.   4.5  5.   5.5]]
#
#  [[ 6.   6.5  7.   7.5]
#   [ 8.   8.5  9.   9.5]
#   [10.  10.5 11.  11.5]]]

數組相加，乘，除

import numpy as np

t5 = np.arange(24).reshape((4, 6))
# print(t5)
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]
#  [18 19 20 21 22 23]]

t6 = np.arange(100, 124).reshape((4, 6))
# print(t6)
# [[100 101 102 103 104 105]
#  [106 107 108 109 110 111]
#  [112 113 114 115 116 117]
#  [118 119 120 121 122 123]]

# Same-shape arrays: the operation is applied element by element.
print(t5 + t6)
# [[100 102 104 106 108 110]
#  [112 114 116 118 120 122]
#  [124 126 128 130 132 134]
#  [136 138 140 142 144 146]]

print(t5 * t6)
# [[   0  101  204  309  416  525]
#  [ 636  749  864  981 1100 1221]
#  [1344 1469 1596 1725 1856 1989]
#  [2124 2261 2400 2541 2684 2829]]

# t5[0, 0] is 0, so the first quotient is inf. The division by zero is
# deliberate here, so the divide warning is silenced for this expression only.
with np.errstate(divide="ignore"):
    print(t6 / t5)
# [[         inf 101.          51.          34.33333333  26.          21.        ]
#  [ 17.66666667  15.28571429  13.5         12.11111111  11.          10.09090909]
#  [  9.33333333   8.69230769   8.14285714   7.66666667   7.25         6.88235294]
#  [  6.55555556   6.26315789   6.           5.76190476   5.54545455   5.34782609]]

# Broadcasting: a (6,) array is subtracted from every row of the (4, 6) array.
t7 = np.arange(0, 6)
# print(t7)
# [0 1 2 3 4 5]
print(t5 - t7)
# [[ 0  0  0  0  0  0]
#  [ 6  6  6  6  6  6]
#  [12 12 12 12 12 12]
#  [18 18 18 18 18 18]]

# Broadcasting: a (4, 1) array is subtracted from every column of the (4, 6) array.
t8 = np.arange(4).reshape((4, 1))
# print(t8)
# [[0]
#  [1]
#  [2]
#  [3]]
print(t5 - t8)
# [[ 0  1  2  3  4  5]
#  [ 5  6  7  8  9 10]
#  [10 11 12 13 14 15]
#  [15 16 17 18 19 20]]

軸（axis）

軸在numpy中可以理解為方向，使用0,1，2…數字表示，對于一個一維數組，只有一個0軸，對于2維數組(shape(2,2))，有0軸和1軸，對于一個三維數組（shape(2,2,3))，有0,1,2軸。
二維數組：0軸表示行，1軸表示列

三維數組：0軸表示塊，1軸表示行，2軸表示列

有了軸的概念之后，我們在計算時會更加方便，比如計算一個2維數據的平均值，必須指定是計算哪個方向上面的數字的平均值

numpy讀取文件數據與轉置

import numpy as np

data = "D:/py/數據分析學習/numpy學習/數據.csv"

# delimiter="," splits each line on commas; dtype="int" parses the values as integers.
t1 = np.loadtxt(data, delimiter=",", dtype="int")
print(t1)

print("*" * 50 + "分割符" + "*" * 50)

# unpack=True returns the transposed array (columns become rows);
# the default is unpack=False.
t2 = np.loadtxt(data, delimiter=",", dtype="int", unpack=True)
print(t2)

轉置是一種變換，對于numpy中的數組來說，就是在對角線方向交換數據，目的也是為了更方便的處理數據

numpy索引和切片

對于上述加載出來的數據，我們如果只想選擇其中某一列（行）如下
取行

import numpy as np

data = "D:/py/數據分析學習/numpy學習/數據.csv"

# delimiter="," splits each line on commas; dtype="int" parses the values as integers.
t1 = np.loadtxt(data, delimiter=",", dtype="int")
print(t1)  # all data

print("*" * 50 + "取一行" + "*" * 50)
print(t1[2])  # the single row at index 2

print("*" * 50 + "取多行" + "*" * 50)
print(t1[2:])  # every row from index 2 onward

print("*" * 50 + "取指定行" + "*" * 50)
print(t1[[0, 2, 4]])  # the rows at indices 0, 2 and 4

取列

# -*- coding: utf-8 -*-
import numpy as np

data = "C:/Users/gpc/Desktop/python/其他/test01.csv"

# delimiter="," splits each line on commas; dtype="int" parses the values as integers.
t1 = np.loadtxt(data, delimiter=",", dtype="int")
print(t1)  # all data

print("*" * 50 + "取一列" + "*" * 50)
print(t1[:, 1])  # the single column at index 1

print("*" * 50 + "取連續多列" + "*" * 50)
print(t1[:, 2:])  # every column from index 2 onward

print("*" * 50 + "取指定列" + "*" * 50)
print(t1[:, [0, 2, 4]])  # the columns at indices 0, 2 and 4

# -*- coding: utf-8 -*-
import numpy as np

data = "C:/Users/gpc/Desktop/python/其他/test01.csv"

# delimiter="," splits each line on commas; dtype="int" parses the values as integers.
t1 = np.loadtxt(data, delimiter=",", dtype="int")
print(t1)  # all data

print("*" * 20 + "取多行和多列，取第3行，第4列得值" + "*" * 20)
print(t1[2, 3])  # single element: row index 2, column index 3

print("*" * 20 + "取多行和多列，取第3行到第5行，第2列到第4列得結果" + "*" * 20)
print(t1[2:5, 1:4])  # sub-block: row indices 2..4, column indices 1..3

print("*" * 20 + "取多個不相鄰的點" + "*" * 20)
# Paired index lists select the individual points (0,0), (2,1) and (2,3).
print(t1[[0, 2, 2], [0, 1, 3]])

numpy數值的修改

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(24).reshape((4, 6))
print(t1)
print("-" * 50)

# Overwrite the single element at row index 2, column index 1.
t1[2, 1] = 100
print(t1)

numpy中布爾值索引

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(24).reshape((4, 6))
print(t1)
print("-" * 50)

# Comparing an array with a scalar yields a boolean array of the same shape.
print(t1 < 10)
print("-" * 50)

# A boolean array used as an index selects the True positions;
# here every element smaller than 10 is replaced by 3.
t1[t1 < 10] = 3
print(t1)

numpy中三元運算符

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(24).reshape((4, 6))
print(t1)

# np.where(condition, x, y): x where the condition holds, y elsewhere.
# Elements <= 11 become 0, all others become 100; t1 itself is not modified.
print(np.where(t1 <= 11, 0, 100))

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(24).reshape((4, 6))
print(t1)
print("-" * 50)

# clip(11, 18): values below 11 become 11, values above 18 become 18.
# The result is a new array; t1 itself is not modified.
print(t1.clip(11, 18))

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(24).reshape((4, 6))
print(t1)
print("-" * 50)

# nan is a floating-point value, so the array is converted to float
# before nan is stored in it.
t1 = t1.astype(float)
t1[3, 3] = np.nan
print(t1)

numpy中nan和inf

nan（NAN，Nan）：not a number 表示不是一個數字
- 當我們讀取本地的文件為float的時候，如果有缺失，就會出現nan
- 當做了一個不合適的計算的時候（比如無窮大（inf）減去無窮大），也會出現nan
inf（-inf,inf）:infinity.inf表示正無窮，-inf表示負無窮
- 比如一個非零數字除以0（python中會直接報錯，numpy中是一個inf或者-inf；0除以0則得到nan）

# -*- coding: utf-8 -*-
import numpy as np

# nan never compares equal to anything, including itself.
print(np.nan == np.nan)
# False
print(np.nan != np.nan)
# True

t1 = np.arange(24).reshape((4, 6))
t1 = t1.astype(float)  # nan needs a float array
t1[3, 3] = np.nan
print(t1)

t1[:, 0] = 0  # zero out the first column
print(t1)

print(np.count_nonzero(t1))  # number of non-zero elements (nan counts as non-zero)
# 20
print(np.isnan(t1))  # boolean array marking the nan positions
print(np.count_nonzero(np.isnan(t1)))  # number of nan values

# Any arithmetic involving nan yields nan.
print(np.sum(t1))
# nan

t2 = np.arange(12).reshape(3, 4)
print(np.sum(t2))
# 66
print(np.sum(t2, axis=0))  # sum of each column
# [12 15 18 21]
print(np.sum(t2, axis=1))  # sum of each row
# [ 6 22 38]

print(np.sum(t1, axis=0))  # the column containing nan sums to nan
# [ 0. 40. 44. nan 52. 56.]

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(24).reshape((4, 6))
t1 = t1.astype(float)  # nan needs a float array
t1[3, 3] = np.nan
print(t1)

# axis=0 produces one result per column; the column holding nan yields nan.
print(t1.sum(axis=0))  # sum of each column
# [36. 40. 44. nan 52. 56.]
print(t1.mean(axis=0))  # mean of each column
# [ 9. 10. 11. nan 13. 14.]
print(np.median(t1, axis=0))  # median of each column
# [ 9. 10. 11. nan 13. 14.]
print(t1.max(axis=0))  # maximum of each column
# [18. 19. 20. nan 22. 23.]
print(t1.min(axis=0))  # minimum of each column
# [ 0.  1.  2. nan  4.  5.]
print(np.ptp(t1, axis=0))  # peak-to-peak (max - min) of each column
# [18. 18. 18. nan 18. 18.]
print(t1.std(axis=0))  # standard deviation of each column
# [6.70820393 6.70820393 6.70820393        nan 6.70820393 6.70820393]

數組的拼接

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(1, 13).reshape((2, 6))
print(t1)
print("-" * 50)

t2 = np.arange(13, 25).reshape(2, 6)
print(t2)
print("-" * 50)

print(np.vstack((t1, t2)))  # vertical stack: t2's rows go below t1's -> (4, 6)
print("-" * 50)

print(np.hstack((t1, t2)))  # horizontal stack: t2's columns go right of t1's -> (2, 12)

# -*- coding: utf-8 -*-
import numpy as np


def fill_ndarray(t1):
    """Replace the NaN entries of each column with that column's mean.

    The mean is taken over the column's non-NaN values. The array is
    modified in place and also returned.

    Args:
        t1: 2-D floating-point array.

    Returns:
        The same array object, with NaNs filled where possible. A column
        that consists entirely of NaN has no usable mean and is left as is.
    """
    for i in range(t1.shape[1]):  # walk over the columns
        column = t1[:, i]  # basic slice: a view, so writes land in t1
        nan_mask = np.isnan(column)
        # Skip columns with nothing to fill, and all-NaN columns, whose
        # "mean of the remaining values" would be the mean of an empty slice.
        if not nan_mask.any() or nan_mask.all():
            continue
        column[nan_mask] = column[~nan_mask].mean()
    return t1


if __name__ == "__main__":
    t1 = np.arange(12).reshape(3, 4).astype("float")
    t1[1, 2:] = np.nan
    print(t1)
    t1 = fill_ndarray(t1)
    print(t1)

數組交換

# -*- coding: utf-8 -*-
import numpy as np

t1 = np.arange(1, 25).reshape((4, 6))
print(t1)
print("-" * 50)

# Swap the rows at indices 1 and 2. Indexing with a list on the right-hand
# side produces a copy, so the assignment does not read half-written data.
t1[[1, 2], :] = t1[[2, 1], :]
print(t1)
print("-" * 50)

# Swap the columns at indices 0 and 5.
t1[:, [0, 5]] = t1[:, [5, 0]]
print(t1)

# -*- coding: utf-8 -*-
import numpy as np

print(np.ones((2, 3)))  # 2 x 3 array filled with 1.0
print(np.zeros((2, 3)))  # 2 x 3 array filled with 0.0

# -*- coding: utf-8 -*-
import numpy as np

# Load the two data files.
data1 = "C:/Users/gpc/Desktop/python/其他/test01.csv"
data2 = "C:/Users/gpc/Desktop/python/其他/test02.csv"

# delimiter="," splits each line on commas; dtype="int" parses the values as integers.
t1 = np.loadtxt(data1, delimiter=",", dtype="int")
t2 = np.loadtxt(data2, delimiter=",", dtype="int")

# Build one tag column per data set: all zeros for t1, all ones for t2.
zeros_data = np.zeros((t1.shape[0], 1))
ones_data = np.ones((t2.shape[0], 1))

# Append the tag column on the right. np.zeros/np.ones produce floats, so the
# stacked result is converted back to int.
t1 = np.hstack((t1, zeros_data)).astype(int)
t2 = np.hstack((t2, ones_data)).astype(int)

# Stack the two tagged data sets on top of each other.
t3 = np.vstack((t1, t2))
print(t3)

numpy生成隨機數

# -*- coding: utf-8 -*-
import numpy as np

# Random integers in the half-open range [10, 20), arranged as 4 rows x 5 columns.
t1 = np.random.randint(10, 20, (4, 5))
print(t1)

# -*- coding: utf-8 -*-
import numpy as np

# Fix the seed so every run produces the same "random" numbers.
np.random.seed(10)

# Random integers in the half-open range [10, 20) (20 itself is never drawn),
# arranged as 4 rows x 5 columns.
t1 = np.random.randint(10, 20, (4, 5))
print(t1)

# -*- coding: utf-8 -*-
import numpy as np

# Fix the seed so every run produces the same "random" numbers.
np.random.seed(10)

# Random integers in the half-open range [10, 20), arranged as 4 rows x 5 columns.
t1 = np.random.randint(10, 20, (4, 5))
print(t1)

# View: slicing creates a new array object, but its data still belongs to t1,
# so a change made through either name is visible through the other.
t2 = t1[:]
print(t2)

# Copy: t3 owns its own data; t3 and t1 do not affect each other.
t3 = t1.copy()
print(t3)

總結

以上是生活随笔為你收集整理的数据分析学习02-numpy的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

数据
NumPy