# Noise smoothing - equal-depth binning - mean smoothing
import numpy as np
import pandas as pd
def aequilatus_box_mean(data, bins):
    """Smooth the first column of *data* with equal-depth bin means.

    The first column is split into ``bins`` quantile-based (equal-depth)
    bins with ``pd.qcut``; every value is then replaced by the mean of the
    bin it belongs to.

    Args:
        data: DataFrame whose first column holds the numeric values to
            smooth.  It is modified in place: the first column is
            overwritten and a categorical ``'label'`` column
            (``'a1'`` .. ``'a<bins>'``) is added or replaced.
        bins: Number of equal-depth bins (a positive integer).

    Returns:
        The same ``data`` object, for convenience.

    Raises:
        ValueError: Propagated from ``pd.qcut``, e.g. when the quantile
            edges are not unique.
    """
    column = data.columns[0]
    labels = ['a' + str(i) for i in range(1, bins + 1)]
    data['label'] = pd.qcut(data[column], bins, labels=labels)

    # Group by bin label rather than by index span, so the result does not
    # depend on the rows being sorted or on a default RangeIndex.  Replacing
    # the whole column also avoids writing float means into an integer
    # column element-wise.
    data[column] = (
        data.groupby('label', observed=True)[column].transform('mean')
    )
    return data


if __name__ == "__main__":
    data = pd.DataFrame({'A': [11, 13, 15, 20, 20, 23, 26, 29, 35]})
    bins = 3
    print("均值平滑")
    print(aequilatus_box_mean(data, bins))


# Noise smoothing - equal-depth binning - median smoothing
import pandas as pd
import numpy as np
def aequilatus_box_median(data, bins):
    """Smooth column ``'A'`` of *data* with equal-depth bin medians.

    Column ``'A'`` is split into ``bins`` quantile-based (equal-depth) bins
    with ``pd.qcut``; every value is then replaced by the median of the bin
    it belongs to.

    Args:
        data: DataFrame with a numeric column ``'A'``.  It is modified in
            place: ``'A'`` is overwritten and a categorical ``'label'``
            column (``'a1'`` .. ``'a<bins>'``) is added or replaced.
        bins: Number of equal-depth bins (a positive integer).

    Returns:
        The same ``data`` object, for convenience.

    Raises:
        ValueError: Propagated from ``pd.qcut``, e.g. when the quantile
            edges are not unique.
    """
    labels = ['a' + str(i) for i in range(1, bins + 1)]
    data['label'] = pd.qcut(data['A'], bins, labels=labels)

    # Group by bin label rather than by index span, so rows need not be
    # sorted and the index need not be a default RangeIndex.
    data['A'] = data.groupby('label', observed=True)['A'].transform('median')
    return data
# Demo: median smoothing of a small sample with three equal-depth bins.
if __name__ == "__main__":
    data = pd.DataFrame({'A': [11, 13, 15, 20, 20, 23, 26, 29, 35]})
    bins = 3
    print("中值平滑")
    print(aequilatus_box_median(data, bins))


# Noise smoothing - equal-depth binning - boundary smoothing
import pandas as pd
import numpy as np
def aequilatus_box_border(data, bins):
    """Smooth column ``'A'`` of *data* by equal-depth bin boundaries.

    Column ``'A'`` is split into ``bins`` quantile-based (equal-depth) bins
    with ``pd.qcut``; every value is then replaced by whichever of its
    bin's minimum or maximum is nearer.  A value exactly half-way between
    the two is mapped to the minimum.

    Args:
        data: DataFrame with a numeric column ``'A'``.  It is modified in
            place: ``'A'`` is overwritten and a categorical ``'label'``
            column (``'a1'`` .. ``'a<bins>'``) is added or replaced.
        bins: Number of equal-depth bins (a positive integer).

    Returns:
        The same ``data`` object, for convenience.

    Raises:
        ValueError: Propagated from ``pd.qcut``, e.g. when the quantile
            edges are not unique.
    """
    labels = ['a' + str(i) for i in range(1, bins + 1)]
    data['label'] = pd.qcut(data['A'], bins, labels=labels)

    # Per-row bin boundaries, aligned with data's index.  Grouping by label
    # covers every row of each bin (including the last one) and does not
    # require the rows to be sorted.
    per_bin = data.groupby('label', observed=True)['A']
    low = per_bin.transform('min')
    high = per_bin.transform('max')

    # Within a bin low <= value <= high, so both distances are non-negative
    # and no abs() is needed.  Values already on a boundary stay unchanged:
    # distance 0 to low selects low, distance 0 to high selects high.
    values = data['A']
    nearer_low = (values - low) <= (high - values)
    data['A'] = low.where(nearer_low, high)
    return data
# Demo: boundary smoothing of a small sample with three equal-depth bins.
if __name__ == "__main__":
    data = pd.DataFrame({'A': [11, 12, 15, 21, 20, 23, 26, 29, 35]})
    bins = 3
    print("邊界平滑")
    print(aequilatus_box_border(data, bins))