日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

欢迎访问 生活随笔!

生活随笔

当前位置: 首页 > 编程资源 > 编程问答 > 内容正文

编程问答

金融风控实战——集成学习

发布时间:2025/4/5 编程问答 18 豆豆
生活随笔 收集整理的这篇文章主要介绍了 金融风控实战——集成学习,小编觉得挺不错的,现在分享给大家,帮大家做个参考。

xgb依然要去除共线性、做变量选择

lr bivar要严格单调,xgb、lightGBM不需要

LightGBM评分卡

import math
import random
import time

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split

# Load the scorecard sample from disk and preview the first rows.
data = pd.read_csv('Bcard.txt')
data.head()

data.shape
# (95806, 13)

# Look at the month distribution; the last month is held out as the
# out-of-time validation set.
data.obs_mth.unique()
# array(['2018-10-31', '2018-07-31', '2018-09-30', '2018-06-30',
#        '2018-11-30'], dtype=object)

# Boolean mask selecting the held-out (most recent) month.
is_oot = data.obs_mth == '2018-11-30'
df_train = data[~is_oot].reset_index().copy()
val = data[is_oot].reset_index().copy()

# These are all of our variables: names ending in `info` are individual
# behaviour outputs of an in-house unsupervised system, names ending in
# `score` are paid external credit-bureau data.
lst = [
    'person_info',
    'finance_info',
    'credit_info',
    'act_info',
    'td_score',
    'jxl_score',
    'mj_score',
    'rh_score',
]

# Most recent observation month first.
df_train = df_train.sort_values(by='obs_mth', ascending=False)
df_train.head()

# Split the training rows into five consecutive, (almost) equal-sized folds
# following the time-sorted row order; the fold id is stored in `rank`.
df_train = df_train.sort_values(by='obs_mth', ascending=False)

# Fractional position of every row in the sorted frame: 1/n, 2/n, ..., 1.
n_rows = len(df_train)
rank_frac = np.arange(1, n_rows + 1) / n_rows

# side='left' reproduces the `x <= edge` chain exactly:
# (0, 0.2] -> 1, (0.2, 0.4] -> 2, (0.4, 0.6] -> 3, (0.6, 0.8] -> 4, rest -> 5.
# The array is assigned positionally, i.e. in the sorted row order.
df_train['rank'] = np.searchsorted([0.2, 0.4, 0.6, 0.8], rank_frac, side='left') + 1
df_train.head()

df_train['rank'].groupby(df_train['rank']).count()
# rank
# 1    15966
# 2    15966
# 3    15966
# 4    15966
# 5    15967
# Name: rank, dtype: int64


# Define the lgb training function.
def LGB_test(train_x, train_y, test_x, test_y):
    """Fit a depth-2 LightGBM binary classifier with early stopping.

    Both (train_x, train_y) and (test_x, test_y) are passed as evaluation
    sets; training stops once the evaluation metrics have not improved for
    100 rounds (at most 800 boosting rounds are run).

    Args:
        train_x: training features.
        train_y: training labels.
        test_x: evaluation features.
        test_y: evaluation labels.

    Returns:
        Tuple ``(clf, auc)``: the fitted ``LGBMClassifier`` and the best AUC
        recorded for the second evaluation set (``'valid_1'``).
    """
    from multiprocessing import cpu_count

    clf = lgb.LGBMClassifier(
        boosting_type='gbdt',
        num_leaves=31,
        reg_alpha=0.0,
        reg_lambda=1,
        max_depth=2,
        # Number of boosting iterations. The original also passed the alias
        # `num_iterations=800`; supplying both only triggers an alias warning.
        n_estimators=800,
        objective='binary',
        subsample=0.7,
        colsample_bytree=0.7,
        subsample_freq=1,
        learning_rate=0.05,
        min_child_weight=50,
        random_state=None,
        # Leave one core free, but never ask for zero workers on a 1-core host.
        n_jobs=max(1, cpu_count() - 1),
    )
    clf.fit(
        train_x,
        train_y,
        eval_set=[(train_x, train_y), (test_x, test_y)],
        eval_metric='auc',
        # `early_stopping_rounds=` was removed from fit() in LightGBM 4.0;
        # the callback form is accepted by old and new releases alike.
        callbacks=[lgb.early_stopping(stopping_rounds=100)],
    )
    print(clf.n_features_)
    return clf, clf.best_score_['valid_1']['auc']


feature_lst = {}
ks_train_lst = []
ks_test_lst = []
for rk in sorted(set(df_train['rank'])):
    # Cross-validation over the rank folds: fold `rk` is the test set and
    # all remaining folds form the training set.
    ttest = df_train[df_train['rank'] == rk]
    ttrain = df_train[df_train['rank'] != rk]

    train = ttrain[lst]
    train_y = ttrain.bad_ind

    test = ttest[lst]
    test_y = ttest.bad_ind

    # Bind the returned AUC to its own name so that `auc` imported from
    # sklearn.metrics is not overwritten by a float.
    model, fold_auc = LGB_test(train, train_y, test, test_y)

    # Feature importances of this fold's model, largest first.
    feature = pd.DataFrame({
        'name': model.booster_.feature_name(),
        'importance': model.feature_importances_,
    }).sort_values(by=['importance'], ascending=False)

    # KS on the training folds and on the held-out fold.
    y_pred_train_lgb = model.predict_proba(train)[:, 1]
    y_pred_test_lgb = model.predict_proba(test)[:, 1]
    train_fpr_lgb, train_tpr_lgb, _ = roc_curve(train_y, y_pred_train_lgb)
    test_fpr_lgb, test_tpr_lgb, _ = roc_curve(test_y, y_pred_test_lgb)
    ks_train_lst.append(abs(train_fpr_lgb - train_tpr_lgb).max())
    ks_test_lst.append(abs(test_fpr_lgb - test_tpr_lgb).max())

    # Keep only the features whose importance reaches 20 in this fold.
    feature_lst[str(rk)] = feature[feature.importance >= 20].name

train_ks = np.mean(ks_train_lst)
test_ks = np.mean(ks_test_lst)

# Features that pass the importance threshold in every fold
# (works for any number of folds, not just five).
ft_lst = dict(feature_lst)
fn_lst = (
    list(set.intersection(*(set(names) for names in ft_lst.values())))
    if ft_lst
    else []
)

print('train_ks: ', train_ks)
print('test_ks: ', test_ks)
print('ft_lst: ', fn_lst)
# [LightGBM] [Warning] Unknown parameter: max_features
# [1] training's auc: 0.726731 training's binary_logloss: 0.0827979 valid_1's auc: 0.742666 valid_1's binary_logloss: 0.12066
# [2] training's auc: 0.769499 training's binary_logloss: 0.0822062 valid_1's auc: 0.753919 valid_1's binary_logloss: 0.119728
# [3] training's auc: 0.788952 training's binary_logloss: 0.0816227 valid_1's auc: 0.762911 valid_1's binary_logloss: 0.118777
# . . .
# [188] training's auc: 0.827082 training's binary_logloss: 0.0777181 valid_1's auc: 0.786679 valid_1's binary_logloss: 0.078782
# [189] training's auc: 0.827128 training's binary_logloss: 0.0777136 valid_1's auc: 0.786756 valid_1's binary_logloss: 0.0787781
# [190] training's auc: 0.827162 training's binary_logloss: 0.0777108 valid_1's auc: 0.786696 valid_1's binary_logloss: 0.0787811
# train_ks: 0.4907124806547195
# test_ks: 0.47382530047645305
# ft_lst: ['credit_info', 'person_info', 'finance_info']

# Final model: train on every month except the last one and evaluate on the
# held-out last month, using the four `*_info` variables only.
lst = ['person_info', 'finance_info', 'credit_info', 'act_info']

train = data[data.obs_mth != '2018-11-30'].reset_index().copy()
evl = data[data.obs_mth == '2018-11-30'].reset_index().copy()

x = train[lst]
y = train['bad_ind']

evl_x = evl[lst]
evl_y = evl['bad_ind']

model, evl_auc = LGB_test(x, y, evl_x, evl_y)

y_pred = model.predict_proba(x)[:, 1]
fpr_lgb_train, tpr_lgb_train, _ = roc_curve(y, y_pred)
train_ks = abs(fpr_lgb_train - tpr_lgb_train).max()
print('train_ks : ', train_ks)

y_pred = model.predict_proba(evl_x)[:, 1]
fpr_lgb, tpr_lgb, _ = roc_curve(evl_y, y_pred)
evl_ks = abs(fpr_lgb - tpr_lgb).max()
print('evl_ks : ', evl_ks)

from matplotlib import pyplot as plt

# The curves come from the LightGBM model, so label them as such
# (the original legend said "LR").
plt.plot(fpr_lgb_train, tpr_lgb_train, label='train LGB')
plt.plot(fpr_lgb, tpr_lgb, label='evl LGB')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()
# [1] training's binary_logloss: 0.090317 training's auc: 0.712883 valid_1's binary_logloss: 0.0986629 valid_1's auc: 0.678619
# Training until validation scores don't improve for 100 rounds.
# [2] training's binary_logloss: 0.0896369 training's auc: 0.779216 valid_1's binary_logloss: 0.0978883 valid_1's auc: 0.755811
# [3] training's binary_logloss: 0.0885026 training's auc: 0.779149 valid_1's binary_logloss: 0.0966811 valid_1's auc: 0.749375
# [4] training's binary_logloss: 0.087998 training's auc: 0.780539 valid_1's binary_logloss: 0.0961527 valid_1's auc: 0.759009
# ...
# [179] training's binary_logloss: 0.0784288 training's auc: 0.812571 valid_1's binary_logloss: 0.0900886 valid_1's auc: 0.779962
# [180] training's binary_logloss: 0.0784267 training's auc: 0.812602 valid_1's binary_logloss: 0.0900914 valid_1's auc: 0.779887
# [181] training's binary_logloss: 0.078425 training's auc: 0.812601 valid_1's binary_logloss: 0.0900941 valid_1's auc: 0.779927
# [182] training's binary_logloss: 0.0784229 training's auc: 0.8126 valid_1's binary_logloss: 0.0900964 valid_1's auc: 0.779932
# Early stopping, best iteration is:
# [82] training's binary_logloss: 0.0788374 training's auc: 0.811646 valid_1's binary_logloss: 0.089958 valid_1's auc: 0.779946
# 4
# train_ks : 0.4801091876625077
# evl_ks : 0.4416674980164514


LightGBM的效果其实确实比LR要好,但是我们的LR也可以逼近这个效果,下节课我们会具体来做。

评分卡公式变形
$$\text{score} = 600 + 50 \times \frac{\ln \frac{P_{0}}{P_{1}}}{\ln 2},\qquad P_{0}\text{ 为好人的概率},\ P_{1}\text{ 为坏人的概率}$$

其中 $xbeta$ 为模型预测的坏人概率,即 $P_{1} = xbeta$,$P_{0} = 1 - xbeta$,代入得:

$$\text{score} = 600 + 50 \times \frac{\ln \frac{1 - xbeta}{xbeta}}{\ln 2}$$

$$\text{score} = 600 + 50 \times \log_{2} \frac{1 - xbeta}{xbeta}$$

# ['person_info','finance_info','credit_info','act_info']
# Compute the score in one step.
def score(xbeta, base=600, pdo=50):
    """Convert a predicted bad probability into a scorecard score.

    Implements ``base + pdo * log2((1 - xbeta) / xbeta)``, i.e. the log2 odds
    of good versus bad. The defaults follow the formula stated in the
    accompanying text (600 + 50 * log2(P0 / P1)). The earlier code used
    1000/500 and divided ``log2(1 - xbeta)`` by ``xbeta`` because of a
    misplaced parenthesis.

    Args:
        xbeta: predicted probability of the bad class, expected in [0, 1].
        base: score assigned when good and bad are equally likely.
        pdo: points added each time the good:bad odds double.

    Returns:
        The score as a float; a higher score means a lower bad probability.
    """
    # Clip away exact 0 and 1 so neither the division nor the log can fail.
    eps = 1e-12
    p_bad = min(max(xbeta, eps), 1 - eps)
    return base + pdo * math.log2((1 - p_bad) / p_bad)


evl['xbeta'] = model.predict_proba(evl_x)[:, 1]
evl['score'] = evl['xbeta'].map(score)

# The score falls as xbeta rises, so the ROC is mirrored; taking the absolute
# difference below still yields the KS statistic.
fpr_lr, tpr_lr, _ = roc_curve(evl_y, evl['score'])
evl_ks = abs(fpr_lr - tpr_lr).max()
print('val_ks : ', evl_ks)
# val_ks :  0.4416674980164514


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Generate the report: sort by predicted bad probability (highest first),
# cut into `bins` consecutive groups and accumulate bad/good counts.
row_num, col_num = 0, 0
bins = 20
Y_predict = evl['xbeta']
Y = evl_y
nrows = Y.shape[0]

# Pair prediction and label positionally, independent of the index labels.
lis = list(zip(Y_predict, Y))
ks_lis = sorted(lis, key=lambda x: x[0], reverse=True)
bin_num = int(nrows / bins + 1)

bad = sum(1 for (p, y) in ks_lis if y > 0.5)
good = sum(1 for (p, y) in ks_lis if y <= 0.5)
bad_cnt, good_cnt = 0, 0

KS = []
BAD = []
GOOD = []
BAD_CNT = []
GOOD_CNT = []
BAD_PCTG = []
BADRATE = []
dct_report = {}
for j in range(bins):
    ds = ks_lis[j * bin_num: min((j + 1) * bin_num, nrows)]
    bad1 = sum(1 for (p, y) in ds if y > 0.5)
    good1 = sum(1 for (p, y) in ds if y <= 0.5)
    bad_cnt += bad1
    good_cnt += good1

    # Share of all bads captured so far (assumes 0/1 labels, for which
    # `bad` equals sum(evl_y)).
    bad_pctg = round(_safe_ratio(bad_cnt, bad), 3)
    # A trailing bin can be empty on small samples; report NaN, don't crash.
    badrate = round(_safe_ratio(bad1, bad1 + good1), 3)
    ks = round(
        math.fabs(_safe_ratio(bad_cnt, bad) - _safe_ratio(good_cnt, good)), 3
    )

    KS.append(ks)
    BAD.append(bad1)
    GOOD.append(good1)
    BAD_CNT.append(bad_cnt)
    GOOD_CNT.append(good_cnt)
    BAD_PCTG.append(bad_pctg)
    BADRATE.append(badrate)

dct_report['KS'] = KS
dct_report['BAD'] = BAD
dct_report['GOOD'] = GOOD
dct_report['BAD_CNT'] = BAD_CNT
dct_report['GOOD_CNT'] = GOOD_CNT
dct_report['BAD_PCTG'] = BAD_PCTG
dct_report['BADRATE'] = BADRATE
val_repot = pd.DataFrame(dct_report)
val_repot

《新程序员》:云原生和全面数字化实践,50位技术专家共同创作,文字、视频、音频交互阅读

总结

以上是生活随笔为你收集整理的金融风控实战——集成学习的全部内容,希望文章能够帮你解决所遇到的问题。

如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。

主站蜘蛛池模板: 成人18视频 | 男人天堂综合网 | 最新毛片网 | 色哟哟精品观看 | 中国超碰 | 亚洲天堂影院在线观看 | 久久久久久一区二区 | 国产精品一区二区三区久久 | 亚洲美女综合网 | 国产一级做a爱免费视频 | 欧美在线视频不卡 | 91视频免费看 | 337p亚洲欧洲色噜噜噜 | 男生操男生网站 | 国产精品一级二级三级 | 日本美女黄色 | 国产91熟女高潮一区二区 | 欲色综合 | 欧美视频www| 大奶子在线观看 | 香蕉国产在线观看 | 国产靠逼视频 | 欧美va天堂 | 成人性做爰aaa片免费 | 中国超碰 | 韩国一级一片高清免费观看 | 午夜视频网 | 亚洲a级精品 | 国产成人久久精品麻豆二区 | 91在线影院 | 久久密桃| 91久久人人 | 国产三区av| 日韩三级免费 | 欧美1314 | 天天躁日日躁狠狠躁喷水 | 欧美在线 | 黄色片视频播放 | 北条麻妃一区二区三区在线观看 | 182tv午夜福利在线观看 | 亚洲av无码潮喷在线观看 | 国产日韩精品一区二区三区 | 成人aaaa| 亚洲小视频在线播放 | 欧美日韩激情一区 | 污的视频在线观看 | 香蕉av一区二区三区 | 51热门大瓜今日大瓜 | 在线中文字幕第一页 | 深夜视频在线看 | 国产精品入口66mio | 夜夜免费视频 | 成人午夜精品视频 | 中文字幕一区二区三区乱码人妻 | 色哟哟网站在线观看 | 99久久久国产精品无码免费 | 激情超碰 | 成人拍拍| 国产精品无码一区二区三区 | 国产一区二区在线电影 | 无人在线观看高清视频 单曲 | 蜜桃传媒一区二区亚洲 | 丝袜在线视频 | 波多野结衣成人在线 | 欧美 国产 精品 | 欧美在线一区二区三区四区 | 免费看毛片的网站 | 欧美hdxxxx | 亚洲区自拍 | 国产麻豆剧传媒精品国产 | 四虎5151久久欧美毛片 | 熟妇高潮一区二区三区在线播放 | 国产影视一区二区 | 久久精品tv| 涩涩在线观看 | 欧美日本韩国一区二区 | 男女男精品视频网站 | 黑森林福利视频导航 | av成人免费观看 | 处女朱莉第一次 | 夜夜综合网 | 中文在线一区 | 日本午夜激情 | 一本到免费视频 | 久久裸体视频 | 91香蕉在线视频 | 亚洲1页| 欧美黄片一区二区三区 | 欧美美女一区二区 | 日本在线有码 | 和漂亮岳做爰3中文字幕 | 欧洲精品视频在线 | 特级毛片在线观看 | 蜜桃视频中文字幕 | 久久亚洲av无码精品色午夜麻豆 | 最新激情网 | 精品欧美乱码久久久久久 | 美女福利视频 | 香蕉视频官方网站 |