

[Algorithm Competition Learning] Fund Inflow and Outflow Prediction - Challenge Baseline: Modeling and Prediction


Competition Overview

Ant Financial serves hundreds of millions of members, and its business involves massive cash inflows and outflows every day, so managing funds for such a large user base puts enormous pressure on liquidity management. Accurately forecasting future inflows and outflows is therefore critical: it keeps liquidity risk at a minimum while still supporting day-to-day operations. This competition, "Fund Inflow and Outflow Prediction", asks participants to use the purchase and redemption records of Yu'e Bao users to predict the total daily inflow and outflow of funds. For a money-market fund, an inflow corresponds to a purchase (subscription) and an outflow to a redemption.

Task and Data

The competition data consist of four parts: user profile information, user purchase and redemption records, the fund yield table, and the interbank lending rate table. https://tianchi.aliyun.com/competition/entrance/231573/information
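The modeling below works on a daily aggregate table with one row per date, the targets total_purchase_amt and total_redeem_amt, and engineered calendar features (read later from Dataset/feature0522.csv). As a rough, hedged sketch of how such a daily table could be built from the raw records, assuming the competition's user_balance_table with report_date, total_purchase_amt and total_redeem_amt columns (the actual preprocessing notebook is not reproduced here):

import pandas as pd

# Hypothetical preprocessing sketch: aggregate per-user records into daily totals.
# File and column names follow the competition's user_balance_table; adjust to your layout.
raw = pd.read_csv('Dataset/user_balance_table.csv')
raw['report_date'] = pd.to_datetime(raw['report_date'], format='%Y%m%d')
daily = (raw.groupby('report_date')[['total_purchase_amt', 'total_redeem_amt']]
            .sum()
            .reset_index()
            .rename(columns={'report_date': 'date'}))
daily.to_csv('Dataset/daily_total.csv', index=False)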

Modeling and Prediction

import pandas as pd
import sklearn as skr
import numpy as np
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from dateutil.relativedelta import relativedelta
from typing import *
import random
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
import warnings

warnings.filterwarnings('ignore')
np.random.seed(1024)

labels = ['total_purchase_amt', 'total_redeem_amt']

# Split the dataset: offline validation trains on Apr-Jul 2014 and tests on Aug 2014
def split_data_underline(data: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    trainset = data[(datetime.date(2014, 4, 1) <= data['date']) & (data['date'] < datetime.date(2014, 8, 1))]
    testset = data[(datetime.date(2014, 8, 1) <= data['date']) & (data['date'] < datetime.date(2014, 9, 1))]
    return trainset, testset

# Online submission trains on Apr-Aug 2014 and predicts Sep 2014
def split_data_online(data: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    trainset = data[(datetime.date(2014, 4, 1) <= data['date']) & (data['date'] < datetime.date(2014, 9, 1))]
    testset = data[(datetime.date(2014, 9, 1) <= data['date']) & (data['date'] < datetime.date(2014, 10, 1))]
    return trainset, testset

# Evaluation: per-day relative error and the competition-style score
def AE(y: Iterable, yhat: Iterable) -> Iterable:
    return np.abs(y - yhat) / np.abs(y)

def total_AE(purchasehat: Iterable, redeemhat: Iterable, purchase: Iterable, redeem: Iterable, h: float = 0.3) -> float:
    return sum(map(lambda x: np.exp(-x / h) * 10, AE(purchase, purchasehat))) * 0.45 + \
           sum(map(lambda x: np.exp(-x / h) * 10, AE(redeem, redeemhat))) * 0.55

# Validate a model over several sliding one-month windows
def week_evalution_single(data: pd.DataFrame, model: object, types: str) -> pd.DataFrame:
    results = []
    a_month = relativedelta(months=1)
    for i in [datetime.date(2014, 8, 1), datetime.date(2014, 7, 25), datetime.date(2014, 7, 18),
              datetime.date(2014, 7, 11), datetime.date(2014, 7, 4), datetime.date(2014, 6, 27),
              datetime.date(2014, 6, 20)]:
        trainset = data[(i - 4 * a_month <= data['date']) & (data['date'] < i)]
        testset = data[(i <= data['date']) & (data['date'] < i + a_month)]
        if len(testset) == 0 or len(trainset) == 0:
            i = datetime.date(2014, 4, 20)
            trainset = data[(i - 4 * a_month <= data['date']) & (data['date'] < i)]
            testset = data[(i <= data['date']) & (data['date'] < datetime.date(2014, 9, 1))]
        feature = [x for x in trainset.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']]
        model.fit(X=trainset[feature], y=trainset['total_' + types + '_amt'])
        result_lr = model.predict(testset[feature])
        h = 0.3
        results.append(sum(AE(testset['total_' + types + '_amt'], result_lr).apply(lambda x: np.exp(-x / h)) * 10))
    return pd.DataFrame(results)

# Attach the window start dates to the evaluation table
def draw_eva_table(df: pd.DataFrame) -> pd.DataFrame:
    rest = df.copy()
    rest['interval'] = [datetime.date(2014, 8, 1), datetime.date(2014, 7, 25), datetime.date(2014, 7, 18),
                        datetime.date(2014, 7, 11), datetime.date(2014, 7, 4), datetime.date(2014, 6, 27),
                        datetime.date(2014, 6, 20)]
    return rest

# Visualize predictions against the ground truth
def visual(result_purchase_lr: Iterable, result_redeem_lr: Iterable, testset: pd.DataFrame) -> None:
    fig = plt.figure(figsize=(10, 4))
    plt.plot(testset['date'], result_purchase_lr, label='predicted_purchase')
    plt.plot(testset['date'], testset['total_purchase_amt'], label='real_purchase')
    plt.legend(loc='best')
    plt.title("The distribution of real and predict purchase")
    plt.xlabel("Time")
    plt.ylabel("Amount")
    plt.show()
    fig = plt.figure(figsize=(10, 4))
    sns.barplot(testset['date'].dt.day, result_purchase_lr - testset['total_purchase_amt'])
    fig = plt.figure(figsize=(10, 4))
    plt.plot(testset['date'], result_redeem_lr, label='predicted_redeem')
    plt.plot(testset['date'], testset['total_redeem_amt'], label='real_redeem')
    plt.legend(loc='best')
    plt.title("The distribution of real and predict redeem")
    plt.xlabel("Time")
    plt.ylabel("Amount")
    plt.show()
    fig = plt.figure(figsize=(10, 4))
    sns.barplot(testset['date'].dt.day, result_redeem_lr - testset['total_redeem_amt'])

# Greedy forward selection of the features with the best offline score
def feature_extract(data: pd.DataFrame, model: object, types: str) -> Tuple[List[str], float]:
    features = [x for x in data.columns if x not in labels + ['date']]
    random.shuffle(features)
    results = []
    score = -1
    for i in features:
        score_update = np.mean(week_evalution_single(data[results + [i] + labels + ['date']], model, types))
        if score_update > score:
            score = score_update
            results.append(i)
    return results, score

# Repeat the randomized forward selection several times and keep the best run
def robust_feature_extract(data: pd.DataFrame, model: object, types: str):
    results = []
    score = -1
    for i in range(10):
        results_update, score_update = feature_extract(data, model, types)
        if score_update > score:
            score = score_update
            results = results_update
        print(results_update, score_update)
    return results

# AIC / BIC criteria (the original post defined both under the name AIC, so the second
# definition shadowed the first; the second formula is the BIC)
def AIC(L: int, delta: float, n_features: int):
    return L * np.log10(delta) + 2 * (n_features + 1)

def BIC(L: int, delta: float, n_features: int):
    return L * np.log10(delta) + (n_features + 1) * np.log10(L)

# Feature selection scored by the information criterion, used later for model averaging
def feature_extract_AIC(data: pd.DataFrame, model: object, types: str) -> Tuple[List[str], float]:
    features = [x for x in data.columns if x not in labels + ['date']]
    random.shuffle(features)
    results = []
    test_score = 1e9
    train_score = 0
    for i in features:
        test_score_update = np.mean(week_evalution_single(data[results + [i] + labels + ['date']], model, types)[0])
        if test_score_update < test_score:
            test_score = test_score_update
            results.append(i)
    trainset, testset = split_data_underline(data)
    feature = results
    model.fit(X=trainset[feature], y=trainset['total_' + types + '_amt'])
    train_result_lr = model.predict(trainset[feature])
    delta = mean_squared_error(train_result_lr, trainset['total_' + types + '_amt'])
    # delta = np.sum(AE(trainset['total_' + types + '_amt'], train_result_lr).apply(lambda x: np.exp(-x/0.1))*10)
    return results, AIC(len(trainset), delta, len(feature))

# Run the AIC-guided selection many times and turn the scores into averaging weights
def multi_model(data: pd.DataFrame, model: object, types: str) -> Tuple[List[List[str]], List[float]]:
    features = []
    weights = []
    for i in range(100):
        results_update, score_update = feature_extract_AIC(data, model, types)
        features.append(results_update)
        weights.append(score_update)
    avg = np.mean(weights)
    weights = [x - avg for x in weights]
    weights = [np.power((-1 * x / 2), 10) for x in weights]
    summ = np.sum(weights)
    weights = [x / summ for x in weights]
    return features, weights

# Fit on the online training window and predict September
def generate_online_result(df: pd.DataFrame, feature: Iterable, model=LinearRegression(),
                           target: str = 'total_purchase_amt') -> Iterable:
    trainset, testset = split_data_online(df)
    model.fit(X=trainset[feature], y=trainset[target])
    result_purchase_lr = model.predict(testset[feature])
    return result_purchase_lr

# Fit on the offline training window and predict August
def generate_under_result(df: pd.DataFrame, feature: Iterable, model=LinearRegression(),
                          target: str = 'total_purchase_amt') -> Iterable:
    trainset, testset = split_data_underline(df)
    model.fit(X=trainset[feature], y=trainset[target])
    result_purchase_lr = model.predict(testset[feature])
    return result_purchase_lr

# Format the predictions into the submission layout (yyyymmdd, purchase, redeem)
def normalize_upload_file(result_purchase_lr: Iterable, result_redeem_lr: Iterable, testset: pd.DataFrame) -> pd.DataFrame:
    testset['total_purchase_amt'] = result_purchase_lr
    testset['total_redeem_amt'] = result_redeem_lr
    online_upload = testset[['date', 'total_purchase_amt', 'total_redeem_amt']]
    online_upload['date'] = online_upload['date'].astype(str)
    online_upload['date'] = online_upload['date'].str.replace('-', '')
    return online_upload

# Visualize the online predictions
def draw_result(result_purchase_lr: Iterable, result_redeem_lr: Iterable, testset: pd.DataFrame):
    fig = plt.figure(figsize=(10, 4))
    plt.plot(testset['date'].dt.day, result_purchase_lr, label='online_purchase')
    plt.plot(testset['date'].dt.day, result_redeem_lr, label='online_redeem')
    plt.legend(loc='best')
    plt.title("The predict values")
    plt.xlabel("Time")
    plt.ylabel("Amount")

# Weighted "addition" of two evaluation tables (defaults mirror the 0.45/0.55 metric split)
def add_two_df(df1, df2, features=None, left_a=0.45, right_a=0.55):
    data = df1.copy()
    if not features:
        features = [x for x in data.columns if x != 'interval']
    for i in features:
        data[i] = data[i] * left_a + df2[i] * right_a
    return data

# Scale every score column of an evaluation table
def scale_df(df1, features=None, eta=1):
    data = df1.copy()
    if not features:
        features = [x for x in data.columns if x != 'interval']
    for i in features:
        data[i] *= eta
    return data

Model Testing

Using only the IS features

data = pd.read_csv('Dataset/feature0522.csv')
data['date'] = pd.to_datetime(data['date'])
trainset, testset = split_data_underline(data)
result_purchase_lr = generate_under_result(data, [x for x in data.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                           target='total_purchase_amt')
result_redeem_lr = generate_under_result(data, [x for x in data.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                         target='total_redeem_amt')

Prediction results for August

total_AE(result_purchase_lr, result_redeem_lr, testset['total_purchase_amt'], testset['total_redeem_amt'])
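The number printed here is the offline score defined by total_AE above, which loosely mirrors the competition's point-per-day scoring: each day's relative error is pushed through exp(-error/0.3) * 10, summed over the month, and the purchase and redeem sums are combined with weights 0.45 and 0.55, so a 31-day August caps out at 310. A tiny self-contained illustration of how a single day's contribution decays with the relative error:

import numpy as np

# Illustration only: one day's contribution to total_AE as a function of relative error.
# With h = 0.3 a perfect prediction is worth 10, a 10% error about 7.2, a 30% error about 3.7.
h = 0.3
for rel_err in [0.0, 0.1, 0.3, 0.5]:
    print(rel_err, round(np.exp(-rel_err / h) * 10, 2))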

Sliding-window test results

draw_eva_table(week_evalution_single(data, model=LinearRegression(), types = 'purchase'))

draw_eva_table(week_evalution_single(data, LinearRegression(), 'redeem'))

August predictions vs. ground truth

visual(result_purchase_lr, result_redeem_lr, testset)




result_purchase_lr = generate_online_result(data, [x for x in trainset.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                            LinearRegression(), 'total_purchase_amt')
result_redeem_lr = generate_online_result(data, [x for x in trainset.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                          LinearRegression(), 'total_redeem_amt')

September prediction plot (linear model)

trainset, testset = split_data_online(data)
draw_result(result_purchase_lr, result_redeem_lr, testset)

normalize_upload_file(result_purchase_lr, result_redeem_lr, testset).to_csv('20190612_only_is.csv',index=False,header=None)

Multi-model comparison

def multi_model_eva(data, types: str = 'purchase'):
    results = pd.DataFrame()
    for model in [LinearRegression(), DecisionTreeRegressor(), RandomForestRegressor(),
                  GradientBoostingRegressor(), MLPRegressor(solver='lbfgs'),
                  xgb.XGBRegressor(objective='reg:squarederror')]:
        if results.empty:
            results = draw_eva_table(week_evalution_single(data, model, types)).rename(
                columns={0: repr(model).split('(')[0]})
        else:
            results = pd.merge(results,
                               draw_eva_table(week_evalution_single(data, model, types)).rename(
                                   columns={0: repr(model).split('(')[0]}),
                               on='interval')
    results = results[['interval'] + [x for x in results.columns if x != 'interval']]
    return results

add_two_df(multi_model_eva(data, 'purchase'), multi_model_eva(data, 'redeem'))

Comparison after dropping weak features

data_purchase = pd.read_csv('Feature/purchase_feature_droped_0614.csv')
data_purchase['date'] = pd.to_datetime(data_purchase['date'])
data_redeem = pd.read_csv('Feature/redeem_feature_droped_0614.csv')
data_redeem['date'] = pd.to_datetime(data_redeem['date'])
trainset_purchase, testset_purchase = split_data_underline(data_purchase)
result_purchase_lr = generate_under_result(data_purchase, [x for x in data_purchase.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                           target='total_purchase_amt')
trainset_redeem, testset_redeem = split_data_underline(data_redeem)
result_redeem_lr = generate_under_result(data_redeem, [x for x in data_redeem.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                         target='total_redeem_amt')
total_AE(result_purchase_lr, result_redeem_lr, testset_purchase['total_purchase_amt'], testset_redeem['total_redeem_amt'])
add_two_df(multi_model_eva(data_purchase, 'purchase'), multi_model_eva(data_redeem, 'redeem'))

August prediction results (linear model)

trainset, testset = split_data_underline(data)
visual(result_purchase_lr, result_redeem_lr, testset)




result_purchase_lr = generate_online_result(data_purchase, [x for x in data_purchase.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                            LinearRegression(), 'total_purchase_amt')
result_redeem_lr = generate_online_result(data_redeem, [x for x in data_redeem.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                          LinearRegression(), 'total_redeem_amt')

Generate online results (linear model)

trainset, testset = split_data_online(data)
draw_result(result_purchase_lr, result_redeem_lr, testset)


Purchase features

'dis_to_nowork', 'dis_to_work', 'dis_from_work', 'purchase_weekdayrate',
'redeem_dayrate', 'weekday_onehot_5', 'weekday_onehot_6',
'dis_from_nowork', 'is_holiday', 'weekday_onehot_1', 'weekday_onehot_2',
'weekday_onehot_0', 'dis_from_middleofweek', 'dis_from_holiendday',
'weekday_onehot_3', 'is_lastday_of_holiday', 'is_firstday_of_holiday',
'weekday_onehot_4', 'is_worked_yestday', 'is_second_week',
'is_third_week', 'dis_from_startofmonth', 'dis_from_holiday',
'dis_to_nowork%%%%dis_from_purchase_peak', 'total_purchase_amt',
'total_redeem_amt', 'date'

Redeem features

'is_work', 'dis_from_redeem_valley', 'purchase_weekdayrate',
'redeem_dayrate', 'weekday_onehot_5', 'is_gonna_work_tomorrow',
'is_holiday', 'dis_from_nowork', 'weekday_onehot_0', 'weekday_onehot_1',
'is_firstday_of_holiday', 'weekday_onehot_2', 'is_lastday_of_holiday',
'dis_from_holiday', 'is_work_on_sunday', 'is_firstday_of_work',
'is_secday_of_month', 'dis_from_holiendday',
'dis_from_redeem_valley%%%%dis_from_redeem_peak', 'total_purchase_amt',
'total_redeem_amt', 'date'
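Both lists contain names joined with %%%%, such as dis_to_nowork%%%%dis_from_purchase_peak, which suggests pairwise interaction features built during the feature-engineering step that is not reproduced in this post. Purely as a hypothetical sketch, assuming the interaction is the product of the two base columns (the real construction may differ):

# Hypothetical sketch: build a "%%%%" cross feature as the product of two existing
# distance features. Not the author's confirmed recipe.
def add_cross_feature(df, left, right, sep='%%%%'):
    df[left + sep + right] = df[left] * df[right]
    return df

data_purchase = add_cross_feature(data_purchase, 'dis_to_nowork', 'dis_from_purchase_peak')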

normalize_upload_file(result_purchase_lr, result_redeem_lr, testset).to_csv('20190614_droped.csv',index=False,header=None)

Generate online results (MLP)

result_purchase_lr = generate_online_result(data_purchase, [x for x in data_purchase.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                            MLPRegressor(solver='lbfgs'), 'total_purchase_amt')
result_redeem_lr = generate_online_result(data_redeem, [x for x in data_redeem.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                          MLPRegressor(solver='lbfgs'), 'total_redeem_amt')
trainset, testset = split_data_online(data)
draw_result(result_purchase_lr, result_redeem_lr, testset)

normalize_upload_file(result_purchase_lr, result_redeem_lr, testset).to_csv('20190614_droped_MLP.csv',index=False,header=None)

Generate online results (XGBoost)

result_purchase_lr = generate_online_result(data_purchase, [x for x in data_purchase.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                            xgb.XGBRegressor(objective='reg:squarederror'), 'total_purchase_amt')
result_redeem_lr = generate_online_result(data_redeem, [x for x in data_redeem.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                          xgb.XGBRegressor(objective='reg:squarederror'), 'total_redeem_amt')
trainset, testset = split_data_online(data)
draw_result(result_purchase_lr, result_redeem_lr, testset)

normalize_upload_file(result_purchase_lr, result_redeem_lr, testset).to_csv('20190615_droped_XGB.csv',index=False,header=None)

AIC model averaging

purchase_features, purchase_weight = multi_model(data_purchase, model=LinearRegression(), types='purchase')
redeem_features, redeem_weight = multi_model(data_redeem, model=LinearRegression(), types='redeem')

# Weighted-average the sliding-window evaluation over the feature sets found by the AIC search
def eva_for_aic(data_purchase, purchase_features, purchase_weight, types='purchase'):
    results = pd.DataFrame()
    for index, feature in enumerate(purchase_features):
        if results.empty:
            results = scale_df(multi_model_eva(data_purchase[['date'] + labels + feature], types),
                               eta=purchase_weight[index])
        else:
            results = add_two_df(results, multi_model_eva(data_purchase[['date'] + labels + feature], types),
                                 left_a=1, right_a=purchase_weight[index])
    return results

add_two_df(eva_for_aic(data_purchase, purchase_features, purchase_weight, 'purchase'),
           eva_for_aic(data_redeem, redeem_features, redeem_weight, 'redeem'))
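The weighting inside multi_model is ad hoc: it centers the AIC scores and raises (-x/2) to the tenth power. The textbook way to turn information-criterion scores into model-averaging weights is the Akaike weight w_i = exp(-Delta_i / 2) / sum_j exp(-Delta_j / 2), where Delta_i is the gap to the best (lowest) AIC. A hedged sketch of that standard alternative, which is not what the post's code computes:

import numpy as np

def akaike_weights(aic_scores):
    # Standard Akaike weights: smaller AIC -> larger weight, weights sum to 1.
    aic_scores = np.asarray(aic_scores, dtype=float)
    delta = aic_scores - aic_scores.min()
    w = np.exp(-delta / 2.0)
    return w / w.sum()

# Example with hypothetical AIC values
print(akaike_weights([1020.5, 1023.1, 1019.8]))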

Modeling the residuals

data_purchase = pd.read_csv('Feature/residual_feature_purchase_0621.csv')
data_purchase['date'] = pd.to_datetime(data_purchase['date'])
data_redeem = pd.read_csv('Feature/residual_feature_redeem_0621.csv')
data_redeem['date'] = pd.to_datetime(data_redeem['date'])
base = pd.read_csv('Dataset/base.csv')

# Fit a model on the residual (rate) features, then multiply by the cycle-factor baseline
def generate_residual_result(data, base, model=LinearRegression(), types='purchase', split_time=datetime.date(2014, 8, 1)):
    a_month = relativedelta(months=1)
    trainset = data[(datetime.date(2014, 4, 1) <= data['date']) & (data['date'] < split_time)]
    testset = data[(split_time <= data['date']) & (data['date'] < split_time + a_month)]
    feature = [x for x in data.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']]
    model.fit(X=trainset[feature], y=trainset['total_' + types + '_amt'])
    result_purchase_rate = model.predict(testset[feature])
    base['date'] = pd.to_datetime(base['date'], format="%Y%m%d")
    result_purchase_cycle = np.array(base[(base['date'] >= split_time) & (base['date'] < split_time + a_month)]['total_' + types + '_predicted_by_cycle'])
    result_purchase_residual = result_purchase_rate * np.array(result_purchase_cycle)
    return result_purchase_residual

# Sliding-window evaluation of the residual model
def generate_evaluate_for_residual(model=LinearRegression()):
    result = []
    for i in [datetime.date(2014, 8, 1), datetime.date(2014, 7, 25), datetime.date(2014, 7, 18),
              datetime.date(2014, 7, 11), datetime.date(2014, 7, 4), datetime.date(2014, 6, 27),
              datetime.date(2014, 6, 20)]:
        result_purchase_residual = generate_residual_result(data_purchase, base, model=model, types='purchase', split_time=i)
        result_redeem_residual = generate_residual_result(data_redeem, base, model=model, types='redeem', split_time=i)
        a_month = relativedelta(months=1)
        testset = data[(data['date'] >= i) & (data['date'] < i + a_month)]
        real_purchase = testset['total_purchase_amt']
        real_redeem = testset['total_redeem_amt']
        result.append(total_AE(result_purchase_residual, result_redeem_residual, real_purchase, real_redeem))
    return pd.DataFrame(result)

# Compare several regressors on the residual-modeling task
def multi_model_eva_for_residual():
    results = pd.DataFrame()
    for model in [LinearRegression(), DecisionTreeRegressor(), RandomForestRegressor(),
                  GradientBoostingRegressor(), MLPRegressor(solver='lbfgs'),
                  xgb.XGBRegressor(objective='reg:squarederror')]:
        if results.empty:
            results = draw_eva_table(generate_evaluate_for_residual(model)).rename(columns={0: repr(model).split('(')[0]})
        else:
            results = pd.merge(results,
                               draw_eva_table(generate_evaluate_for_residual(model)).rename(columns={0: repr(model).split('(')[0]}))
    results = results[['interval'] + [x for x in results.columns if x != 'interval']]
    return results

# Score the pure cycle-factor (time series) baseline on the same windows
def generate_evaluate_for_cycle():
    result = []
    for i in [datetime.date(2014, 8, 1), datetime.date(2014, 7, 25), datetime.date(2014, 7, 18),
              datetime.date(2014, 7, 11), datetime.date(2014, 7, 4), datetime.date(2014, 6, 27),
              datetime.date(2014, 6, 20)]:
        a_month = relativedelta(months=1)
        testset = base[(base['date'] >= i) & (base['date'] < i + a_month)].reset_index(drop=True)
        result_purchase_residual = testset['total_purchase_predicted_by_cycle']
        result_redeem_residual = testset['total_redeem_predicted_by_cycle']
        testset = data[(data['date'] >= i) & (data['date'] < i + a_month)].reset_index(drop=True)
        real_purchase = testset['total_purchase_amt']
        real_redeem = testset['total_redeem_amt']
        result.append(total_AE(result_purchase_residual, result_redeem_residual, real_purchase, real_redeem))
    return pd.DataFrame(result).rename(columns={0: 'PureTimeSeries'})

pd.merge(multi_model_eva_for_residual(), draw_eva_table(generate_evaluate_for_cycle()))
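base.csv is produced in an earlier notebook and holds, for every date, total_purchase_predicted_by_cycle and total_redeem_predicted_by_cycle: a pure time-series baseline built from periodic (calendar) factors. As a rough, hedged sketch of the idea only, not the exact recipe behind base.csv, a weekday-factor baseline could look like this, assuming a daily frame named daily with date and total_purchase_amt columns:

# Hypothetical sketch of a cycle-factor baseline.
def cycle_baseline(daily, target='total_purchase_amt'):
    df = daily.copy()
    df['weekday'] = df['date'].dt.weekday
    # Weekday factor: how each weekday's mean compares with the overall mean
    weekday_factor = df.groupby('weekday')[target].mean() / df[target].mean()
    # Baseline for a future date = recent overall mean scaled by that weekday's factor
    recent_mean = df[target].tail(30).mean()
    return {wd: recent_mean * f for wd, f in weekday_factor.items()}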

August prediction using only the cycle factor

_, testset = split_data_underline(data)
real_purchase = testset['total_purchase_amt']
real_redeem = testset['total_redeem_amt']
result_purchase_cycle = np.array(base[(base['date'] >= datetime.date(2014, 8, 1)) & (base['date'] < datetime.date(2014, 9, 1))]['total_purchase_predicted_by_cycle'])
result_redeem_cycle = np.array(base[(base['date'] >= datetime.date(2014, 8, 1)) & (base['date'] < datetime.date(2014, 9, 1))]['total_redeem_predicted_by_cycle'])
total_AE(result_purchase_cycle, result_redeem_cycle, real_purchase, real_redeem)
trainset, testset = split_data_underline(data)
visual(result_purchase_cycle, result_redeem_cycle, testset)




August prediction using the cycle factor plus the predicted residual (better than the factor alone)

trainset_purchase, testset_purchase = split_data_underline(data_purchase)
result_purchase_rate = generate_under_result(data_purchase, [x for x in data_purchase.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                             target='total_purchase_amt')
trainset_redeem, testset_redeem = split_data_underline(data_redeem)
result_redeem_rate = generate_under_result(data_redeem, [x for x in data_redeem.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                           target='total_redeem_amt')
total_AE(result_purchase_rate * result_purchase_cycle, result_redeem_rate * result_redeem_cycle, real_purchase, real_redeem)
trainset, testset = split_data_underline(data)
visual(result_purchase_rate * result_purchase_cycle, result_redeem_rate * result_redeem_cycle, testset)




Generate the online submission

trainset_purchase, testset_purchase = split_data_online(data_purchase)
result_purchase_rate = generate_online_result(data_purchase, [x for x in data_purchase.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                              target='total_purchase_amt')
trainset_redeem, testset_redeem = split_data_online(data_redeem)
result_redeem_rate = generate_online_result(data_redeem, [x for x in data_redeem.columns if x not in ['total_purchase_amt', 'total_redeem_amt', 'date']],
                                            target='total_redeem_amt')
# Try correcting the predictions: rescale the predicted rates to average 1 before applying the cycle factor
result_purchase_rate = result_purchase_rate / np.mean(result_purchase_rate)
result_redeem_rate = result_redeem_rate / np.mean(result_redeem_rate)
result_purchase_cycle = np.array(base[(base['date'] >= datetime.date(2014, 9, 1)) & (base['date'] < datetime.date(2014, 10, 1))]['total_purchase_predicted_by_cycle'])
result_redeem_cycle = np.array(base[(base['date'] >= datetime.date(2014, 9, 1)) & (base['date'] < datetime.date(2014, 10, 1))]['total_redeem_predicted_by_cycle'])
result_purchase_residual = result_purchase_rate * result_purchase_cycle
result_redeem_residual = result_redeem_rate * result_redeem_cycle
draw_result(result_purchase_cycle, result_redeem_cycle, testset_redeem)

Results after the residual correction

draw_result(result_purchase_residual, result_redeem_residual, testset_redeem)

normalize_upload_file(result_purchase_residual, result_redeem_residual, testset_redeem).to_csv('20190622_residual_liner.csv', index=False, header=None)

result_score135 = pd.read_csv('Result/timeseries0606.csv', header=None)
result_residual = normalize_upload_file(result_purchase_residual, result_redeem_residual, testset_redeem).reset_index(drop=True)
result_residual['date'] = result_residual['date'].astype(int)
# Overwrite a few September dates of an earlier submission (result_score135) with the
# residual-model values; both frames cover September in date order, so row indices align.
days_need_to_change = [20140906, 20140907, 20140908, 20140928]
for index, row in result_score135.iterrows():
    if row[0] in days_need_to_change:
        result_score135.loc[index, 1] = result_residual.loc[index, 'total_purchase_amt']
        result_score135.loc[index, 2] = result_residual.loc[index, 'total_redeem_amt']
result_score135.to_csv('result135_fixed_by_residual_0621.csv', index=False, header=None)

Summary

This baseline fits regression models on calendar and distance features, compares several regressors over sliding validation windows, prunes weak features, and then improves on a pure cycle-factor baseline by modeling the rate residual and splicing the corrected days into the final submission.
