Google WideDeep Model
谷歌的GooglePlay的推薦模型Wide and Deep Learning 模型,目前已經提供了tensorflow的開源版本。
本文使用keras完成:
#coding: utf-8
'''
Google Wide & Deep model implemented with Keras.
'''
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Merge
from sklearn.preprocessing import MinMaxScaler
# All columns of the raw CSV files, in file order.
COLUMNS = [
    "age", "workclass", "fnlwgt", "education", "education_num", "marital_status",
    "occupation", "relationship", "race", "gender", "capital_gain", "capital_loss",
    "hours_per_week", "native_country", "income_bracket"
]
# Name of the derived binary label column.
LABEL_COLUMN = "label"
# Categorical feature columns (one-hot encoded during preprocessing).
CATEGORICAL_COLUMNS = [
    "workclass", "education", "marital_status", "occupation", "relationship",
    "race", "gender", "native_country"
]
# Continuous-valued feature columns.
CONTINUOUS_COLUMNS = [
    "age", "education_num", "capital_gain", "capital_loss", "hours_per_week"
]
# Load one CSV file.
def load(filename):
    """Read a headerless CSV file into a DataFrame with the COLUMNS names.

    When the file's base name contains ``'test'`` the first line is skipped.
    Only the base name is inspected, so a training file stored under a
    directory whose name happens to contain "test" keeps its first row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame with one column per entry of COLUMNS, with every row
        that contains a NaN removed.
    """
    # Local import so the block does not depend on a file-level ``import os``.
    import os

    skiprows = 1 if 'test' in os.path.basename(filename) else 0
    with open(filename, 'r') as f:
        df = pd.read_csv(
            f, names=COLUMNS, skipinitialspace=True, skiprows=skiprows, engine='python'
        )
    # Missing-value handling: drop rows that have any NaN.
    # NOTE(review): if the data marks missing values with a placeholder such
    # as "?", those rows are not NaN and are kept -- confirm whether
    # na_values should be passed to read_csv.
    return df.dropna(how='any', axis=0)
# Preprocessing.
def preprocess(df):
    """Build the scaled feature matrix and the binary label vector.

    The label is 1 where ``income_bracket`` contains the substring ``">50K"``
    and 0 otherwise. Columns listed in CATEGORICAL_COLUMNS are one-hot
    encoded, then every feature column is rescaled with a default
    ``MinMaxScaler`` fitted on the rows passed in.

    The input frame is left untouched; all work happens on a derived frame.

    Args:
        df: DataFrame containing an ``income_bracket`` column plus the
            feature columns.

    Returns:
        A tuple ``(X, y)``: ``X`` is the scaled feature array and ``y`` the
        integer label array, both in the row order of ``df``.
    """
    y = df['income_bracket'].apply(lambda x: ">50K" in x).astype(int).values

    # Drop the raw target (and any pre-existing label column) without
    # modifying the caller's frame.
    features = df.drop(['income_bracket', LABEL_COLUMN], axis=1, errors='ignore')
    features = pd.get_dummies(features, columns=list(CATEGORICAL_COLUMNS))

    # TODO: select features to make the network more efficient.
    # TODO: feature engineering, e.g. add crossed / combined features
    #       (sklearn's PolynomialFeatures with interaction_only=True is one option).

    X = MinMaxScaler().fit_transform(features)
    return X, y
def main(train_path='E:\\adult.data', test_path='E:\\adult.test',
         nb_epoch=10, batch_size=32):
    """Train the Wide & Deep classifier and print its test accuracy.

    The defaults reproduce the previously hard-coded paths and training
    settings, so a bare ``main()`` behaves as before.

    Args:
        train_path: CSV file holding the training rows.
        test_path: CSV file holding the evaluation rows.
        nb_epoch: Number of epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.
    """
    df_train = load(train_path)
    df_test = load(test_path)
    train_len = len(df_train)

    # Both splits are preprocessed as one frame so they end up with the same
    # one-hot columns and scaling; ignore_index avoids duplicate row labels.
    # NOTE(review): this also fits the scaler on the evaluation rows.
    df = pd.concat([df_train, df_test], ignore_index=True)
    X, y = preprocess(df)
    X_train, X_test = X[:train_len], X[train_len:]
    y_train, y_test = y[:train_len], y[train_len:]

    model = _build_wide_deep(X_train.shape[1])

    # Compile the model.
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Train; the same feature matrix feeds both the wide and the deep branch.
    model.fit([X_train, X_train], y_train, nb_epoch=nb_epoch, batch_size=batch_size)

    # Evaluate loss and accuracy on the held-out rows.
    _, accuracy = model.evaluate([X_test, X_test], y_test)
    print('\n', 'test accuracy:', accuracy)


def _build_wide_deep(input_dim):
    """Return the uncompiled two-input model: wide and deep branches, concatenated."""
    # Wide part: a single linear unit over all input features.
    wide = Sequential()
    wide.add(Dense(1, input_dim=input_dim))

    # Deep part: 100 -> 32 -> 8 -> 1 fully connected stack.
    # TODO: add an embedding layer.
    deep = Sequential()
    deep.add(Dense(100, input_dim=input_dim, activation='relu'))
    deep.add(Dense(32, activation='relu'))
    deep.add(Dense(8))
    # NOTE(review): the deep branch is squashed by a sigmoid here and the
    # merged output is squashed again below -- confirm this is intended.
    deep.add(Dense(1, activation='sigmoid'))

    # Concatenate the two branch outputs and map them to one probability.
    model = Sequential()
    model.add(Merge([wide, deep], mode='concat', concat_axis=1))
    model.add(Dense(1, activation='sigmoid'))
    return model
# Run the training pipeline only when executed as a script.
if __name__ == '__main__':
    main()
總結
以上是生活随笔為你收集整理的Google WideDeep Model的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: BP+SGD+激活函数+代价函数+基本问
- 下一篇: geometry-api-java 学习