Click-Through Rate (CTR) Prediction
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import log_loss
Field description: Label is the click label (1 = clicked, 0 = not clicked); I1-I13 are continuous numeric features; C1-C26 are categorical features (hashed strings).
df_train = pd.read_csv('./data/train.csv')
print(df_train.shape)
df_train[:3]
(1599, 41)
 | Id | Label | I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | ... | C17 | C18 | C19 | C20 | C21 | C22 | C23 | C24 | C25 | C26
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 10000743 | 1 | 1.0 | 0 | 1.0 | NaN | 227.0 | 1.0 | 173.0 | 18.0 | ... | 3486227d | e88ffc9d | c393dc22 | b1252a9d | 57c90cd9 | NaN | bcdee96c | 4d19a3eb | cb079c2d | 456c12a0 |
1 | 10000159 | 1 | 4.0 | 1 | 1.0 | 2.0 | 27.0 | 2.0 | 4.0 | 2.0 | ... | 07c540c4 | 92555263 | NaN | NaN | 242bb710 | NaN | 3a171ecb | 72c78f11 | NaN | NaN |
2 | 10001166 | 1 | 0.0 | 806 | NaN | NaN | 1752.0 | 142.0 | 2.0 | 0.0 | ... | 07c540c4 | 25c88e42 | 21ddcdc9 | b1252a9d | a0136dd2 | NaN | 32c7478e | 8fc66e78 | 001f3601 | f37f3967 |
3 rows × 41 columns
df_test = pd.read_csv('./data/test.csv')
print(df_test.shape)
df_test[:3]
(400, 40)
 | Id | I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | ... | C17 | C18 | C19 | C20 | C21 | C22 | C23 | C24 | C25 | C26
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 10000405 | NaN | -1 | NaN | NaN | 8020.0 | 26.0 | 6.0 | 0.0 | 80.0 | ... | e5ba7672 | 7119e567 | 1d04f4a4 | b1252a9d | d5f54153 | NaN | 32c7478e | a9d771cd | c9f3bea7 | 0a47000d |
1 | 10001189 | NaN | -1 | NaN | NaN | 17881.0 | 9.0 | 8.0 | 0.0 | 0.0 | ... | e5ba7672 | 51369abb | NaN | NaN | d4b6b7e8 | NaN | 32c7478e | 37821b83 | NaN | NaN |
2 | 10000674 | 0.0 | 0 | 2.0 | 13.0 | 2904.0 | 104.0 | 1.0 | 3.0 | 100.0 | ... | e5ba7672 | bd17c3da | 966f1c31 | a458ea53 | 1d1393f4 | ad3062eb | 32c7478e | 3fdb382b | 010f6491 | 49d68486 |
3 rows × 40 columns
# Drop the Id column, merge the train and test sets, and fill missing values
df_train.drop(['Id'], axis=1, inplace=True)
df_test.drop(['Id'], axis=1, inplace=True)
df_test['Label'] = -1  # placeholder label so the test rows can be recovered after the concat
data = pd.concat([df_train, df_test])
data.fillna(-1, inplace=True)  # -1 marks missing values in both feature types
continuous_fea = ['I' + str(i + 1) for i in range(13)]  # continuous features I1..I13
category_fea = ['C' + str(i + 1) for i in range(26)]    # categorical features C1..C26
def data_scaler(data):
    # Min-max normalize the continuous features
    scaler = MinMaxScaler()
    for col in continuous_fea:
        data[col] = scaler.fit_transform(data[col].values.reshape(-1, 1))
    return data

def data_encoder(data, columns_set):
    # One-hot encode the discrete features
    for col in columns_set:
        onehot_feats = pd.get_dummies(data[col], prefix=col)
        data.drop([col], axis=1, inplace=True)
        data = pd.concat([data, onehot_feats], axis=1)
    return data
def get_train_test(data):
    # Split the merged frame back into train features, train target, and test features
    feat_columns = [i for i in data.columns if i != 'Label']
    train = data[data['Label'] != -1][feat_columns]
    target = data[data['Label'] != -1]['Label']
    test = data[data['Label'] == -1][feat_columns]
    return train, target, test

def split_train(train, target, test_size=0.2, random_state=2020):
    x_train, x_val, y_train, y_val = train_test_split(train, target, test_size=test_size, random_state=random_state)
    print(x_train.shape, y_train.shape)
    print(x_val.shape, y_val.shape)
    return x_train, x_val, y_train, y_val
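data_scaler maps every continuous column onto [0, 1] via (x - min) / (max - min); a toy illustration of that transform (a minimal sketch, not part of the pipeline):

# Toy check of the min-max transform used by data_scaler:
toy = np.array([-1.0, 0.0, 3.0]).reshape(-1, 1)
print(MinMaxScaler().fit_transform(toy).ravel())  # -> [0.   0.25 1.  ]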
# LR baseline: scale the continuous features and one-hot encode the categorical features
df_lr = data_scaler(data.copy())
df_lr = data_encoder(df_lr, category_fea)
train, target, test = get_train_test(df_lr)
x_train, x_val, y_train, y_val = split_train(train, target)
(1279, 13104) (1279,)
(320, 13104) (320,)
x_train[:3]
 | I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | I10 | ... | C26_fb7edec8 | C26_fbe10aa8 | C26_fcd456fa | C26_fcd5a3f4 | C26_fd6ccd1e | C26_fdd86175 | C26_fe7d4d4a | C26_ff2cdc2b | C26_ff86d5e0 | C26_ffc123e9
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
549 | 0.104167 | 0.000381 | 0.000946 | 0.090909 | 0.000122 | 0.001725 | 0.006028 | 0.010949 | 0.001419 | 0.4 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1379 | 0.000000 | 0.000254 | 0.000828 | 0.056818 | 0.007091 | 0.020047 | 0.003617 | 0.009124 | 0.053920 | 0.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1306 | 0.000000 | 0.000127 | 0.000355 | 0.045455 | 0.005946 | 0.000862 | 0.001808 | 0.007299 | 0.000709 | 0.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 rows × 13104 columns
# Build the model
lr = LogisticRegression()
lr.fit(x_train, y_train)
train_logloss = log_loss(y_train, lr.predict_proba(x_train)[:, 1])  # log loss: -(y*log(p) + (1-y)*log(1-p))
val_logloss = log_loss(y_val, lr.predict_proba(x_val)[:, 1])
print('train_logloss: ', train_logloss)
print('val_logloss: ', val_logloss)
train_logloss: 0.12423395164795313
val_logloss: 0.44407245698751546
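log_loss implements exactly the formula in the comment above; a quick hand verification on the validation split (a minimal sketch, reusing the fitted lr):

# Sanity check (a sketch): log_loss matches the hand-computed average of
# -(y*log(p) + (1-y)*log(1-p)) over the validation samples.
p = lr.predict_proba(x_val)[:, 1]
manual = -np.mean(y_val * np.log(p) + (1 - y_val) * np.log(1 - p))
print(manual, np.isclose(manual, log_loss(y_val, p)))  # expect True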
# GBDT baseline: trees need no scaling, so only one-hot encode the categorical features
df_gbdt = data_encoder(data.copy(), category_fea)
train, target, test = get_train_test(df_gbdt)
x_train, x_val, y_train, y_val = split_train(train, target)
(1279, 13104) (1279,)
(320, 13104) (320,)
x_train[:3]
 | I1 | I2 | I3 | I4 | I5 | I6 | I7 | I8 | I9 | I10 | ... | C26_fb7edec8 | C26_fbe10aa8 | C26_fcd456fa | C26_fcd5a3f4 | C26_fd6ccd1e | C26_fdd86175 | C26_fe7d4d4a | C26_ff2cdc2b | C26_ff86d5e0 | C26_ffc123e9
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
549 | 9.0 | 1 | 7.0 | 7.0 | 123.0 | 7.0 | 9.0 | 5.0 | 7.0 | 1.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1379 | -1.0 | 0 | 6.0 | 4.0 | 7198.0 | 92.0 | 5.0 | 4.0 | 303.0 | -1.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1306 | -1.0 | -1 | 2.0 | 3.0 | 6035.0 | 3.0 | 2.0 | 3.0 | 3.0 | -1.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 rows × 13104 columns
# Build the model
gbm = lgb.LGBMClassifier(boosting_type='gbdt',  # plain GBDT boosting here
                         objective='binary',
                         subsample=0.8,
                         min_child_weight=0.5,
                         colsample_bytree=0.7,
                         num_leaves=100,
                         max_depth=12,
                         learning_rate=0.01,
                         n_estimators=10000
                         )
gbm.fit(x_train, y_train,
        eval_set=[(x_train, y_train), (x_val, y_val)],
        eval_names=['train', 'val'],
        eval_metric='binary_logloss',
        early_stopping_rounds=100,  # deprecated in newer LightGBM; see the callback sketch below
        )
train_logloss = log_loss(y_train, gbm.predict_proba(x_train)[:, 1])  # log loss: -(y*log(p) + (1-y)*log(1-p))
val_logloss = log_loss(y_val, gbm.predict_proba(x_val)[:, 1])
print('train_logloss: ', train_logloss)
print('val_logloss: ', val_logloss)
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py:726: UserWarning: 'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.
[1] train's binary_logloss: 0.523857  val's binary_logloss: 0.457806
[2] train's binary_logloss: 0.521371  val's binary_logloss: 0.457213
[3] train's binary_logloss: 0.519084  val's binary_logloss: 0.456616
...
[147] train's binary_logloss: 0.330627  val's binary_logloss: 0.434013
...
[246] train's binary_logloss: 0.273214  val's binary_logloss: 0.439103
[247] train's binary_logloss: 0.272726  val's binary_logloss: 0.439223
train_logloss: 0.3306267544890511
val_logloss: 0.4340127102761729
Note the early stopping at work: the best validation log loss (0.434013) occurs at iteration 147, so training halts 100 rounds later at iteration 247, and predict_proba then scores with the best iteration. That is why the reported train/val log losses match iteration 147 even though n_estimators was set to 10000.
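As the warning above says, early_stopping_rounds is deprecated in the scikit-learn interface; on recent LightGBM versions the equivalent fit passes a callback instead (a sketch):

# Equivalent fit for newer LightGBM, where early stopping is a callback:
gbm.fit(x_train, y_train,
        eval_set=[(x_train, y_train), (x_val, y_val)],
        eval_names=['train', 'val'],
        eval_metric='binary_logloss',
        callbacks=[lgb.early_stopping(stopping_rounds=100)])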
# Retrain the GBDT so its underlying booster can be used to extract leaf indices
df_gbdt = data_encoder(data.copy(), category_fea)
train, target, test = get_train_test(df_gbdt)
x_train, x_val, y_train, y_val = split_train(train, target)
(1279, 13104) (1279,)
(320, 13104) (320,)
gbm = lgb.LGBMClassifier(objective='binary',
                         subsample=0.8,
                         min_child_weight=0.5,
                         colsample_bytree=0.7,
                         num_leaves=100,
                         max_depth=12,
                         learning_rate=0.01,
                         n_estimators=1000,
                         )
gbm.fit(x_train, y_train,
        eval_set=[(x_train, y_train), (x_val, y_val)],
        eval_names=['train', 'val'],
        eval_metric='binary_logloss',
        early_stopping_rounds=100,
        )
model = gbm.booster_  # keep the low-level Booster for predict(..., pred_leaf=True)
(Deprecation warning and training log identical to the previous run: early stopping again selects iteration 147 and halts at 247.)
gbdt_feats_train = model.predict(train, pred_leaf=True)  # leaf index of every sample in every tree
gbdt_feats_test = model.predict(test, pred_leaf=True)
print('gbdt_feats_train shape:', gbdt_feats_train.shape)
gbdt_feats_train[0]
gbdt_feats_train shape: (1599, 147)
array([12, 12, 30, 6, 5, 5, 14, 13, 5, 13, 21, 10, 8, 12, 11, 13, 16, 1, 13, 8, 8, 19, 25, 31, 40, 3, 14, 8, 8, 21, 7, 8, 13, 16, 8, 7, 7, 25, 12, 1, 3, 24, 10, 17, 13, 24, 3, 27, 3, 3, 20, 17, 27, 14, 34, 22, 3, 1, 3, 26, 9, 7, 17, 2, 9, 30, 21, 25, 4, 23, 31, 11, 3, 26, 38, 19, 3, 6, 20, 5, 4, 31, 26, 14, 0, 7, 21, 6, 19, 26, 7, 2, 9, 6, 8, 1, 5, 20, 4, 1, 9, 2, 13, 28, 34, 6, 10, 21, 21, 16, 5, 25, 2, 28, 23, 4, 4, 19, 4, 0, 16, 4, 15, 2, 4, 16, 8, 35, 4, 0, 6, 15, 33, 17, 22, 10, 25, 15, 16, 6, 3, 40, 17, 10, 16, 11, 16])
There is one leaf-index feature per tree: early stopping kept 147 trees, so every sample gets 147 features.
gbdt_feats_name = ['gbdt_leaf_' + str(i) for i in range(gbdt_feats_train.shape[1])]
gbdt_feats_name[:3]
['gbdt_leaf_0', 'gbdt_leaf_1', 'gbdt_leaf_2']
df_train_gbdt_feats = pd.DataFrame(gbdt_feats_train, columns=gbdt_feats_name)
df_test_gbdt_feats = pd.DataFrame(gbdt_feats_test, columns=gbdt_feats_name)
df_train_gbdt_feats[:2]
 | gbdt_leaf_0 | gbdt_leaf_1 | gbdt_leaf_2 | gbdt_leaf_3 | gbdt_leaf_4 | gbdt_leaf_5 | gbdt_leaf_6 | gbdt_leaf_7 | gbdt_leaf_8 | gbdt_leaf_9 | ... | gbdt_leaf_137 | gbdt_leaf_138 | gbdt_leaf_139 | gbdt_leaf_140 | gbdt_leaf_141 | gbdt_leaf_142 | gbdt_leaf_143 | gbdt_leaf_144 | gbdt_leaf_145 | gbdt_leaf_146
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 12 | 12 | 30 | 6 | 5 | 5 | 14 | 13 | 5 | 13 | ... | 15 | 16 | 6 | 3 | 40 | 17 | 10 | 16 | 11 | 16 |
1 | 12 | 7 | 29 | 19 | 9 | 1 | 14 | 1 | 15 | 1 | ... | 14 | 21 | 27 | 0 | 28 | 26 | 0 | 33 | 0 | 26 |
2 rows × 147 columns
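Below, these leaf indices are appended to the original features and one-hot encoded with data_encoder. An equivalent route is sklearn's OneHotEncoder, which was imported at the top but is otherwise unused; a minimal sketch on the leaf arrays from above:

# Sketch: one-hot encode the leaf indices with OneHotEncoder instead of get_dummies.
leaf_encoder = OneHotEncoder(handle_unknown='ignore')          # unseen leaves -> all-zero rows
X_train_leaves = leaf_encoder.fit_transform(gbdt_feats_train)  # sparse one-hot matrix
X_test_leaves = leaf_encoder.transform(gbdt_feats_test)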
# Append the leaf-index features to the original features, then re-merge train and test
train = pd.concat([train, df_train_gbdt_feats], axis=1)
test = pd.concat([test, df_test_gbdt_feats], axis=1)
train_len = train.shape[0]
data_new = pd.concat([train, test])
# Scale the continuous features and one-hot encode the leaf indices for the final LR
df_gbdt_lr = data_scaler(data_new.copy())
df_gbdt_lr = data_encoder(df_gbdt_lr, gbdt_feats_name)
train = df_gbdt_lr[:train_len]
test = df_gbdt_lr[train_len:]
x_train, x_val, y_train, y_val = split_train(train, target, test_size=0.3, random_state=2018)
(1119, 20046) (1119,)
(480, 20046) (480,)
lr = LogisticRegression()
lr.fit(x_train, y_train)
train_logloss = log_loss(y_train, lr.predict_proba(x_train)[:, 1])
print('train_logloss: ', train_logloss)
val_logloss = log_loss(y_val, lr.predict_proba(x_val)[:, 1])
print('val_logloss: ', val_logloss)
train_logloss: 0.012043875601048207
val_logloss: 0.3052770537230522
Stacking the GBDT leaf features under LR lowers the validation log loss from 0.444 (plain LR) to 0.305, though the near-zero training loss shows the model heavily overfits this small 1.6k-row sample.
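The transformed test set was carried through the whole pipeline but never scored; producing CTR predictions for it takes one more call (a sketch, reusing the fitted lr and the test frame from above):

# Sketch: score the held-out test set with the final GBDT + LR model.
y_pred = lr.predict_proba(test)[:, 1]  # predicted click probabilities
print(y_pred[:5])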