import xgboost as xgb
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
x_train, x_valid, y_train, y_valid = train_test_split(
pca2_results_train,
y_train, test_size=0.2)
d_train = xgb.DMatrix(x_train, label=y_train)
d_valid = xgb.DMatrix(x_valid, label=y_valid)
d_test = xgb.DMatrix(x_test)
d_test = xgb.DMatrix(pca2_results_test)
params = {}
params['objective'] = 'reg:linear'
params['eta'] = 0.02
params['max_depth'] = 4
def xgb_r2_score(preds, dtrain):
labels = dtrain.get_label()
return 'r2', r2_score(labels, preds)
watchlist = [(d_train, 'train'), (d_valid, 'valid')]
clf = xgb.train(params, d_train,
1000, watchlist, early_stopping_rounds=50,
feval=xgb_r2_score, maximize=True, verbose_eval=10)
I have been using this but an error is poping up as below:-
Found input variables with inconsistent numbers of samples: [4209, 3367]
Please help