[jvm-packages] xgboost4j-spark 0.82: customized objective function - No constructor for type EvalTrait


#1

Similar to the example code in #3468, attached please find my testing code:

package com.isf.menasor.trainers.objectives

import breeze.numerics.abs
import ml.dmlc.xgboost4j.java.XGBoostError
import ml.dmlc.xgboost4j.scala.{DMatrix, EvalTrait}
import org.apache.commons.logging.{Log, LogFactory}

/**
  * Custom evaluation metric computing the weighted mean absolute percentage
  * error (wMAPE): sum(|label - prediction|) / sum(|label|).
  */
class wMapeEval extends EvalTrait{
  private[objectives] var evalMetric: String = "wMAPE"

  private val logger: Log = LogFactory.getLog(classOf[wMapeEval])

  /**
    * get evaluate metric
    *
    * @return evalMetric
    */
  override def getMetric: String = evalMetric

  /**
    * evaluate with predicts and data
    *
    * Only the first prediction column of each row is used. The denominator is
    * the sum of absolute label values, so labels of mixed sign cannot cancel
    * each other out. When that sum is zero the result follows plain Float
    * division (NaN for 0/0, Infinity otherwise).
    *
    * @param predicts predictions as array
    * @param dmat     data matrix to evaluate
    * @return result of the metric, or -1 if the labels cannot be read from dmat
    */
  override def eval(predicts: Array[Array[Float]], dmat: DMatrix): Float = {
    // Only the label lookup is guarded: it is the only call that can raise XGBoostError.
    val labels: Option[Array[Float]] =
      try {
        Some(dmat.getLabel)
      } catch {
        case ex: XGBoostError =>
          // Pass the exception as the cause so the stack trace is logged.
          logger.error("Failed to read labels from DMatrix", ex)
          None
      }

    labels.fold(-1f) { actual =>
      // Plain local accumulators keep the per-row loop free of allocations.
      var absError = 0f
      var absLabelSum = 0f
      for (i <- predicts.indices) {
        absError += math.abs(actual(i) - predicts(i)(0))
        absLabelSum += math.abs(actual(i))
      }
      absError / absLabelSum
    }
  }
}

And I deployed the custom objective and evaluation function via:

val xgb = estimator.asInstanceOf[XGBoostRegressor]
// Build the XGBoost parameter map from the values in `props`.
new ParamMap()
  .put(xgb.objective, props.getProperty("objective"))
  .put(xgb.evalMetric, props.getProperty("evalMetric"))
  .put(xgb.eta, props.getProperty("eta").toDouble)
  .put(xgb.maxDepth, props.getProperty("maxDepth").toInt)
  .put(xgb.subsample, props.getProperty("subsample").toDouble)
  .put(xgb.colsampleBytree, props.getProperty("colsampleBytree").toDouble)
  .put(xgb.colsampleBylevel, props.getProperty("colsampleBylevel").toDouble)
  .put(xgb.lambda, props.getProperty("lambda").toDouble)
  .put(xgb.alpha, props.getProperty("alpha").toDouble)
  .put(xgb.numRound, props.getProperty("numRound").toInt)
  .put(xgb.numWorkers, props.getProperty("numWorkers").toInt)
  .put(xgb.nthread, props.getProperty("nthread").toInt)
  .put(xgb.useExternalMemory, props.getProperty("useExternalMemory").toBoolean)
  .put(xgb.timeoutRequestWorkers, props.getProperty("timeoutRequestWorkers").toLong)
  // The custom objective is instantiated reflectively from its class name;
  // null is stored when the class cannot be loaded, instantiated or cast.
  .put(xgb.customObj,
    try {
      Class.forName(props.getProperty("customObj")).newInstance().asInstanceOf[ObjectiveTrait]
    } catch {
      // NonFatal lets VM-level errors (OutOfMemoryError, LinkageError, ...) propagate.
      case scala.util.control.NonFatal(_) => null
    }
  )
  .put(xgb.seed, props.getProperty("seed").toLong)
  .put(xgb.numEarlyStoppingRounds, props.getProperty("numEarlyStoppingRounds").toInt)
  .put(xgb.maximizeEvaluationMetrics, props.getProperty("maximizeEvaluationMetrics").toBoolean)
  .put(xgb.checkpointPath, props.getProperty("checkpointPath"))
  .put(xgb.checkpointInterval, props.getProperty("checkpointInterval").toInt)
  .put(xgb.treeMethod, props.getProperty("treeMethod"))
  .put(xgb.growPolicy, props.getProperty("growPolicy"))
  // Same reflective loading as customObj, for the custom evaluation metric.
  .put(xgb.customEval,
    try {
      Class.forName(props.getProperty("customEval")).newInstance().asInstanceOf[EvalTrait]
    } catch {
      case scala.util.control.NonFatal(_) => null
    }
  )

Model training ran successfully and I saved the model successfully, but it went wrong when I tried to load the model to do prediction:

Exception in thread "main" org.json4s.package$MappingException: No constructor for type EvalTrait, JObject(List())
at org.json4s.Extraction$ClassInstanceBuilder.org$json4s$Extraction$ClassInstanceBuilder$$constructor(Extraction.scala:417)
at org.json4s.Extraction$ClassInstanceBuilder.org$json4s$Extraction$ClassInstanceBuilder$$instantiate(Extraction.scala:468)
at org.json4s.Extraction$ClassInstanceBuilder$$anonfun$result$6.apply(Extraction.scala:515)
at org.json4s.Extraction$ClassInstanceBuilder$$anonfun$result$6.apply(Extraction.scala:512)
at org.json4s.Extraction$.org$json4s$Extraction$$customOrElse(Extraction.scala:524)
at org.json4s.Extraction$ClassInstanceBuilder.result(Extraction.scala:512)
at org.json4s.Extraction$.extract(Extraction.scala:351)
at org.json4s.Extraction$.extract(Extraction.scala:42)
at org.json4s.ExtractableJsonAstNode.extract(ExtractableJsonAstNode.scala:21)
at ml.dmlc.xgboost4j.scala.spark.params.CustomEvalParam.jsonDecode(CustomParams.scala:43)
at ml.dmlc.xgboost4j.scala.spark.params.CustomEvalParam.jsonDecode(CustomParams.scala:27)
at ml.dmlc.xgboost4j.scala.spark.params.DefaultXGBoostParamsReader$$anonfun$getAndSetParams$1.apply(DefaultXGBoostParamsReader.scala:117)
at ml.dmlc.xgboost4j.scala.spark.params.DefaultXGBoostParamsReader$$anonfun$getAndSetParams$1.apply(DefaultXGBoostParamsReader.scala:115)
at scala.collection.immutable.List.foreach(List.scala:381)
at ml.dmlc.xgboost4j.scala.spark.params.DefaultXGBoostParamsReader$.getAndSetParams(DefaultXGBoostParamsReader.scala:115)
at ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel$XGBoostRegressionModelReader.load(XGBoostRegressor.scala:447)
at ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel$XGBoostRegressionModelReader.load(XGBoostRegressor.scala:431)
at org.apache.spark.ml.util.MLReadable$class.load(ReadWrite.scala:223)
at ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel$.load(XGBoostRegressor.scala:412)
at com.isf.menasor.predictors.Predictor.predict(Predictor.scala:41)
at com.isf.menasor.Menasor$.main(Menasor.scala:38)
at com.isf.menasor.Menasor.main(Menasor.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:891)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:200)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:230)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:139)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
19/04/28 16:55:01 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.198.115.139:37408) with ID 9
19/04/28 16:55:01 INFO SparkContext: Invoking stop() from shutdown hook
19/04/28 16:55:01 INFO ExecutorAllocationManager: New executor 9 has registered (new total is 8)
19/04/28 16:55:01 INFO AbstractConnector: Stopped Spark@131581fb{HTTP/1.1,[http/1.1]}{0.0.0.0:0}
19/04/28 16:55:01 INFO SparkUI: Stopped Spark web UI at http://10.198.47.40:21137
19/04/28 16:55:01 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.198.123.136:23570) with ID 2
19/04/28 16:55:01 INFO BlockManagerMasterEndpoint: Registering block manager BJHTYD-Tyrande-115-139.hadoop.jd.local:11142 with 2.8 GB RAM, BlockManagerId(9, BJHTYD-Tyrande-115-139.hadoop.jd.local, 11142, None)

I checked the metadata file and I am not sure why it went wrong, but I suppose something is wrong with the JSON encoding of the custom parameters.
Saved model metadata:

{"class":"ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel","timestamp":1556435430464,"sparkVersion":"2.3.0.online-JD2.3.0.10-201904091836","uid":"xgbr_e2184741f54d","paramMap":{"checkpointInterval":5,"nthread":4,"maximizeEvaluationMetrics":false,"colsampleBytree":0.8,"numWorkers":320,"checkpointPath":"personal/isf/menasor/checkout/xgb/brand_cate2_city-samples_v6/lag30/order_date-20180430-20180515/mape","subsample":0.8,"silent":0,"seed":12345,"gamma":0.0,"skipDrop":0.0,"growPolicy":"depthwise","sketchEps":0.03,"verbosity":1,"eta":0.1,"minChildWeight":1.0,"customEval":{},"rateDrop":0.0,"sampleType":"uniform","lambda":1.0,"maxDepth":6,"normalizeType":"tree","maxDeltaStep":0.0,"lambdaBias":0.0,"colsampleBylevel":1.0,"treeLimit":0,"maxBin":16,"featuresCol":"features","labelCol":"total_label","missing":"NaN","evalMetric":"mae","trainTestRatio":1.0,"scalePosWeight":1.0,"numRound":50,"treeMethod":"approx","alpha":1.0,"timeoutRequestWorkers":60000,"useExternalMemory":false,"predictionCol":"prediction","objective":"reg:linear","customObj":{},"baseScore":0.5,"objectiveType":"regression","numEarlyStoppingRounds":0,"trackerConf":{"workerConnectionTimeout":0,"trackerImpl":"python"}}}

Can anyone help with this? Thanks!