Before anything, I want to thank the authors and maintainers for their work with xgboost. I’ve used it via R (CPU only) many times and really appreciate it.
Owning an RTX 3080, I want to take advantage of the GPU. I followed all the directions at the install page, and I have a working GPU-version of keras/tensorflow in R, so I know I have the proper files.
I invoke the configuration as: cmake .. -G"Visual Studio 16 2019" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DR_VERSION=4.0.0 -DGPU_COMPUTE_VER=86 -DLIBR_HOME="C:\R\RCurrent\R-devel".
I don’t notice anything out of the ordinary in the configuration output, except perhaps for a note about backtrace not being found, but I saw no mention of that in the docs. I then invoke the build as: cmake --build . --target install --config Release
A slew of compilation messages ensues, but the build always seems to stop at the following (this is the tail of the output):
C:\Users\Parents\AppData\Local\Temp\tmpxft_00005ba0_00000000-7_updater_gpu_hist.cudafe1.stub.c(150): note: see refere
nce to function template instantiation 'void xgboost::tree::RowPartitioner::UpdatePosition<__nv_dl_wrapper_t<__nv_dl_
tag<void (__cdecl xgboost::tree::GPUHistMakerDevice<GradientSumT>::* )(int,xgboost::RegTree *),void xgboost::tree::GP
UHistMakerDevice<GradientSumT>::UpdatePosition(int,xgboost::RegTree *),1>,xgboost::EllpackDeviceAccessor,xgboost::Reg
Tree::Node,xgboost::FeatureType,xgboost::common::Span<unsigned int,18446744073709551615>>>(xgboost::bst_node_t,xgboos
t::bst_node_t,xgboost::bst_node_t,UpdatePositionOpT)' being compiled
with
[
GradientSumT=xgboost::detail::GradientPairInternal<float>,
UpdatePositionOpT=__nv_dl_wrapper_t<__nv_dl_tag<void (__cdecl xgboost::tree::GPUHistMakerDevice<xgboost::
detail::GradientPairInternal<float>>::* )(int,xgboost::RegTree *),void xgboost::tree::GPUHistMakerDevice<xgboost::det
ail::GradientPairInternal<float>>::UpdatePosition(int,xgboost::RegTree *),1>,xgboost::EllpackDeviceAccessor,xgboost::
RegTree::Node,xgboost::FeatureType,xgboost::common::Span<unsigned int,18446744073709551615>>
]
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/include\thrust/system/cuda/detail/reduce.h(945): warning C4244
: 'initializing': conversion from 'Size' to 'thrust::detail::int32_t', possible loss of data [D:\xgboostgpu\build\src\o
bjxgboost.vcxproj]
with
[
Size=size_type
]
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/include\thrust/system/cuda/detail/reduce.h(1021): note: see
reference to function template instantiation 'T thrust::cuda_cub::detail::reduce_n_impl<Derived,InputIt,Size,T,Binary
Op>(thrust::cuda_cub::execution_policy<Derived> &,InputIt,Size,T,BinaryOp)' being compiled
with
[
T=xgboost::GradientPair,
Derived=thrust::detail::execute_with_allocator<dh::detail::XGBCachingDeviceAllocatorImpl<char> &,thrust::
cuda_cub::execute_on_stream_base>,
InputIt=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
Size=size_type,
BinaryOp=thrust::plus<xgboost::detail::GradientPairInternal<float>>
]
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/include\thrust/system/cuda/detail/reduce.h(1041): note: see
reference to function template instantiation 'T thrust::cuda_cub::reduce_n<Derived,InputIt,size_type,T,BinaryOp>(thru
st::cuda_cub::execution_policy<Derived> &,InputIt,Size,T,BinaryOp)' being compiled
with
[
T=xgboost::GradientPair,
Derived=thrust::detail::execute_with_allocator<dh::detail::XGBCachingDeviceAllocatorImpl<char> &,thrust::
cuda_cub::execute_on_stream_base>,
InputIt=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
BinaryOp=thrust::plus<xgboost::detail::GradientPairInternal<float>>,
Size=size_type
]
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/include\thrust/detail/reduce.inl(71): note: see reference to
function template instantiation 'T thrust::cuda_cub::reduce<Derived,InputIterator,T,BinaryFunction>(thrust::cuda_cub
::execution_policy<Derived> &,InputIt,InputIt,T,BinaryOp)' being compiled
with
[
T=xgboost::GradientPair,
Derived=thrust::detail::execute_with_allocator<dh::detail::XGBCachingDeviceAllocatorImpl<char> &,thrust::
cuda_cub::execute_on_stream_base>,
InputIterator=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
BinaryFunction=thrust::plus<xgboost::detail::GradientPairInternal<float>>,
InputIt=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
BinaryOp=thrust::plus<xgboost::detail::GradientPairInternal<float>>
]
D:\xgboostgpu\src\tree\../common/device_helpers.cuh(1268): note: see reference to function template instantiation 'T
thrust::reduce<DerivedPolicy,Derived,Init,Func>(const thrust::detail::execution_policy_base<DerivedPolicy> &,InputIte
rator,InputIterator,T,BinaryFunction)' being compiled
with
[
T=xgboost::GradientPair,
DerivedPolicy=thrust::detail::execute_with_allocator<dh::detail::XGBCachingDeviceAllocatorImpl<char> &,th
rust::cuda_cub::execute_on_stream_base>,
Derived=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
Init=xgboost::GradientPair,
Func=thrust::plus<xgboost::detail::GradientPairInternal<float>>,
InputIterator=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
BinaryFunction=thrust::plus<xgboost::detail::GradientPairInternal<float>>
]
D:/xgboostgpu/src/tree/updater_gpu_hist.cu(652): note: see reference to function template instantiation 'Ty dh::Reduc
e<thrust::detail::execute_with_allocator<Allocator,ExecutionPolicyCRTPBase>,thrust::device_ptr<const T>,xgboost::Grad
ientPair,thrust::plus<T>>(Policy,InputIt,InputIt,Init,Func)' being compiled
with
[
Allocator=dh::detail::XGBCachingDeviceAllocatorImpl<char> &,
ExecutionPolicyCRTPBase=thrust::cuda_cub::execute_on_stream_base,
T=xgboost::detail::GradientPairInternal<float>,
Policy=thrust::detail::execute_with_allocator<dh::detail::XGBCachingDeviceAllocatorImpl<char> &,thrust::c
uda_cub::execute_on_stream_base>,
InputIt=thrust::device_ptr<const xgboost::detail::GradientPairInternal<float>>,
Init=xgboost::GradientPair,
Func=thrust::plus<xgboost::detail::GradientPairInternal<float>>
]
D:/xgboostgpu/src/tree/updater_gpu_hist.cu(649): note: while compiling class template member function 'xgboost::tree:
:ExpandEntry xgboost::tree::GPUHistMakerDevice<GradientSumT>::InitRoot(xgboost::RegTree *,dh::AllReducer *)'
with
[
GradientSumT=xgboost::detail::GradientPairInternal<double>
]
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2/include\thrust/system/cuda/detail/reduce.h(973): warning C4244
: 'initializing': conversion from 'Size' to 'thrust::detail::int32_t', possible loss of data [D:\xgboostgpu\build\src\o
bjxgboost.vcxproj]
with
[
Size=size_type
]
D:\xgboostgpu\src\common\threading_utils.h(139): warning C4267: 'argument': conversion from 'size_t' to 'int', possible
loss of data [D:\xgboostgpu\build\src\objxgboost.vcxproj]
D:\xgboostgpu\src\common\threading_utils.h(148): note: see reference to function template instantiation 'void xgboost
::common::ParallelFor<Index,Func>(Index,size_t,Func)' being compiled
with
[
Index=xgboost::omp_ulong,
Func=xgboost::common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl
xgboost::tree::TreeEvaluator::* )(int,int,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<
true>(xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsign
ed __int64,xgboost::common::Span<float,18446744073709551615>,xgboost::common::Span<float,18446744073709551615>,xgboos
t::common::Span<int,18446744073709551615>),int,int,int,unsigned int,float,float>>::LaunchCPU::<lambda_8eeb19893970615
efc98e2c89f9a4d91>
]
D:\xgboostgpu\src\tree\../common/transform.h(175): note: see reference to function template instantiation 'void xgboo
st::common::ParallelFor<xgboost::omp_ulong,xgboost::common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false
,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEvaluator::* )(int,int,int,unsigned int,float,float),void xgboost::tree
::TreeEvaluator::AddSplit<true>(xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,fl
oat,float),1>,void (unsigned __int64,xgboost::common::Span<T,18446744073709551615>,xgboost::common::Span<T,1844674407
3709551615>,xgboost::common::Span<int,18446744073709551615>),int,int,int,unsigned int,float,float>>::LaunchCPU::<lamb
da_8eeb19893970615efc98e2c89f9a4d91>>(Index,Func)' being compiled
with
[
T=float,
Index=xgboost::omp_ulong,
Func=xgboost::common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl
xgboost::tree::TreeEvaluator::* )(int,int,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<
true>(xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsign
ed __int64,xgboost::common::Span<float,18446744073709551615>,xgboost::common::Span<float,18446744073709551615>,xgboos
t::common::Span<int,18446744073709551615>),int,int,int,unsigned int,float,float>>::LaunchCPU::<lambda_8eeb19893970615
efc98e2c89f9a4d91>
]
D:\xgboostgpu\src\tree\../common/transform.h(82): note: see reference to function template instantiation 'void xgboos
t::common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEva
luator::* )(int,int,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node
_t,xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsigned __int64,xgboost::com
mon::Span<T,18446744073709551615>,xgboost::common::Span<T,18446744073709551615>,xgboost::common::Span<int,18446744073
709551615>),int,int,int,unsigned int,float,float>>::LaunchCPU<xgboost::HostDeviceVector<T>,xgboost::HostDeviceVector<
T>,xgboost::HostDeviceVector<int>>(Functor,xgboost::HostDeviceVector<T> *,xgboost::HostDeviceVector<T> *,xgboost::Hos
tDeviceVector<int> *) const' being compiled
with
[
T=float,
Functor=__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEvaluator::* )(int,in
t,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node_t,xgboost::bst_no
de_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsigned __int64,xgboost::common::Span<float,18
446744073709551615>,xgboost::common::Span<float,18446744073709551615>,xgboost::common::Span<int,18446744073709551615>
),int,int,int,unsigned int,float,float>
]
D:\xgboostgpu\src\tree\../common/transform.h(82): note: see reference to function template instantiation 'void xgboos
t::common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEva
luator::* )(int,int,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node
_t,xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsigned __int64,xgboost::com
mon::Span<T,18446744073709551615>,xgboost::common::Span<T,18446744073709551615>,xgboost::common::Span<int,18446744073
709551615>),int,int,int,unsigned int,float,float>>::LaunchCPU<xgboost::HostDeviceVector<T>,xgboost::HostDeviceVector<
T>,xgboost::HostDeviceVector<int>>(Functor,xgboost::HostDeviceVector<T> *,xgboost::HostDeviceVector<T> *,xgboost::Hos
tDeviceVector<int> *) const' being compiled
with
[
T=float,
Functor=__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEvaluator::* )(int,in
t,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node_t,xgboost::bst_no
de_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsigned __int64,xgboost::common::Span<float,18
446744073709551615>,xgboost::common::Span<float,18446744073709551615>,xgboost::common::Span<int,18446744073709551615>
),int,int,int,unsigned int,float,float>
]
D:\xgboostgpu\src\tree\split_evaluator.h(172): note: see reference to function template instantiation 'void xgboost::
common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEvalua
tor::* )(int,int,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node_t,
xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsigned __int64,xgboost::common
::Span<T,18446744073709551615>,xgboost::common::Span<T,18446744073709551615>,xgboost::common::Span<int,18446744073709
551615>),int,int,int,unsigned int,float,float>>::Eval<xgboost::HostDeviceVector<T>*,xgboost::HostDeviceVector<T>*,xgb
oost::HostDeviceVector<int>*>(xgboost::HostDeviceVector<T> *,xgboost::HostDeviceVector<T> *,xgboost::HostDeviceVector
<int> *) const' being compiled
with
[
T=float
]
D:\xgboostgpu\src\tree\split_evaluator.h(172): note: see reference to function template instantiation 'void xgboost::
common::Transform<true>::Evaluator<__nv_hdl_wrapper_t<false,false,__nv_dl_tag<void (__cdecl xgboost::tree::TreeEvalua
tor::* )(int,int,int,unsigned int,float,float),void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node_t,
xgboost::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float),1>,void (unsigned __int64,xgboost::common
::Span<T,18446744073709551615>,xgboost::common::Span<T,18446744073709551615>,xgboost::common::Span<int,18446744073709
551615>),int,int,int,unsigned int,float,float>>::Eval<xgboost::HostDeviceVector<T>*,xgboost::HostDeviceVector<T>*,xgb
oost::HostDeviceVector<int>*>(xgboost::HostDeviceVector<T> *,xgboost::HostDeviceVector<T> *,xgboost::HostDeviceVector
<int> *) const' being compiled
with
[
T=float
]
C:\Users\Parents\AppData\Local\Temp\tmpxft_00005ba0_00000000-7_updater_gpu_hist.cudafe1.stub.c(148): note: see refere
nce to function template instantiation 'void xgboost::tree::TreeEvaluator::AddSplit<true>(xgboost::bst_node_t,xgboost
::bst_node_t,xgboost::bst_node_t,xgboost::bst_feature_t,float,float)' being compiled
D:\xgboostgpu\cub\cub\agent\../grid/grid_even_share.cuh(133): warning C4244: '=': conversion from 'OffsetT' to 'int', p
ossible loss of data [D:\xgboostgpu\build\src\objxgboost.vcxproj]
with
[
OffsetT=__int64
]
D:\xgboostgpu\cub\cub\agent\../grid/grid_even_share.cuh(128): note: while compiling class template member function 'v
oid cub::GridEvenShare<__int64>::DispatchInit(OffsetT,int,int)'
with
[
OffsetT=__int64
]
D:\xgboostgpu\cub\cub\device\dispatch/dispatch_reduce.cuh(481): note: see reference to function template instantiatio
n 'void cub::GridEvenShare<__int64>::DispatchInit(OffsetT,int,int)' being compiled
with
[
OffsetT=__int64
]
C:\Users\Parents\AppData\Local\Temp\tmpxft_00005ba0_00000000-7_updater_gpu_hist.cudafe1.stub.c(980): note: see refere
nce to class template instantiation 'cub::GridEvenShare<__int64>' being compiled
D:\xgboostgpu\cub\cub\agent\../grid/grid_even_share.cuh(135): warning C4244: '=': conversion from 'OffsetT' to 'int', p
ossible loss of data [D:\xgboostgpu\build\src\objxgboost.vcxproj]
with
[
OffsetT=__int64
]
After the build stops there, I cannot find a lib folder or a built R package anywhere in the build tree. What can I do to fix this? Is there more information I could provide that would help diagnose it? Thank you.