Segfault during code cleanup


#1

I am getting a segfault with XGBoost v0.90.

Stack trace and environment:

 * frame #0: 0x00007f50581618cc libc.so.6`__GI___libc_free + 28
    frame #1: 0x00007f5058e88879 libxgboost.so`dmlc::parameter::FieldAccessEntry::~FieldAccessEntry() [inlined] std::__1::__libcpp_deallocate(__ptr=<unavailable>) + 57
    frame #2: 0x00007f5058e88874 libxgboost.so`dmlc::parameter::FieldAccessEntry::~FieldAccessEntry() [inlined] std::__1::allocator<char>::deallocate(__p=<unavailable>)
    frame #3: 0x00007f5058e88874 libxgboost.so`dmlc::parameter::FieldAccessEntry::~FieldAccessEntry() [inlined] std::__1::allocator_traits<std::__1::allocator<char> >::deallocate(__p=<unavailable>)
    frame #4: 0x00007f5058e88874 libxgboost.so`dmlc::parameter::FieldAccessEntry::~FieldAccessEntry() [inlined] std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::~basic_string(this=0x000000000114c948) + 4
    frame #5: 0x00007f5058e88870 libxgboost.so`dmlc::parameter::FieldAccessEntry::~FieldAccessEntry(this=0x000000000114c900) + 48
    frame #6: 0x00007f5057a348ef libdmlc.so`dmlc::parameter::FieldEntry<std::string>::~FieldEntry() [inlined] dmlc::parameter::FieldEntryBase<dmlc::parameter::FieldEntry<std::string>, std::string>::~FieldEntryBase(this=0x000000000114c900) + 47
    frame #7: 0x00007f5057a348c8 libdmlc.so`dmlc::parameter::FieldEntry<std::string>::~FieldEntry(this=0x000000000114c900) + 8
    frame #8: 0x00007f5057a33f5f libdmlc.so`dmlc::parameter::ParamManager::~ParamManager(this=0x00007f5057cc78b0) + 47
    frame #9: 0x00007f505811bd2d libc.so.6`__cxa_finalize + 157
    frame #10: 0x00007f5057a2a3f3 libdmlc.so`__do_global_dtors_aux + 35
    frame #11: 0x00007f505a318fbc ld-linux-x86-64.so.2`_dl_fini + 412
    frame #12: 0x00007f505811b992 libc.so.6`__GI_exit + 226
    frame #13: 0x00007f5058104d24 libc.so.6`__libc_start_main + 260

CentOs release 6.9
clang version 6.0.0 (tags/RELEASE_600/final)
Target: x86_64-unknown-linux-gnu
Thread model: posix

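I am seeing this issue during cleanup at process exit, while static objects are being destroyed (see `__cxa_finalize` / `_dl_fini` in the trace above).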


#2

Can you put up a script that I can try running and reproduce the issue?


#3

@hcho3: Thanks for the response. I don’t have a script, and I’m not sure how easy it would be to reproduce the issue without exposing the model, which is internal, so there is not much I can share here.

If it helps, this happens at runtime, not at model training time. The same code works fine on Mac, but fails on Linux.


#4

Here is some additional information that might help:

(gdb) bt full
#0 0x00007fb08ebbc8cc in free () from /lib64/libc.so.6
No symbol table info available.
#1 0x00007fb08f9210c9 in dmlc::parameter::FieldAccessEntry::~FieldAccessEntry() () at <Path/To/MyDirectory>//clang/RELEASE/6.0.0/bin/…/include/c++/v1/new:236
No locals.
#2 0x00007fb08e48cf2f in dmlc::parameter::FieldEntry<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >::~FieldEntry() () at <Path/To/MyDirectory>/xgboost/dmlc-core/include/dmlc/parameter.h:618
std::piecewise_construct = {}
dmlc::make_ParserFactoryReg_uint32_t_real_t_libsvm = @0x1828f80
dmlc::make_ParserFactoryReg_uint32_t_real_t_libfm = @0x1829120
dmlc::make_ParserFactoryReg_uint64_t_int32_t_csv = @0x1829570
dmlc::make_ParserFactoryReg_uint64_t_int64_t_csv = @0x18296f0
dmlc::make_ParserFactoryReg_uint64_t_real_t_csv = @0x18293b0
dmlc::make_ParserFactoryReg_uint32_t_real_t_csv = @0x18292a0
dmlc::make_ParserFactoryReg_uint32_t_int32_t_csv = @0x18294c0
dmlc::make_ParserFactoryReg_uint32_t_int64_t_csv = @0x1829620
dmlc::make_ParserFactoryReg_uint64_t_real_t_libfm = @0x18291f0
std::__ioinit = {static _S_refcount = , static _S_synced_with_stdio = }
dmlc::make_ParserFactoryReg_uint64_t_real_t_libsvm = @0x1829030
#3 0x00007fb08e48c59f in dmlc::parameter::ParamManager::~ParamManager() () at <Path/To/MyDirectory>/xgboost/dmlc-core/include/dmlc/parameter.h:407
std::piecewise_construct = {}
dmlc::make_ParserFactoryReg_uint32_t_real_t_libsvm = @0x1828f80
dmlc::make_ParserFactoryReg_uint32_t_real_t_libfm = @0x1829120
dmlc::make_ParserFactoryReg_uint64_t_int32_t_csv = @0x1829570
dmlc::make_ParserFactoryReg_uint64_t_int64_t_csv = @0x18296f0
dmlc::make_ParserFactoryReg_uint64_t_real_t_csv = @0x18293b0
dmlc::make_ParserFactoryReg_uint32_t_real_t_csv = @0x18292a0
dmlc::make_ParserFactoryReg_uint32_t_int32_t_csv = @0x18294c0
dmlc::make_ParserFactoryReg_uint32_t_int64_t_csv = @0x1829620
dmlc::make_ParserFactoryReg_uint64_t_real_t_libfm = @0x18291f0
std::__ioinit = {static _S_refcount = , static _S_synced_with_stdio = }
dmlc::make_ParserFactoryReg_uint64_t_real_t_libsvm = @0x1829030
#4 0x00007fb08eb76d2d in __cxa_finalize () from /lib64/libc.so.6
No symbol table info available.
#5 0x00007fb08e4829e3 in __do_global_dtors_aux () from <Path/To/MyDirectory>/build/install/lib64/…/lib64/libdmlc.so
std::__ioinit = {static _S_refcount = , static _S_synced_with_stdio = }
#6 0x00007ffd1c836a90 in ?? ()
No symbol table info available.
#7 0x00007fb090e02fbc in _dl_fini () from /lib64/ld-linux-x86-64.so.2
No symbol table info available.
Backtrace stopped: frame did not save the PC


#5

(gdb) frame 2
#2 0x00007fb08e48cf2f in dmlc::parameter::FieldEntry<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >::~FieldEntry() () at <Path/To/MyDirectory>/xgboost/dmlc-core/include/dmlc/parameter.h:618
618 class FieldEntryBase : public FieldAccessEntry {
(gdb) up
#3 0x00007fb08e48c59f in dmlc::parameter::ParamManager::~ParamManager() () at <Path/To/MyDirectory>/xgboost/dmlc-core/include/dmlc/parameter.h:407
407 delete entry_[i];
(gdb) down
#2 0x00007fb08e48cf2f in dmlc::parameter::FieldEntry<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >::~FieldEntry() () at <Path/To/MyDirectory>/xgboost/dmlc-core/include/dmlc/parameter.h:618
618 class FieldEntryBase : public FieldAccessEntry {
(gdb) down
#1 0x00007fb08f9210c9 in dmlc::parameter::FieldAccessEntry::~FieldAccessEntry() () at /opt/<Path/To/MyDirectory>//clang/RELEASE/6.0.0/bin/…/include/c++/v1/new:236
236 __builtin_operator_delete(__ptr);


#6

In file …/xgboost/dmlc-core/include/dmlc/parameter.h

     * \brief manager class to handle parameter structure for each type
     *  A manager will be created for each parameter structure.
     */
    class ParamManager {
     public:
      /*! \brief destructor */
      ~ParamManager() {
        for (size_t i = 0; i < entry_.size(); ++i) {
          delete entry_[i];   <---- This causes the segfault
        }
      }