diff --git a/packages/xgboost/meta.yaml b/packages/xgboost/meta.yaml index 5d8edcb29..6e22f7347 100644 --- a/packages/xgboost/meta.yaml +++ b/packages/xgboost/meta.yaml @@ -1,21 +1,20 @@ package: name: xgboost - _disabled: true - version: 1.6.1 + version: 2.1.0.dev0 top-level: - xgboost source: - url: https://files.pythonhosted.org/packages/0e/8c/19309bcaf9a88b0bab34b88935925153f3f3f646163acaae9aa148cf72bb/xgboost-1.6.1.tar.gz - sha256: 24072028656f3428e7b8aabf77340ece057f273e41f7f85d67ccaefb7454bb18 + # temporary URL until xgboost makes a release + url: https://github.com/ryanking13/xgboost/releases/download/2.1.0.dev0/xgboost-2.1.0.dev0.tar.gz + sha256: 0695165010555807a6d3817b0f3ce05efeac74ede8e1d1f74853db944ad0e9f7 patches: - - patches/0001-Add-missing-template-type.patch - - patches/0002-Add-library-loading-path.patch - - patches/0003-Fix-type-mismatch-for-CSR-conversion-in-c_api.patch + - patches/0001-Fix-compilation-on-32-bit-platforms.patch build: + # DMLC_LOG_STACK_TRACE=0 is to handle https://github.com/dmlc/xgboost/issues/8595 cflags: | -DDMLC_USE_FOPEN64=0 -DDMLC_ENABLE_STD_THREAD=0 - -DDMLC_CXX11_THREAD_LOCAL=0 + -DDMLC_LOG_STACK_TRACE=0 -DUSE_OPENMP=0 exports: requested requirements: @@ -28,4 +27,3 @@ about: PyPI: https://pypi.org/project/xgboost summary: XGBoost Python Package license: Apache-2.0 -# Note: this package cannot be updated until we add support for building with meson diff --git a/packages/xgboost/patches/0001-Add-missing-template-type.patch b/packages/xgboost/patches/0001-Add-missing-template-type.patch deleted file mode 100644 index 4ccf4ad8f..000000000 --- a/packages/xgboost/patches/0001-Add-missing-template-type.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 4ac9a00d9e16b0879b4e734a4b604c7ce672894e Mon Sep 17 00:00:00 2001 -From: Gyeongjae Choi -Date: Mon, 9 May 2022 06:42:07 +0000 -Subject: [PATCH 1/3] Add missing template type - -TODO: Remove this patch when XGBoost version is updated. -(Upstream PR: https://github.com/dmlc/xgboost/pull/7954) - ---- - src/common/host_device_vector.cc | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/xgboost/src/common/host_device_vector.cc b/xgboost/src/common/host_device_vector.cc -index 3a4a59db..fc33317b 100644 ---- a/xgboost/src/common/host_device_vector.cc -+++ b/xgboost/src/common/host_device_vector.cc -@@ -180,13 +180,16 @@ template class HostDeviceVector; // bst_row_t - template class HostDeviceVector; // bst_feature_t - template class HostDeviceVector; - --#if defined(__APPLE__) -+#if defined(__APPLE__) || defined(__EMSCRIPTEN__) - /* - * On OSX: - * - * typedef unsigned int uint32_t; - * typedef unsigned long long uint64_t; - * typedef unsigned long __darwin_size_t; -+ * -+ * On Emscripten: -+ * typedef unsigned long size_t; - */ - template class HostDeviceVector; - #endif // defined(__APPLE__) --- -2.35.1 - diff --git a/packages/xgboost/patches/0001-Fix-compilation-on-32-bit-platforms.patch b/packages/xgboost/patches/0001-Fix-compilation-on-32-bit-platforms.patch new file mode 100644 index 000000000..fdf514170 --- /dev/null +++ b/packages/xgboost/patches/0001-Fix-compilation-on-32-bit-platforms.patch @@ -0,0 +1,108 @@ +From ec6451264b6a348f4a6eaa2e067fb1ffa432a6c2 Mon Sep 17 00:00:00 2001 +From: Gyeongjae Choi +Date: Tue, 23 Apr 2024 10:04:42 +0000 +Subject: [PATCH 1/1] Fix compilation on 32-bit platforms. + +Partially applies the upstream PR: https://github.com/dmlc/xgboost/pull/8964 + +--- + src/collective/communicator-inl.h | 9 --------- + src/common/quantile.cc | 6 +++--- + src/data/iterative_dmatrix.cc | 2 +- + src/metric/auc.cc | 2 +- + src/objective/adaptive.h | 2 +- + 5 files changed, 6 insertions(+), 15 deletions(-) + +diff --git a/src/collective/communicator-inl.h b/src/collective/communicator-inl.h +index 991e19f2c..ea7b415b1 100644 +--- a/cpp_src/src/collective/communicator-inl.h ++++ b/cpp_src/src/collective/communicator-inl.h +@@ -288,15 +288,6 @@ inline void Allreduce(uint64_t *send_receive_buffer, size_t count) { + Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kUInt64, op); + } + +-// Specialization for size_t, which is implementation defined, so it might or might not +-// be one of uint64_t/uint32_t/unsigned long long/unsigned long. +-template {} && !std::is_same{}> > +-inline void Allreduce(T *send_receive_buffer, size_t count) { +- static_assert(sizeof(T) == sizeof(uint64_t)); +- Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kUInt64, op); +-} +- + template + inline void Allreduce(float *send_receive_buffer, size_t count) { + Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kFloat, op); +diff --git a/src/common/quantile.cc b/src/common/quantile.cc +index 8c743d940..0ea819c38 100644 +--- a/cpp_src/src/common/quantile.cc ++++ b/cpp_src/src/common/quantile.cc +@@ -154,7 +154,7 @@ void SketchContainerImpl::GatherSketchInfo( + worker_segments.resize(1, 0); + auto world = collective::GetWorldSize(); + auto rank = collective::GetRank(); +- auto n_columns = sketches_.size(); ++ std::uint64_t n_columns = sketches_.size(); + + // get the size of each feature. + std::vector sketch_size; +@@ -285,7 +285,7 @@ void SketchContainerImpl::AllReduce( + std::vector *p_reduced, std::vector *p_num_cuts) { + monitor_.Start(__func__); + +- size_t n_columns = sketches_.size(); ++ std::uint64_t n_columns = sketches_.size(); + collective::Allreduce(&n_columns, 1); + CHECK_EQ(n_columns, sketches_.size()) << "Number of columns differs across workers"; + +@@ -339,7 +339,7 @@ void SketchContainerImpl::AllReduce( + ParallelFor(n_columns, n_threads_, [&](auto fidx) { + // gcc raises subobject-linkage warning if we put allreduce_result as lambda capture + QuantileAllreduce allreduce_result{global_sketches, worker_segments, +- sketches_scan, n_columns}; ++ sketches_scan, static_cast(n_columns)}; + int32_t intermediate_num_cuts = num_cuts[fidx]; + auto nbytes = WQSketch::SummaryContainer::CalcMemCost(intermediate_num_cuts); + if (IsCat(feature_types_, fidx)) { +diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc +index 0d75d0651..75f9d1145 100644 +--- a/cpp_src/src/data/iterative_dmatrix.cc ++++ b/cpp_src/src/data/iterative_dmatrix.cc +@@ -100,7 +100,7 @@ void SyncFeatureType(Context const*, std::vector* p_h_ft) { + return; + } + auto& h_ft = *p_h_ft; +- auto n_ft = h_ft.size(); ++ std::uint64_t n_ft = h_ft.size(); + collective::Allreduce(&n_ft, 1); + if (!h_ft.empty()) { + // Check correct size if this is not an empty DMatrix. +diff --git a/src/metric/auc.cc b/src/metric/auc.cc +index 212a3a027..bf2862a7d 100644 +--- a/cpp_src/src/metric/auc.cc ++++ b/cpp_src/src/metric/auc.cc +@@ -264,7 +264,7 @@ class EvalAUC : public MetricNoCache { + info.weights_.SetDevice(ctx_->Device()); + } + // We use the global size to handle empty dataset. +- std::array meta{info.labels.Size(), preds.Size()}; ++ std::array meta{info.labels.Size(), preds.Size()}; + if (!info.IsVerticalFederated()) { + collective::Allreduce(meta.data(), meta.size()); + } +diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h +index cbe69e79a..c9e92ae59 100644 +--- a/cpp_src/src/objective/adaptive.h ++++ b/cpp_src/src/objective/adaptive.h +@@ -42,7 +42,7 @@ inline void UpdateLeafValues(Context const* ctx, std::vector* p_quantiles + auto& quantiles = *p_quantiles; + auto const& h_node_idx = nidx; + +- size_t n_leaf = collective::GlobalMax(ctx, info, h_node_idx.size()); ++ std::uint64_t n_leaf = collective::GlobalMax(ctx, info, static_cast(h_node_idx.size())); + CHECK(quantiles.empty() || quantiles.size() == n_leaf); + if (quantiles.empty()) { + quantiles.resize(n_leaf, std::numeric_limits::quiet_NaN()); +-- +2.43.2 + diff --git a/packages/xgboost/patches/0002-Add-library-loading-path.patch b/packages/xgboost/patches/0002-Add-library-loading-path.patch deleted file mode 100644 index 9d5e5f0e6..000000000 --- a/packages/xgboost/patches/0002-Add-library-loading-path.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 54c2a9faeb0b0169172c5ab53367e6092f132c5a Mon Sep 17 00:00:00 2001 -From: Gyeongjae Choi -Date: Mon, 9 May 2022 12:07:44 +0000 -Subject: [PATCH 2/3] Add library loading path - -TODO: Remove this patch when XGBoost version is updated. -(Upstream PR: https://github.com/dmlc/xgboost/pull/7954) - ---- - python-package/xgboost/libpath.py | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/xgboost/libpath.py b/xgboost/libpath.py -index f7a7d9cd..1ab41cbe 100644 ---- a/xgboost/libpath.py -+++ b/xgboost/libpath.py -@@ -43,8 +43,7 @@ def find_lib_path() -> List[str]: - # directory here - dll_path.append(os.path.join(curr_path, './windows/Release/')) - dll_path = [os.path.join(p, 'xgboost.dll') for p in dll_path] -- elif sys.platform.startswith('linux') or sys.platform.startswith( -- 'freebsd'): -+ elif sys.platform.startswith(('linux', 'freebsd', 'emscripten')): - dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path] - elif sys.platform == 'darwin': - dll_path = [os.path.join(p, 'libxgboost.dylib') for p in dll_path] --- -2.35.1 - diff --git a/packages/xgboost/patches/0003-Fix-type-mismatch-for-CSR-conversion-in-c_api.patch b/packages/xgboost/patches/0003-Fix-type-mismatch-for-CSR-conversion-in-c_api.patch deleted file mode 100644 index 92abf7deb..000000000 --- a/packages/xgboost/patches/0003-Fix-type-mismatch-for-CSR-conversion-in-c_api.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 4ec1b506b424dd9e81fd7127f5712522800a5596 Mon Sep 17 00:00:00 2001 -From: Yizhi Liu -Date: Mon, 17 Oct 2022 15:16:45 -0700 -Subject: [PATCH 3/3] Fix type mismatch for CSR conversion in c_api - -TODO: Remove this patch when XGBoost version is updated. -(Upstream PR: https://github.com/dmlc/xgboost/pull/8369) - ---- - xgboost/core.py | 2 +- - xgboost/data.py | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xgboost/core.py b/xgboost/core.py -index 36548d8..0246779 100644 ---- a/xgboost/core.py -+++ b/xgboost/core.py -@@ -2119,7 +2119,7 @@ class Booster: - _array_interface(csr.indptr), - _array_interface(csr.indices), - _array_interface(csr.data), -- ctypes.c_size_t(csr.shape[1]), -+ c_bst_ulong(csr.shape[1]), - from_pystr_to_cstr(json.dumps(args)), - p_handle, - ctypes.byref(shape), -diff --git a/xgboost/data.py b/xgboost/data.py -index 119b354..b958436 100644 ---- a/xgboost/data.py -+++ b/xgboost/data.py -@@ -88,7 +88,7 @@ def _from_scipy_csr( - _array_interface(data.indptr), - _array_interface(data.indices), - _array_interface(data.data), -- ctypes.c_size_t(data.shape[1]), -+ c_bst_ulong(data.shape[1]), - config, - ctypes.byref(handle), - ) --- -2.35.1 - diff --git a/packages/xgboost/test_xgboost.py b/packages/xgboost/test_xgboost.py index 96b96f60a..1e096988e 100644 --- a/packages/xgboost/test_xgboost.py +++ b/packages/xgboost/test_xgboost.py @@ -135,11 +135,11 @@ def test_pandas(selenium): # 1 2 0 1 0 # 2 3 0 0 1 result, _, _ = xgb.data._transform_pandas_df(dummies, enable_categorical=False) - exp = np.array([[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]) - np.testing.assert_array_equal(result, exp) + exp = np.array([[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]).T + np.testing.assert_array_equal(result.columns, exp) dm = xgb.DMatrix(dummies) assert dm.feature_names == ["B", "A_X", "A_Y", "A_Z"] - assert dm.feature_types == ["int", "int", "int", "int"] + assert dm.feature_types == ["int", "i", "i", "i"] assert dm.num_row() == 3 assert dm.num_col() == 4 @@ -228,7 +228,7 @@ def test_pandas_categorical(selenium): X, enable_categorical=True ) - assert transformed[:, 0].min() == 0 + assert transformed.columns[0].min() == 0 # test missing value X = pd.DataFrame({"f0": ["a", "b", np.NaN]}) @@ -282,18 +282,18 @@ def test_pandas_label(selenium): # label must be a single column df = pd.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]}) with pytest.raises(ValueError): - xgb.data._transform_pandas_df(df, False, None, None, "label", "float") + xgb.data._transform_pandas_df(df, False, None, None, "label") # label must be supported dtype df = pd.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)}) with pytest.raises(ValueError): - xgb.data._transform_pandas_df(df, False, None, None, "label", "float") + xgb.data._transform_pandas_df(df, False, None, None, "label") df = pd.DataFrame({"A": np.array([1, 2, 3], dtype=int)}) - result, _, _ = xgb.data._transform_pandas_df( - df, False, None, None, "label", "float" + result, _, _ = xgb.data._transform_pandas_df(df, False, None, None, "label") + np.testing.assert_array_equal( + np.stack(result.columns, axis=1), np.array([[1.0], [2.0], [3.0]], dtype=float) ) - np.testing.assert_array_equal(result, np.array([[1.0], [2.0], [3.0]], dtype=float)) dm = xgb.DMatrix(np.random.randn(3, 2), label=df) assert dm.num_row() == 3 assert dm.num_col() == 2