mirror of https://github.com/pyodide/pyodide.git
Re-enable and update XGBoost (#4716)
Co-authored-by: Hood Chatham <roberthoodchatham@gmail.com>
parent 4633a958da
commit 55339e00e4

packages/xgboost/meta.yaml
@@ -1,21 +1,20 @@
 package:
   name: xgboost
-  _disabled: true
-  version: 1.6.1
+  version: 2.1.0.dev0
   top-level:
     - xgboost
 source:
-  url: https://files.pythonhosted.org/packages/0e/8c/19309bcaf9a88b0bab34b88935925153f3f3f646163acaae9aa148cf72bb/xgboost-1.6.1.tar.gz
-  sha256: 24072028656f3428e7b8aabf77340ece057f273e41f7f85d67ccaefb7454bb18
+  # temporary URL until xgboost makes a release
+  url: https://github.com/ryanking13/xgboost/releases/download/2.1.0.dev0/xgboost-2.1.0.dev0.tar.gz
+  sha256: 0695165010555807a6d3817b0f3ce05efeac74ede8e1d1f74853db944ad0e9f7
   patches:
-    - patches/0001-Add-missing-template-type.patch
-    - patches/0002-Add-library-loading-path.patch
-    - patches/0003-Fix-type-mismatch-for-CSR-conversion-in-c_api.patch
+    - patches/0001-Fix-compilation-on-32-bit-platforms.patch
 build:
   # DMLC_LOG_STACK_TRACE=0 is to handle https://github.com/dmlc/xgboost/issues/8595
   cflags: |
     -DDMLC_USE_FOPEN64=0
     -DDMLC_ENABLE_STD_THREAD=0
     -DDMLC_CXX11_THREAD_LOCAL=0
     -DDMLC_LOG_STACK_TRACE=0
     -DUSE_OPENMP=0
   exports: requested
 requirements:
@@ -28,4 +27,3 @@ about:
   PyPI: https://pypi.org/project/xgboost
   summary: XGBoost Python Package
   license: Apache-2.0
-# Note: this package cannot be updated until we add support for building with meson
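The recipe change above re-enables the package: the _disabled flag is dropped, the source moves to a 2.1.0.dev0 tarball, and only the 32-bit compilation patch is kept. A minimal smoke test of the rebuilt package, run in the Pyodide REPL (which allows top-level await); the package name "xgboost" and the expected version are taken from the recipe fields, nothing else is assumed:

import pyodide_js  # available when running inside Pyodide

await pyodide_js.loadPackage("xgboost")  # fetch the wheel built from this recipe
import xgboost as xgb

print(xgb.__version__)  # expected to match the recipe's version field, 2.1.0.dev0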
packages/xgboost/patches/0001-Add-missing-template-type.patch (deleted)
@@ -1,37 +0,0 @@
From 4ac9a00d9e16b0879b4e734a4b604c7ce672894e Mon Sep 17 00:00:00 2001
From: Gyeongjae Choi <def6488@gmail.com>
Date: Mon, 9 May 2022 06:42:07 +0000
Subject: [PATCH 1/3] Add missing template type

TODO: Remove this patch when XGBoost version is updated.
(Upstream PR: https://github.com/dmlc/xgboost/pull/7954)

---
 src/common/host_device_vector.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/xgboost/src/common/host_device_vector.cc b/xgboost/src/common/host_device_vector.cc
index 3a4a59db..fc33317b 100644
--- a/xgboost/src/common/host_device_vector.cc
+++ b/xgboost/src/common/host_device_vector.cc
@@ -180,13 +180,16 @@ template class HostDeviceVector<uint64_t>; // bst_row_t
 template class HostDeviceVector<uint32_t>; // bst_feature_t
 template class HostDeviceVector<RegTree::Segment>;

-#if defined(__APPLE__)
+#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
 /*
  * On OSX:
  *
  * typedef unsigned int uint32_t;
  * typedef unsigned long long uint64_t;
  * typedef unsigned long __darwin_size_t;
+ *
+ * On Emscripten:
+ * typedef unsigned long size_t;
  */
 template class HostDeviceVector<std::size_t>;
 #endif // defined(__APPLE__)
--
2.35.1
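The deleted patch above was only needed for 1.6.1; its comment records the reason: on Emscripten, size_t is unsigned long, a distinct C++ type from both uint32_t and uint64_t, so HostDeviceVector<std::size_t> needed its own explicit instantiation. A quick way to see the relevant native widths from Python; the values in the comments assume a wasm32/Emscripten build and differ on a 64-bit Linux host:

import struct

print(struct.calcsize("N"))  # size_t             -> 4 on wasm32
print(struct.calcsize("L"))  # unsigned long      -> 4 on wasm32
print(struct.calcsize("Q"))  # unsigned long long -> 8 on either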
packages/xgboost/patches/0001-Fix-compilation-on-32-bit-platforms.patch (new file)
@@ -0,0 +1,108 @@
From ec6451264b6a348f4a6eaa2e067fb1ffa432a6c2 Mon Sep 17 00:00:00 2001
From: Gyeongjae Choi <def6488@gmail.com>
Date: Tue, 23 Apr 2024 10:04:42 +0000
Subject: [PATCH 1/1] Fix compilation on 32-bit platforms.

Partially applies the upstream PR: https://github.com/dmlc/xgboost/pull/8964

---
 src/collective/communicator-inl.h | 9 ---------
 src/common/quantile.cc            | 6 +++---
 src/data/iterative_dmatrix.cc     | 2 +-
 src/metric/auc.cc                 | 2 +-
 src/objective/adaptive.h          | 2 +-
 5 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/src/collective/communicator-inl.h b/src/collective/communicator-inl.h
index 991e19f2c..ea7b415b1 100644
--- a/cpp_src/src/collective/communicator-inl.h
+++ b/cpp_src/src/collective/communicator-inl.h
@@ -288,15 +288,6 @@ inline void Allreduce(uint64_t *send_receive_buffer, size_t count) {
   Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kUInt64, op);
 }

-// Specialization for size_t, which is implementation defined, so it might or might not
-// be one of uint64_t/uint32_t/unsigned long long/unsigned long.
-template <Operation op, typename T,
-          typename = std::enable_if_t<std::is_same<size_t, T>{} && !std::is_same<uint64_t, T>{}> >
-inline void Allreduce(T *send_receive_buffer, size_t count) {
-  static_assert(sizeof(T) == sizeof(uint64_t));
-  Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kUInt64, op);
-}
-
 template <Operation op>
 inline void Allreduce(float *send_receive_buffer, size_t count) {
   Communicator::Get()->AllReduce(send_receive_buffer, count, DataType::kFloat, op);
diff --git a/src/common/quantile.cc b/src/common/quantile.cc
index 8c743d940..0ea819c38 100644
--- a/cpp_src/src/common/quantile.cc
+++ b/cpp_src/src/common/quantile.cc
@@ -154,7 +154,7 @@ void SketchContainerImpl<WQSketch>::GatherSketchInfo(
   worker_segments.resize(1, 0);
   auto world = collective::GetWorldSize();
   auto rank = collective::GetRank();
-  auto n_columns = sketches_.size();
+  std::uint64_t n_columns = sketches_.size();

   // get the size of each feature.
   std::vector<bst_idx_t> sketch_size;
@@ -285,7 +285,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
     std::vector<typename WQSketch::SummaryContainer> *p_reduced, std::vector<int32_t> *p_num_cuts) {
   monitor_.Start(__func__);

-  size_t n_columns = sketches_.size();
+  std::uint64_t n_columns = sketches_.size();
   collective::Allreduce<collective::Operation::kMax>(&n_columns, 1);
   CHECK_EQ(n_columns, sketches_.size()) << "Number of columns differs across workers";

@@ -339,7 +339,7 @@ void SketchContainerImpl<WQSketch>::AllReduce(
   ParallelFor(n_columns, n_threads_, [&](auto fidx) {
     // gcc raises subobject-linkage warning if we put allreduce_result as lambda capture
     QuantileAllreduce<typename WQSketch::Entry> allreduce_result{global_sketches, worker_segments,
-                                                                 sketches_scan, n_columns};
+                                                                 sketches_scan, static_cast<size_t>(n_columns)};
     int32_t intermediate_num_cuts = num_cuts[fidx];
     auto nbytes = WQSketch::SummaryContainer::CalcMemCost(intermediate_num_cuts);
     if (IsCat(feature_types_, fidx)) {
diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc
index 0d75d0651..75f9d1145 100644
--- a/cpp_src/src/data/iterative_dmatrix.cc
+++ b/cpp_src/src/data/iterative_dmatrix.cc
@@ -100,7 +100,7 @@ void SyncFeatureType(Context const*, std::vector<FeatureType>* p_h_ft) {
     return;
   }
   auto& h_ft = *p_h_ft;
-  auto n_ft = h_ft.size();
+  std::uint64_t n_ft = h_ft.size();
   collective::Allreduce<collective::Operation::kMax>(&n_ft, 1);
   if (!h_ft.empty()) {
     // Check correct size if this is not an empty DMatrix.
diff --git a/src/metric/auc.cc b/src/metric/auc.cc
index 212a3a027..bf2862a7d 100644
--- a/cpp_src/src/metric/auc.cc
+++ b/cpp_src/src/metric/auc.cc
@@ -264,7 +264,7 @@ class EvalAUC : public MetricNoCache {
       info.weights_.SetDevice(ctx_->Device());
     }
     // We use the global size to handle empty dataset.
-    std::array<size_t, 2> meta{info.labels.Size(), preds.Size()};
+    std::array<bst_idx_t, 2> meta{info.labels.Size(), preds.Size()};
     if (!info.IsVerticalFederated()) {
       collective::Allreduce<collective::Operation::kMax>(meta.data(), meta.size());
     }
diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h
index cbe69e79a..c9e92ae59 100644
--- a/cpp_src/src/objective/adaptive.h
+++ b/cpp_src/src/objective/adaptive.h
@@ -42,7 +42,7 @@ inline void UpdateLeafValues(Context const* ctx, std::vector<float>* p_quantiles
   auto& quantiles = *p_quantiles;
   auto const& h_node_idx = nidx;

-  size_t n_leaf = collective::GlobalMax(ctx, info, h_node_idx.size());
+  std::uint64_t n_leaf = collective::GlobalMax(ctx, info, static_cast<std::uint64_t>(h_node_idx.size()));
   CHECK(quantiles.empty() || quantiles.size() == n_leaf);
   if (quantiles.empty()) {
     quantiles.resize(n_leaf, std::numeric_limits<float>::quiet_NaN());
--
2.43.2
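The new patch above targets a width mismatch: the collective Allreduce overloads treat integer buffers as 64-bit (DataType::kUInt64), and the removed size_t overload static_asserts that size_t is as wide as uint64_t, which fails on wasm32 where size_t is 4 bytes. The patched code therefore declares those locals as std::uint64_t. A small Python sketch of the same mismatch; the sizes noted in the comments assume a wasm32 build:

import struct

n_columns = 7
as_size_t = struct.pack("N", n_columns)  # 4 bytes on wasm32, 8 on a 64-bit host
as_uint64 = struct.pack("Q", n_columns)  # always 8 bytes

print(len(as_size_t), len(as_uint64))    # (4, 8) on wasm32: treating the first
                                         # buffer as 64-bit integers would read
                                         # and write the wrong number of bytes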
packages/xgboost/patches/0002-Add-library-loading-path.patch (deleted)
@@ -1,29 +0,0 @@
From 54c2a9faeb0b0169172c5ab53367e6092f132c5a Mon Sep 17 00:00:00 2001
From: Gyeongjae Choi <def6488@gmail.com>
Date: Mon, 9 May 2022 12:07:44 +0000
Subject: [PATCH 2/3] Add library loading path

TODO: Remove this patch when XGBoost version is updated.
(Upstream PR: https://github.com/dmlc/xgboost/pull/7954)

---
 python-package/xgboost/libpath.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/xgboost/libpath.py b/xgboost/libpath.py
index f7a7d9cd..1ab41cbe 100644
--- a/xgboost/libpath.py
+++ b/xgboost/libpath.py
@@ -43,8 +43,7 @@ def find_lib_path() -> List[str]:
         # directory here
         dll_path.append(os.path.join(curr_path, './windows/Release/'))
         dll_path = [os.path.join(p, 'xgboost.dll') for p in dll_path]
-    elif sys.platform.startswith('linux') or sys.platform.startswith(
-            'freebsd'):
+    elif sys.platform.startswith(('linux', 'freebsd', 'emscripten')):
         dll_path = [os.path.join(p, 'libxgboost.so') for p in dll_path]
     elif sys.platform == 'darwin':
         dll_path = [os.path.join(p, 'libxgboost.dylib') for p in dll_path]
--
2.35.1
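The library-loading patch above is dropped with the version bump, as its TODO note anticipated. The mechanics of the one-line change it carried: str.startswith accepts a tuple of prefixes, and under Pyodide sys.platform reports "emscripten", so find_lib_path resolves libxgboost.so there. A minimal illustration (run inside Pyodide; a desktop interpreter prints a different platform string):

import sys

prefixes = ("linux", "freebsd", "emscripten")
print(sys.platform)                       # "emscripten" inside Pyodide
print(sys.platform.startswith(prefixes))  # True there, so libxgboost.so is chosen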
packages/xgboost/patches/0003-Fix-type-mismatch-for-CSR-conversion-in-c_api.patch (deleted)
@@ -1,42 +0,0 @@
From 4ec1b506b424dd9e81fd7127f5712522800a5596 Mon Sep 17 00:00:00 2001
From: Yizhi Liu <liuyizhi@apache.org>
Date: Mon, 17 Oct 2022 15:16:45 -0700
Subject: [PATCH 3/3] Fix type mismatch for CSR conversion in c_api

TODO: Remove this patch when XGBoost version is updated.
(Upstream PR: https://github.com/dmlc/xgboost/pull/8369)

---
 xgboost/core.py | 2 +-
 xgboost/data.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xgboost/core.py b/xgboost/core.py
index 36548d8..0246779 100644
--- a/xgboost/core.py
+++ b/xgboost/core.py
@@ -2119,7 +2119,7 @@ class Booster:
             _array_interface(csr.indptr),
             _array_interface(csr.indices),
             _array_interface(csr.data),
-            ctypes.c_size_t(csr.shape[1]),
+            c_bst_ulong(csr.shape[1]),
             from_pystr_to_cstr(json.dumps(args)),
             p_handle,
             ctypes.byref(shape),
diff --git a/xgboost/data.py b/xgboost/data.py
index 119b354..b958436 100644
--- a/xgboost/data.py
+++ b/xgboost/data.py
@@ -88,7 +88,7 @@ def _from_scipy_csr(
         _array_interface(data.indptr),
         _array_interface(data.indices),
         _array_interface(data.data),
-        ctypes.c_size_t(data.shape[1]),
+        c_bst_ulong(data.shape[1]),
         config,
         ctypes.byref(handle),
     )
--
2.35.1
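The CSR patch above fixed an argument-width mismatch in the ctypes bindings: the underlying C-API calls expect a 64-bit column count (c_bst_ulong, defined in xgboost's core.py as ctypes.c_uint64), while ctypes.c_size_t follows the native size_t and is only 4 bytes under wasm32. A quick check of the two sizes; the 4 in the comment assumes a wasm32 build:

import ctypes

print(ctypes.sizeof(ctypes.c_size_t))  # 4 on wasm32, 8 on 64-bit builds
print(ctypes.sizeof(ctypes.c_uint64))  # always 8, matching c_bst_ulong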
packages/xgboost/test_xgboost.py
@@ -135,11 +135,11 @@ def test_pandas(selenium):
     # 1 2 0 1 0
     # 2 3 0 0 1
     result, _, _ = xgb.data._transform_pandas_df(dummies, enable_categorical=False)
-    exp = np.array([[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]])
-    np.testing.assert_array_equal(result, exp)
+    exp = np.array([[1.0, 1.0, 0.0, 0.0], [2.0, 0.0, 1.0, 0.0], [3.0, 0.0, 0.0, 1.0]]).T
+    np.testing.assert_array_equal(result.columns, exp)
     dm = xgb.DMatrix(dummies)
     assert dm.feature_names == ["B", "A_X", "A_Y", "A_Z"]
-    assert dm.feature_types == ["int", "int", "int", "int"]
+    assert dm.feature_types == ["int", "i", "i", "i"]
     assert dm.num_row() == 3
     assert dm.num_col() == 4

@@ -228,7 +228,7 @@ def test_pandas_categorical(selenium):
         X, enable_categorical=True
     )

-    assert transformed[:, 0].min() == 0
+    assert transformed.columns[0].min() == 0

     # test missing value
     X = pd.DataFrame({"f0": ["a", "b", np.NaN]})

@@ -282,18 +282,18 @@ def test_pandas_label(selenium):
     # label must be a single column
     df = pd.DataFrame({"A": ["X", "Y", "Z"], "B": [1, 2, 3]})
     with pytest.raises(ValueError):
-        xgb.data._transform_pandas_df(df, False, None, None, "label", "float")
+        xgb.data._transform_pandas_df(df, False, None, None, "label")

     # label must be supported dtype
     df = pd.DataFrame({"A": np.array(["a", "b", "c"], dtype=object)})
     with pytest.raises(ValueError):
-        xgb.data._transform_pandas_df(df, False, None, None, "label", "float")
+        xgb.data._transform_pandas_df(df, False, None, None, "label")

     df = pd.DataFrame({"A": np.array([1, 2, 3], dtype=int)})
-    result, _, _ = xgb.data._transform_pandas_df(
-        df, False, None, None, "label", "float"
+    result, _, _ = xgb.data._transform_pandas_df(df, False, None, None, "label")
+    np.testing.assert_array_equal(
+        np.stack(result.columns, axis=1), np.array([[1.0], [2.0], [3.0]], dtype=float)
     )
-    np.testing.assert_array_equal(result, np.array([[1.0], [2.0], [3.0]], dtype=float))
     dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
     assert dm.num_row() == 3
     assert dm.num_col() == 2
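The test updates above track the 2.x behaviour of the private helper xgb.data._transform_pandas_df: instead of a single 2-D array it now returns an object whose .columns holds one array per column (hence result.columns, the transposed expectation, and the np.stack call), and the dummy columns now report the dtype-kind style feature type "i" rather than "int". A sketch of that access pattern, inferred from the updated assertions; the return shape is an internal detail and may change between releases:

import numpy as np
import pandas as pd
import xgboost as xgb

df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
result, _, _ = xgb.data._transform_pandas_df(df, enable_categorical=False)

columns = np.stack(result.columns, axis=1)  # back to an (n_rows, n_cols) array
print(columns.shape)                        # expected: (3, 2)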