diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 8510b70d4e..0c2c5a69c4 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -27,7 +27,6 @@ Brief description of all our automation tools used for boosting development perf
- GPU: 2 x NVIDIA RTX 3090
- TPU: [Google TPU v4-8](https://cloud.google.com/tpu/docs)
- - IPU: [Colossus MK1 IPU](https://www.graphcore.ai/products/ipu)
- To check which versions of Python or PyTorch are used for testing in our CI, see the corresponding workflow files or checkgroup config file at [`.github/checkgroup.yml`](../checkgroup.yml).
diff --git a/.gitignore b/.gitignore
index 598ad521af..fe3a13e9e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,9 +22,7 @@ docs/source-pytorch/notebooks
docs/source-pytorch/_static/images/course_UvA-DL
docs/source-pytorch/_static/images/lightning_examples
docs/source-pytorch/_static/fetched-s3-assets
-docs/source-pytorch/_static/images/ipu/
docs/source-pytorch/integrations/hpu
-docs/source-pytorch/integrations/ipu
docs/source-fabric/*/generated
diff --git a/docs/source-app/quickstart.rst b/docs/source-app/quickstart.rst
index 6df9b26b7b..99872c5f8a 100644
--- a/docs/source-app/quickstart.rst
+++ b/docs/source-app/quickstart.rst
@@ -53,7 +53,6 @@ And that's it!
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
- IPU available: False, using: 0 IPUs
| Name | Type | Params | In sizes | Out sizes
------------------------------------------------------------------
diff --git a/docs/source-pytorch/advanced/speed.rst b/docs/source-pytorch/advanced/speed.rst
index 3abbc73dc8..1f79393d2f 100644
--- a/docs/source-pytorch/advanced/speed.rst
+++ b/docs/source-pytorch/advanced/speed.rst
@@ -20,7 +20,7 @@ Training on Accelerators
**Use when:** Whenever possible!
-With Lightning, running on GPUs, TPUs, IPUs on multiple nodes is a simple switch of a flag.
+With Lightning, running on GPUs, TPUs, HPUs on multiple nodes is a simple switch of a flag.
GPU Training
============
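To make the "simple switch of a flag" in the updated sentence concrete, here is a minimal sketch; the demo-model import and the 2-node/8-GPU cluster are assumptions for illustration only:

```python
from lightning.pytorch import Trainer
from lightning.pytorch.demos.boring_classes import BoringModel  # small built-in demo model

model = BoringModel()

# Single-process CPU run (works anywhere)
trainer = Trainer(accelerator="cpu", devices=1, max_epochs=1)
trainer.fit(model)

# Multi-GPU, multi-node run: only the flags change, not the model code
# (assumes a cluster that really exposes 2 nodes with 8 GPUs each)
trainer = Trainer(accelerator="gpu", devices=8, num_nodes=2, strategy="ddp", max_epochs=1)
```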
diff --git a/docs/source-pytorch/common/index.rst b/docs/source-pytorch/common/index.rst
index 573cd48300..84d4e331cf 100644
--- a/docs/source-pytorch/common/index.rst
+++ b/docs/source-pytorch/common/index.rst
@@ -17,7 +17,6 @@
../advanced/model_parallel
Train on single or multiple GPUs <../accelerators/gpu>
Train on single or multiple HPUs <../integrations/hpu/index>
- Train on single or multiple IPUs <../integrations/ipu/index>
Train on single or multiple TPUs <../accelerators/tpu>
Train on MPS <../accelerators/mps>
Use a pretrained model <../advanced/pretrained>
@@ -168,13 +167,6 @@ How-to Guides
:col_css: col-md-4
:height: 180
-.. displayitem::
- :header: Train on single or multiple IPUs
- :description: Train models faster with IPU accelerators
- :button_link: ../integrations/ipu/index.html
- :col_css: col-md-4
- :height: 180
-
.. displayitem::
:header: Train on single or multiple TPUs
-   :description: TTrain models faster with TPU accelerators
+   :description: Train models faster with TPU accelerators
diff --git a/docs/source-pytorch/common/precision_basic.rst b/docs/source-pytorch/common/precision_basic.rst
index eb7fe0f9e9..1134524b51 100644
--- a/docs/source-pytorch/common/precision_basic.rst
+++ b/docs/source-pytorch/common/precision_basic.rst
@@ -103,31 +103,26 @@ Precision support by accelerator
********************************
.. list-table:: Precision with Accelerators
- :widths: 20 20 20 20 20
+ :widths: 20 20 20 20
:header-rows: 1
* - Precision
- CPU
- GPU
- TPU
- - IPU
* - 16 Mixed
- No
- Yes
- No
- - Yes
* - BFloat16 Mixed
- Yes
- Yes
- Yes
- - No
* - 32 True
- Yes
- Yes
- Yes
- - Yes
* - 64 True
- Yes
- Yes
- No
- - No
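With the IPU column dropped, the remaining combinations map onto the ``precision`` flag as sketched below; which modes actually run still depends on the accelerator per the table above:

```python
from lightning.pytorch import Trainer

# 16-bit mixed precision (GPU only, per the table)
trainer = Trainer(accelerator="gpu", devices=1, precision="16-mixed")

# bfloat16 mixed precision (CPU, GPU, or TPU)
trainer = Trainer(accelerator="cpu", precision="bf16-mixed")

# Full 64-bit precision (CPU or GPU)
trainer = Trainer(accelerator="cpu", precision="64-true")
```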
diff --git a/docs/source-pytorch/common/trainer.rst b/docs/source-pytorch/common/trainer.rst
index 37c279e311..0ad4592754 100644
--- a/docs/source-pytorch/common/trainer.rst
+++ b/docs/source-pytorch/common/trainer.rst
@@ -175,7 +175,7 @@ Trainer flags
accelerator
^^^^^^^^^^^
-Supports passing different accelerator types (``"cpu", "gpu", "tpu", "ipu", "auto"``)
+Supports passing different accelerator types (``"cpu", "gpu", "tpu", "hpu", "auto"``)
as well as custom accelerator instances.
.. code-block:: python
@@ -393,9 +393,6 @@ Number of devices to train on (``int``), which devices to train on (``list`` or
# Training with TPU Accelerator using 8 tpu cores
trainer = Trainer(devices="auto", accelerator="tpu")
- # Training with IPU Accelerator using 4 ipus
- trainer = Trainer(devices="auto", accelerator="ipu")
-
.. note::
If the ``devices`` flag is not defined, it will assume ``devices`` to be ``"auto"`` and fetch the ``auto_device_count``
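With the IPU example removed, the surviving ``accelerator``/``devices`` combinations look like this minimal sketch (the TPU line assumes an XLA environment is present):

```python
from lightning.pytorch import Trainer

# Let Lightning pick the accelerator and device count
trainer = Trainer(accelerator="auto", devices="auto")

# Train on two specific GPUs
trainer = Trainer(accelerator="gpu", devices=[0, 1])

# Train on 8 TPU cores (assumes an XLA/TPU environment)
trainer = Trainer(accelerator="tpu", devices=8)
```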
diff --git a/docs/source-pytorch/common_usecases.rst b/docs/source-pytorch/common_usecases.rst
index 4af75de9dd..7e6ed91d0c 100644
--- a/docs/source-pytorch/common_usecases.rst
+++ b/docs/source-pytorch/common_usecases.rst
@@ -133,13 +133,6 @@ Customize and extend Lightning for things like custom hardware or distributed st
:button_link: integrations/hpu/index.html
:height: 100
-.. displayitem::
- :header: Train on single or multiple IPUs
- :description: Train models faster with IPUs.
- :col_css: col-md-12
- :button_link: integrations/ipu/index.html
- :height: 100
-
.. displayitem::
:header: Train on single or multiple TPUs
:description: Train models faster with TPUs.
diff --git a/docs/source-pytorch/conf.py b/docs/source-pytorch/conf.py
index 829f1f2c9e..a7fa7ce559 100644
--- a/docs/source-pytorch/conf.py
+++ b/docs/source-pytorch/conf.py
@@ -94,18 +94,6 @@ assist_local.AssistantCLI.pull_docs_files(
target_dir="docs/source-pytorch/integrations/hpu",
checkout="refs/tags/1.3.0",
)
-assist_local.AssistantCLI.pull_docs_files(
- gh_user_repo="Lightning-AI/lightning-Graphcore",
- target_dir="docs/source-pytorch/integrations/ipu",
- checkout="refs/tags/v0.1.0",
- as_orphan=True, # todo: this can be dropped after new IPU release
-)
-# the IPU also need one image
-URL_RAW_DOCS_GRAPHCORE = "https://raw.githubusercontent.com/Lightning-AI/lightning-Graphcore/v0.1.0/docs/source"
-for img in ["_static/images/ipu/profiler.png"]:
- img_ = os.path.join(_PATH_HERE, "integrations", "ipu", img)
- os.makedirs(os.path.dirname(img_), exist_ok=True)
- urllib.request.urlretrieve(f"{URL_RAW_DOCS_GRAPHCORE}/{img}", img_)
# Copy strategies docs as single pages
assist_local.AssistantCLI.pull_docs_files(
@@ -340,7 +328,6 @@ intersphinx_mapping = {
"numpy": ("https://numpy.org/doc/stable/", None),
"PIL": ("https://pillow.readthedocs.io/en/stable/", None),
"torchmetrics": ("https://torchmetrics.readthedocs.io/en/stable/", None),
- "graphcore": ("https://docs.graphcore.ai/en/latest/", None),
"lightning_habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
"tensorboardX": ("https://tensorboardx.readthedocs.io/en/stable/", None),
# needed for referencing App from lightning scope
diff --git a/docs/source-pytorch/expertise_levels.rst b/docs/source-pytorch/expertise_levels.rst
index 5988890332..7c34123f5b 100644
--- a/docs/source-pytorch/expertise_levels.rst
+++ b/docs/source-pytorch/expertise_levels.rst
@@ -190,34 +190,26 @@ Configure all aspects of Lightning for advanced usecases.
:tag: advanced
.. displayitem::
- :header: Level 18: Explore IPUs
- :description: Explore Intelligence Processing Unit (IPU) for model scaling.
+ :header: Level 18: Explore HPUs
+   :description: Explore Habana Gaudi Processing Unit (HPU) for model scaling.
:col_css: col-md-6
:button_link: levels/advanced_level_19.html
:height: 150
:tag: advanced
.. displayitem::
- :header: Level 19: Explore HPUs
- :description: Explore Havana Gaudi Processing Unit (HPU) for model scaling.
+ :header: Level 19: Master TPUs
+ :description: Master TPUs and run on cloud TPUs.
:col_css: col-md-6
:button_link: levels/advanced_level_20.html
:height: 150
:tag: advanced
.. displayitem::
- :header: Level 20: Master TPUs
- :description: Master TPUs and run on cloud TPUs.
- :col_css: col-md-6
- :button_link: levels/advanced_level_21.html
- :height: 150
- :tag: advanced
-
-.. displayitem::
- :header: Level 21: Train models with billions of parameters
+ :header: Level 20: Train models with billions of parameters
:description: Scale GPU training to models with billions of parameters
:col_css: col-md-6
- :button_link: levels/advanced_level_22.html
+ :button_link: levels/advanced_level_21.html
:height: 150
:tag: advanced
@@ -240,7 +232,7 @@ Customize and extend Lightning for things like custom hardware or distributed st
.. Add callout items below this line
.. displayitem::
- :header: Level 22: Extend the Lightning CLI
+ :header: Level 21: Extend the Lightning CLI
:description: Extend the functionality of the Lightning CLI.
:col_css: col-md-6
:button_link: levels/expert_level_23.html
@@ -248,7 +240,7 @@ Customize and extend Lightning for things like custom hardware or distributed st
:tag: expert
.. displayitem::
- :header: Level 23: Integrate a custom cluster
+ :header: Level 22: Integrate a custom cluster
:description: Integrate a custom cluster into Lightning.
:col_css: col-md-6
:button_link: levels/expert_level_24.html
@@ -256,7 +248,7 @@ Customize and extend Lightning for things like custom hardware or distributed st
:tag: expert
.. displayitem::
- :header: Level 24: Make your own profiler
+ :header: Level 23: Make your own profiler
:description: Make your own profiler.
:col_css: col-md-6
:button_link: tuning/profiler_expert.html
@@ -264,10 +256,10 @@ Customize and extend Lightning for things like custom hardware or distributed st
:tag: expert
.. displayitem::
- :header: Level 25: Add a new accelerator or Strategy
+ :header: Level 24: Add a new accelerator or Strategy
:description: Integrate a new accelerator or distributed strategy.
:col_css: col-md-6
- :button_link: levels/expert_level_27.html
+ :button_link: levels/expert_level_25.html
:height: 150
:tag: expert
diff --git a/docs/source-pytorch/extensions/accelerator.rst b/docs/source-pytorch/extensions/accelerator.rst
index 0589d9850c..93dc467b02 100644
--- a/docs/source-pytorch/extensions/accelerator.rst
+++ b/docs/source-pytorch/extensions/accelerator.rst
@@ -4,13 +4,12 @@
Accelerator
###########
-The Accelerator connects a Lightning Trainer to arbitrary hardware (CPUs, GPUs, TPUs, IPUs, MPS, ...).
+The Accelerator connects a Lightning Trainer to arbitrary hardware (CPUs, GPUs, TPUs, HPUs, MPS, ...).
Currently there are accelerators for:
- CPU
- :doc:`GPU <../accelerators/gpu>`
- :doc:`TPU <../accelerators/tpu>`
-- :doc:`IPU <../integrations/ipu/index>`
- :doc:`HPU <../integrations/hpu/index>`
- :doc:`MPS <../accelerators/mps>`
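Besides the built-ins listed above, custom accelerator instances can be passed to the ``Trainer``. A hedged sketch that subclasses the built-in ``CPUAccelerator`` purely to show the mechanics (``LoggingCPUAccelerator`` is a made-up name; a real backend would implement the abstract ``Accelerator`` interface instead):

```python
from lightning.pytorch import Trainer
from lightning.pytorch.accelerators import CPUAccelerator


class LoggingCPUAccelerator(CPUAccelerator):
    """Toy accelerator that logs device setup; otherwise behaves like the CPU accelerator."""

    def setup_device(self, device):
        print(f"Setting up device: {device}")
        super().setup_device(device)


trainer = Trainer(accelerator=LoggingCPUAccelerator(), devices=1)
```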
diff --git a/docs/source-pytorch/extensions/strategy.rst b/docs/source-pytorch/extensions/strategy.rst
index 858a6744ee..7155cff815 100644
--- a/docs/source-pytorch/extensions/strategy.rst
+++ b/docs/source-pytorch/extensions/strategy.rst
@@ -57,9 +57,6 @@ Here are some examples:
# Training with the DDP Spawn strategy on 8 TPU cores
trainer = Trainer(strategy="ddp_spawn", accelerator="tpu", devices=8)
- # Training with the default IPU strategy on 8 IPUs
- trainer = Trainer(accelerator="ipu", devices=8)
-
The below table lists all relevant strategies available in Lightning with their corresponding short-hand name:
.. list-table:: Strategy Classes and Nicknames
@@ -87,9 +84,6 @@ The below table lists all relevant strategies available in Lightning with their
* - hpu_single
- ``SingleHPUStrategy``
- Strategy for training on a single HPU device. :doc:`Learn more. <../integrations/hpu/index>`
- * - ipu_strategy
- - ``IPUStrategy``
- - Plugin for training on IPU devices. :doc:`Learn more. <../integrations/ipu/index>`
* - xla
- :class:`~lightning.pytorch.strategies.XLAStrategy`
- Strategy for training on multiple TPU devices using the :func:`torch_xla.distributed.xla_multiprocessing.spawn` method. :doc:`Learn more. <../accelerators/tpu>`
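For the remaining strategies, the shorthand names and explicit instances are interchangeable; a minimal sketch (4 GPUs assumed):

```python
from lightning.pytorch import Trainer
from lightning.pytorch.strategies import DDPStrategy

# Shorthand name from the table
trainer = Trainer(strategy="ddp", accelerator="gpu", devices=4)

# Equivalent explicit instance, which also exposes extra DDP options
trainer = Trainer(strategy=DDPStrategy(find_unused_parameters=True), accelerator="gpu", devices=4)
```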
diff --git a/docs/source-pytorch/glossary/index.rst b/docs/source-pytorch/glossary/index.rst
index 805510a1aa..5ca677c48e 100644
--- a/docs/source-pytorch/glossary/index.rst
+++ b/docs/source-pytorch/glossary/index.rst
@@ -20,7 +20,6 @@
Half precision <../common/precision>
HPU <../integrations/hpu/index>
Inference <../deploy/production_intermediate>
- IPU <../integrations/ipu/index>
Lightning CLI <../cli/lightning_cli>
LightningDataModule <../data/datamodule>
LightningModule <../common/lightning_module>
@@ -177,13 +176,6 @@ Glossary
:button_link: ../deploy/production_intermediate.html
:height: 100
-.. displayitem::
- :header: IPU
- :description: Graphcore Intelligence Processing Unit for faster training
- :col_css: col-md-12
- :button_link: ../integrations/ipu/index.html
- :height: 100
-
.. displayitem::
:header: Lightning CLI
:description: A Command-line Interface (CLI) to interact with Lightning code via a terminal
diff --git a/docs/source-pytorch/integrations/ipu/index.rst b/docs/source-pytorch/integrations/ipu/index.rst
deleted file mode 100644
index 138814fefc..0000000000
--- a/docs/source-pytorch/integrations/ipu/index.rst
+++ /dev/null
@@ -1,48 +0,0 @@
-.. _ipu:
-
-Accelerator: IPU training
-=========================
-
-.. raw:: html
-
-
-
-
-.. Add callout items below this line
-
-.. displayitem::
- :header: Prepare your code (Optional)
- :description: Prepare your code to run on any hardware
- :col_css: col-md-6
- :button_link: accelerator_prepare.html
- :height: 150
- :tag: basic
-
-.. displayitem::
- :header: Basic
- :description: Learn the basics of single and multi-IPU training.
- :col_css: col-md-6
- :button_link: ipu_basic.html
- :height: 150
- :tag: basic
-
-.. displayitem::
- :header: Intermediate
- :description: Tune model performance with mix-precision settings and the performance analyser.
- :col_css: col-md-6
- :button_link: ipu_intermediate.html
- :height: 150
- :tag: intermediate
-
-.. displayitem::
- :header: Advanced
- :description: Learn advanced techniques to customize IPU training for massive models.
- :col_css: col-md-6
- :button_link: ipu_advanced.html
- :height: 150
- :tag: advanced
-
-.. raw:: html
-
-
-
diff --git a/docs/source-pytorch/levels/advanced.rst b/docs/source-pytorch/levels/advanced.rst
index 1ea809d6fa..6e4b1e99f9 100644
--- a/docs/source-pytorch/levels/advanced.rst
+++ b/docs/source-pytorch/levels/advanced.rst
@@ -46,34 +46,26 @@ Configure all aspects of Lightning for advanced usecases.
:tag: advanced
.. displayitem::
- :header: Level 18: Explore IPUs
- :description: Explore Intelligence Processing Unit (IPU) for model scaling.
+ :header: Level 18: Explore HPUs
+ :description: Explore Habana Gaudi Processing Unit (HPU) for model scaling.
:col_css: col-md-6
:button_link: advanced_level_19.html
:height: 150
:tag: advanced
.. displayitem::
- :header: Level 19: Explore HPUs
- :description: Explore Habana Gaudi Processing Unit (HPU) for model scaling.
+ :header: Level 19: Master TPUs
+ :description: Master TPUs and run on cloud TPUs.
:col_css: col-md-6
:button_link: advanced_level_20.html
:height: 150
:tag: advanced
.. displayitem::
- :header: Level 20: Master TPUs
- :description: Master TPUs and run on cloud TPUs.
- :col_css: col-md-6
- :button_link: advanced_level_21.html
- :height: 150
- :tag: advanced
-
-.. displayitem::
- :header: Level 21: Train models with billions of parameters
+ :header: Level 20: Train models with billions of parameters
:description: Scale GPU training to models with billions of parameters
:col_css: col-md-6
- :button_link: advanced_level_22.html
+ :button_link: advanced_level_21.html
:height: 150
:tag: advanced
diff --git a/docs/source-pytorch/levels/advanced_level_19.rst b/docs/source-pytorch/levels/advanced_level_19.rst
index eba1a9bc14..6ce849c12b 100644
--- a/docs/source-pytorch/levels/advanced_level_19.rst
+++ b/docs/source-pytorch/levels/advanced_level_19.rst
@@ -1,10 +1,10 @@
:orphan:
######################
-Level 18: Explore IPUs
+Level 18: Explore HPUs
######################
-Explore Intelligence Processing Unit (IPU) for model scaling.
+Explore Intel Habana Processing Unit (HPU) for model scaling.
----
@@ -16,26 +16,18 @@ Explore Intelligence Processing Unit (IPU) for model scaling.
.. Add callout items below this line
.. displayitem::
- :header: Prepare your code (Optional)
- :description: Prepare your code to run on any hardware.
- :col_css: col-md-4
- :button_link: ../accelerators/accelerator_prepare.html
+ :header: Train models on HPUs
+ :description: Learn the basics of single and multi-HPU core training.
+ :col_css: col-md-6
+ :button_link: ../integrations/hpu/basic.html
:height: 150
:tag: basic
.. displayitem::
- :header: Train models on IPUs
- :description: Learn the basics of single and multi-IPU training.
- :col_css: col-md-4
- :button_link: ../integrations/ipu/basic.html
- :height: 150
- :tag: basic
-
-.. displayitem::
- :header: Optimize models training on IPUs
- :description: Tune model performance with mixed precision and the performance analyser.
- :col_css: col-md-4
- :button_link: ../integrations/ipu/intermediate.html
+ :header: Optimize models training on HPUs
+ :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+ :col_css: col-md-6
+ :button_link: ../integrations/hpu/intermediate.html
:height: 150
:tag: intermediate
diff --git a/docs/source-pytorch/levels/advanced_level_20.rst b/docs/source-pytorch/levels/advanced_level_20.rst
index 8aaa159cc6..ebde7d6ea5 100644
--- a/docs/source-pytorch/levels/advanced_level_20.rst
+++ b/docs/source-pytorch/levels/advanced_level_20.rst
@@ -1,10 +1,10 @@
:orphan:
-######################
-Level 19: Explore HPUs
-######################
+#####################
+Level 19: Master TPUs
+#####################
-Explore Intel Habana Processing Unit (HPU) for model scaling.
+Master cloud TPU training with profiling and scaling techniques.
----
@@ -16,20 +16,28 @@ Explore Intel Habana Processing Unit (HPU) for model scaling.
.. Add callout items below this line
.. displayitem::
- :header: Train models on HPUs
- :description: Learn the basics of single and multi-HPU core training.
- :col_css: col-md-6
- :button_link: ../integrations/hpu/basic.html
- :height: 150
- :tag: basic
+ :header: Run on cloud TPUs
+ :description: Scale massive models using cloud TPUs.
+ :col_css: col-md-4
+ :button_link: ../accelerators/tpu_intermediate.html
+ :height: 180
+ :tag: intermediate
.. displayitem::
- :header: Optimize models training on HPUs
- :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
- :col_css: col-md-6
- :button_link: ../integrations/hpu/intermediate.html
- :height: 150
- :tag: intermediate
+ :header: Explore advanced TPU scaling techniques
+ :description: Dive into XLA and advanced techniques to optimize TPU-powered models.
+ :col_css: col-md-4
+ :button_link: ../accelerators/tpu_advanced.html
+ :height: 180
+ :tag: advanced
+
+.. displayitem::
+ :header: Profile TPU code
+ :description: Learn to profile TPU code.
+ :col_css: col-md-4
+ :button_link: ../tuning/profiler_advanced.html
+ :height: 180
+ :tag: advanced
.. raw:: html
diff --git a/docs/source-pytorch/levels/advanced_level_21.rst b/docs/source-pytorch/levels/advanced_level_21.rst
index 92358c04eb..6c07d1d037 100644
--- a/docs/source-pytorch/levels/advanced_level_21.rst
+++ b/docs/source-pytorch/levels/advanced_level_21.rst
@@ -1,10 +1,10 @@
:orphan:
-#####################
-Level 20: Master TPUs
-#####################
+##################################################
+Level 20: Train models with billions of parameters
+##################################################
-Master cloud TPU training with profiling and scaling techniques.
+Scale to billions of parameters with multiple distributed strategies.
----
@@ -16,27 +16,19 @@ Master cloud TPU training with profiling and scaling techniques.
.. Add callout items below this line
.. displayitem::
- :header: Run on cloud TPUs
- :description: Scale massive models using cloud TPUs.
- :col_css: col-md-4
- :button_link: ../accelerators/tpu_intermediate.html
- :height: 180
+ :header: Scale with distributed strategies
+ :description: Learn about different distributed strategies to reach bigger model parameter sizes.
+ :col_css: col-md-6
+ :button_link: ../accelerators/gpu_intermediate.html
+ :height: 150
:tag: intermediate
.. displayitem::
- :header: Explore advanced TPU scaling techniques
- :description: Dive into XLA and advanced techniques to optimize TPU-powered models.
- :col_css: col-md-4
- :button_link: ../accelerators/tpu_advanced.html
- :height: 180
- :tag: advanced
-
-.. displayitem::
- :header: Profile TPU code
- :description: Learn to profile TPU code.
- :col_css: col-md-4
- :button_link: ../tuning/profiler_advanced.html
- :height: 180
+ :header: Train models with billions of parameters
+ :description: Scale to billions of params on GPUs with FSDP or Deepspeed.
+ :col_css: col-md-6
+ :button_link: ../advanced/model_parallel.html
+ :height: 150
:tag: advanced
.. raw:: html
diff --git a/docs/source-pytorch/levels/advanced_level_22.rst b/docs/source-pytorch/levels/advanced_level_22.rst
deleted file mode 100644
index 825f389e61..0000000000
--- a/docs/source-pytorch/levels/advanced_level_22.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-:orphan:
-
-##################################################
-Level 21: Train models with billions of parameters
-##################################################
-
-Scale to billions of parameters with multiple distributed strategies.
-
-----
-
-.. raw:: html
-
-
-
-
-.. Add callout items below this line
-
-.. displayitem::
- :header: Scale with distributed strategies
- :description: Learn about different distributed strategies to reach bigger model parameter sizes.
- :col_css: col-md-6
- :button_link: ../accelerators/gpu_intermediate.html
- :height: 150
- :tag: intermediate
-
-.. displayitem::
- :header: Train models with billions of parameters
- :description: Scale to billions of params on GPUs with FSDP or Deepspeed.
- :col_css: col-md-6
- :button_link: ../advanced/model_parallel.html
- :height: 150
- :tag: advanced
-
-.. raw:: html
-
-
-
diff --git a/docs/source-pytorch/levels/expert.rst b/docs/source-pytorch/levels/expert.rst
index c73414201e..bb0fbf25a8 100644
--- a/docs/source-pytorch/levels/expert.rst
+++ b/docs/source-pytorch/levels/expert.rst
@@ -14,23 +14,23 @@ Customize and extend Lightning for things like custom hardware or distributed st
.. Add callout items below this line
.. displayitem::
- :header: Level 22: Extend the Lightning CLI
+ :header: Level 21: Extend the Lightning CLI
:description: Extend the functionality of the Lightning CLI.
:col_css: col-md-6
+ :button_link: expert_level_22.html
+ :height: 150
+ :tag: expert
+
+.. displayitem::
+ :header: Level 22: Integrate a custom cluster
+ :description: Integrate a custom cluster into Lightning.
+ :col_css: col-md-6
:button_link: expert_level_23.html
:height: 150
:tag: expert
.. displayitem::
- :header: Level 23: Integrate a custom cluster
- :description: Integrate a custom cluster into Lightning.
- :col_css: col-md-6
- :button_link: expert_level_24.html
- :height: 150
- :tag: expert
-
-.. displayitem::
- :header: Level 24: Make your own profiler
+ :header: Level 23: Make your own profiler
:description: Make your own profiler.
:col_css: col-md-6
:button_link: ../tuning/profiler_expert.html
@@ -38,7 +38,7 @@ Customize and extend Lightning for things like custom hardware or distributed st
:tag: expert
.. displayitem::
- :header: Level 25: Add a new accelerator or Strategy
+ :header: Level 24: Add a new accelerator or Strategy
:description: Integrate a new accelerator or distributed strategy.
:col_css: col-md-6
:button_link: expert_level_27.html
diff --git a/docs/source-pytorch/levels/expert_level_22.rst b/docs/source-pytorch/levels/expert_level_22.rst
new file mode 100644
index 0000000000..af2f020272
--- /dev/null
+++ b/docs/source-pytorch/levels/expert_level_22.rst
@@ -0,0 +1,37 @@
+:orphan:
+
+##################################
+Level 21: Extend the Lightning CLI
+##################################
+
+Extend the functionality of the Lightning CLI.
+
+----
+
+.. raw:: html
+
+
+
+
+.. Add callout items below this line
+
+.. displayitem::
+ :header: Customize configs for complex projects
+ :description: Learn how to connect complex projects with each Registry.
+ :col_css: col-md-6
+ :button_link: ../cli/lightning_cli_advanced_3.html
+ :height: 150
+ :tag: expert
+
+.. displayitem::
+ :header: Extend the Lightning CLI
+ :description: Customize the Lightning CLI
+ :col_css: col-md-6
+ :button_link: ../cli/lightning_cli_expert.html
+ :height: 150
+ :tag: expert
+
+.. raw:: html
+
+
+
diff --git a/docs/source-pytorch/levels/expert_level_23.rst b/docs/source-pytorch/levels/expert_level_23.rst
index 5d1ba67e96..eff7b781e5 100644
--- a/docs/source-pytorch/levels/expert_level_23.rst
+++ b/docs/source-pytorch/levels/expert_level_23.rst
@@ -1,8 +1,8 @@
:orphan:
-##################################
-Level 22: Extend the Lightning CLI
-##################################
+####################################
+Level 22: Integrate a custom cluster
+####################################
-Extend the functionality of the Lightning CLI.
+Integrate a custom cluster into Lightning.
@@ -16,18 +16,10 @@ Extend the functionality of the Lightning CLI.
.. Add callout items below this line
.. displayitem::
- :header: Customize configs for complex projects
- :description: Learn how to connect complex projects with each Registry.
+ :header: Integrate your own cluster
+ :description: Learn how to integrate your own cluster
:col_css: col-md-6
- :button_link: ../cli/lightning_cli_advanced_3.html
- :height: 150
- :tag: expert
-
-.. displayitem::
- :header: Extend the Lightning CLI
- :description: Customize the Lightning CLI
- :col_css: col-md-6
- :button_link: ../cli/lightning_cli_expert.html
+ :button_link: ../clouds/cluster_expert.html
:height: 150
:tag: expert
diff --git a/docs/source-pytorch/levels/expert_level_24.rst b/docs/source-pytorch/levels/expert_level_24.rst
deleted file mode 100644
index 54c544ee9d..0000000000
--- a/docs/source-pytorch/levels/expert_level_24.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-:orphan:
-
-####################################
-Level 23: Integrate a custom cluster
-####################################
-
-Extend the functionality of the Lightning CLI.
-
-----
-
-.. raw:: html
-
-
-
-
-.. Add callout items below this line
-
-.. displayitem::
- :header: Integrate your own cluster
- :description: Learn how to integrate your own cluster
- :col_css: col-md-6
- :button_link: ../clouds/cluster_expert.html
- :height: 150
- :tag: expert
-
-.. raw:: html
-
-
-
diff --git a/docs/source-pytorch/levels/expert_level_27.rst b/docs/source-pytorch/levels/expert_level_25.rst
similarity index 96%
rename from docs/source-pytorch/levels/expert_level_27.rst
rename to docs/source-pytorch/levels/expert_level_25.rst
index 9b06b10195..00244e53c0 100644
--- a/docs/source-pytorch/levels/expert_level_27.rst
+++ b/docs/source-pytorch/levels/expert_level_25.rst
@@ -1,7 +1,7 @@
:orphan:
###########################################
-Level 25: Add a new accelerator or Strategy
+Level 24: Add a new accelerator or Strategy
###########################################
Integrate a new accelerator or distributed strategy.
diff --git a/docs/source-pytorch/model/manual_optimization.rst b/docs/source-pytorch/model/manual_optimization.rst
index d897b59c98..150f04793e 100644
--- a/docs/source-pytorch/model/manual_optimization.rst
+++ b/docs/source-pytorch/model/manual_optimization.rst
@@ -345,4 +345,4 @@ Here is an example using a closure function.
opt.step(closure=closure)
.. warning::
- The :class:`~torch.optim.LBFGS` optimizer is not supported for AMP, IPUs, or DeepSpeed.
+ The :class:`~torch.optim.LBFGS` optimizer is not supported for AMP or DeepSpeed.
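For context on the updated warning, a minimal hedged sketch of the closure pattern it refers to, using plain manual optimization (no AMP or DeepSpeed) and a hypothetical 32-feature regression batch:

```python
import torch
from lightning.pytorch import LightningModule


class ManualLBFGSModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # manual optimization
        self.layer = torch.nn.Linear(32, 1)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        x, y = batch  # hypothetical (features, target) batch

        def closure():
            loss = torch.nn.functional.mse_loss(self.layer(x), y)
            opt.zero_grad()
            self.manual_backward(loss)
            return loss

        # LBFGS re-evaluates the closure internally, hence step(closure=...)
        opt.step(closure=closure)

    def configure_optimizers(self):
        return torch.optim.LBFGS(self.parameters(), lr=0.1)
```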
diff --git a/docs/source-pytorch/tuning/profiler_basic.rst b/docs/source-pytorch/tuning/profiler_basic.rst
index d248cc6490..cf1ae93e97 100644
--- a/docs/source-pytorch/tuning/profiler_basic.rst
+++ b/docs/source-pytorch/tuning/profiler_basic.rst
@@ -110,7 +110,7 @@ If the profiler report becomes too long, you can stream the report to a file:
*************************
Measure accelerator usage
*************************
-Another helpful technique to detect bottlenecks is to ensure that you're using the full capacity of your accelerator (GPU/TPU/IPU/HPU).
+Another helpful technique to detect bottlenecks is to ensure that you're using the full capacity of your accelerator (GPU/TPU/HPU).
This can be measured with the :class:`~lightning.pytorch.callbacks.device_stats_monitor.DeviceStatsMonitor`:
.. testcode::
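The accompanying snippet (truncated in this hunk) boils down to attaching the callback; a minimal sketch:

```python
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import DeviceStatsMonitor

# Logs accelerator utilization and memory stats to the configured logger
trainer = Trainer(callbacks=[DeviceStatsMonitor()])
```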
diff --git a/examples/pytorch/ipu/mnist_sample.py b/examples/pytorch/ipu/mnist_sample.py
deleted file mode 100644
index b50d2516eb..0000000000
--- a/examples/pytorch/ipu/mnist_sample.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright The Lightning AI team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-from lightning.pytorch import LightningModule, Trainer
-from lightning.pytorch.demos.mnist_datamodule import MNISTDataModule
-from torch.nn import functional as F
-
-
-class LitClassifier(LightningModule):
- def __init__(self, hidden_dim: int = 128, learning_rate: float = 0.0001):
- super().__init__()
- self.save_hyperparameters()
-
- self.l1 = torch.nn.Linear(28 * 28, self.hparams.hidden_dim)
- self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10)
-
- self.val_outptus = []
- self.test_outputs = []
-
- def forward(self, x):
- x = x.view(x.size(0), -1)
- x = torch.relu(self.l1(x))
- return torch.relu(self.l2(x))
-
- def training_step(self, batch, batch_idx):
- x, y = batch
- y_hat = self(x)
- return F.cross_entropy(y_hat, y)
-
- def validation_step(self, batch, batch_idx):
- x, y = batch
- probs = self(x)
- acc = self.accuracy(probs, y)
- self.val_outputs.append(acc)
- return acc
-
- def test_step(self, batch, batch_idx):
- x, y = batch
- logits = self(x)
- acc = self.accuracy(logits, y)
- self.test_outputs.append(acc)
- return acc
-
- def accuracy(self, logits, y):
- # currently IPU poptorch doesn't implicit convert bools to tensor
- # hence we use an explicit calculation for accuracy here. Once fixed in poptorch
- # we can use the accuracy metric.
- return torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y)
-
- def on_validation_epoch_end(self) -> None:
- # since the training step/validation step and test step are run on the IPU device
- # we must log the average loss outside the step functions.
- self.log("val_acc", torch.stack(self.val_outptus).mean(), prog_bar=True)
- self.val_outptus.clear()
-
- def on_test_epoch_end(self) -> None:
- self.log("test_acc", torch.stack(self.test_outputs).mean())
- self.test_outputs.clear()
-
- def configure_optimizers(self):
- return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
-
-
-if __name__ == "__main__":
- dm = MNISTDataModule(batch_size=32)
- model = LitClassifier()
- trainer = Trainer(max_epochs=2, accelerator="ipu", devices=8)
-
- trainer.fit(model, datamodule=dm)
- trainer.test(model, datamodule=dm)
diff --git a/requirements/_integrations/accelerators.txt b/requirements/_integrations/accelerators.txt
index d7f372764e..90c72bedb2 100644
--- a/requirements/_integrations/accelerators.txt
+++ b/requirements/_integrations/accelerators.txt
@@ -1,3 +1,2 @@
# validation accelerator connectors
lightning-habana >=1.2.0, <1.3.0
-lightning-graphcore >=0.1.0, <0.2.0
diff --git a/src/lightning/pytorch/_graveyard/__init__.py b/src/lightning/pytorch/_graveyard/__init__.py
index 3403bbd52a..d5cfb18148 100644
--- a/src/lightning/pytorch/_graveyard/__init__.py
+++ b/src/lightning/pytorch/_graveyard/__init__.py
@@ -13,6 +13,5 @@
# limitations under the License.
import lightning.pytorch._graveyard._torchmetrics
import lightning.pytorch._graveyard.hpu
-import lightning.pytorch._graveyard.ipu
import lightning.pytorch._graveyard.precision
import lightning.pytorch._graveyard.tpu # noqa: F401
diff --git a/src/lightning/pytorch/_graveyard/ipu.py b/src/lightning/pytorch/_graveyard/ipu.py
deleted file mode 100644
index 3c462410d2..0000000000
--- a/src/lightning/pytorch/_graveyard/ipu.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import sys
-from typing import Any
-
-import lightning.pytorch as pl
-
-
-def _patch_sys_modules() -> None:
- self = sys.modules[__name__]
- sys.modules["lightning.pytorch.accelerators.ipu"] = self
- sys.modules["lightning.pytorch.strategies.ipu"] = self
- sys.modules["lightning.pytorch.plugins.precision.ipu"] = self
-
-
-class IPUAccelerator:
- def __init__(self, *_: Any, **__: Any) -> None:
- raise NotImplementedError(
- "The `IPUAccelerator` class has been moved to an external package."
- " Install the extension package as `pip install lightning-graphcore`"
- " and import with `from lightning_graphcore import IPUAccelerator`."
- " Please see: https://github.com/Lightning-AI/lightning-Graphcore for more details."
- )
-
-
-class IPUStrategy:
- def __init__(self, *_: Any, **__: Any) -> None:
- raise NotImplementedError(
- "The `IPUStrategy` class has been moved to an external package."
- " Install the extension package as `pip install lightning-graphcore`"
- " and import with `from lightning_graphcore import IPUStrategy`."
- " Please see: https://github.com/Lightning-AI/lightning-Graphcore for more details."
- )
-
-
-class IPUPrecisionPlugin:
- def __init__(self, *_: Any, **__: Any) -> None:
- raise NotImplementedError(
- "The `IPUPrecisionPlugin` class has been moved to an external package."
- " Install the extension package as `pip install lightning-graphcore`"
- " and import with `from lightning_graphcore import IPUPrecisionPlugin`."
- " Please see: https://github.com/Lightning-AI/lightning-Graphcore for more details."
- )
-
-
-def _patch_classes() -> None:
- setattr(pl.accelerators, "IPUAccelerator", IPUAccelerator)
- setattr(pl.strategies, "IPUStrategy", IPUStrategy)
- setattr(pl.plugins, "IPUPrecisionPlugin", IPUPrecisionPlugin)
- setattr(pl.plugins.precision, "IPUPrecisionPlugin", IPUPrecisionPlugin)
-
-
-_patch_sys_modules()
-_patch_classes()
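The deleted module shows the ``_graveyard`` shim pattern: old import paths stay importable, but instantiation fails with a pointer to the external package. A generic, hedged sketch of the same idea with entirely hypothetical names:

```python
import sys
from typing import Any


class MovedOutClass:
    """Stand-in for a class relocated to an external package; instantiation fails loudly."""

    def __init__(self, *_: Any, **__: Any) -> None:
        raise NotImplementedError(
            "`MovedOutClass` has been moved to an external package."
            " Install it separately and import it from there."
        )


def _patch_sys_modules() -> None:
    # Keep the old dotted import path resolvable so `import mypkg.old_module` still works.
    sys.modules["mypkg.old_module"] = sys.modules[__name__]  # hypothetical old path


_patch_sys_modules()
```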
diff --git a/src/lightning/pytorch/core/hooks.py b/src/lightning/pytorch/core/hooks.py
index 2f510fe270..e1428a442d 100644
--- a/src/lightning/pytorch/core/hooks.py
+++ b/src/lightning/pytorch/core/hooks.py
@@ -601,10 +601,6 @@ class DataHooks:
batch = super().transfer_batch_to_device(batch, device, dataloader_idx)
return batch
- Raises:
- MisconfigurationException:
- If using IPUs, ``Trainer(accelerator='ipu')``.
-
See Also:
- :meth:`move_data_to_device`
- :meth:`apply_to_collection`
@@ -661,10 +657,6 @@ class DataHooks:
batch['x'] = gpu_transforms(batch['x'])
return batch
- Raises:
- MisconfigurationException:
- If using IPUs, ``Trainer(accelerator='ipu')``.
-
See Also:
- :meth:`on_before_batch_transfer`
- :meth:`transfer_batch_to_device`
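With the IPU-specific ``Raises`` notes gone, both batch-transfer hooks can be overridden on any accelerator. A hedged sketch assuming a dict-shaped batch (the key name is made up):

```python
from lightning.pytorch import LightningModule


class CustomTransferModel(LightningModule):
    def transfer_batch_to_device(self, batch, device, dataloader_idx):
        if isinstance(batch, dict) and "features" in batch:  # hypothetical batch layout
            batch["features"] = batch["features"].to(device)
            return batch
        return super().transfer_batch_to_device(batch, device, dataloader_idx)

    def on_after_batch_transfer(self, batch, dataloader_idx):
        # e.g. apply device-side augmentations here
        return batch
```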
diff --git a/src/lightning/pytorch/trainer/configuration_validator.py b/src/lightning/pytorch/trainer/configuration_validator.py
index 27cca4b426..23c04523cd 100644
--- a/src/lightning/pytorch/trainer/configuration_validator.py
+++ b/src/lightning/pytorch/trainer/configuration_validator.py
@@ -16,7 +16,6 @@ import lightning.pytorch as pl
from lightning.fabric.utilities.warnings import PossibleUserWarning
from lightning.pytorch.trainer.states import TrainerFn
from lightning.pytorch.utilities.exceptions import MisconfigurationException
-from lightning.pytorch.utilities.imports import _graphcore_available_and_importable
from lightning.pytorch.utilities.model_helpers import is_overridden
from lightning.pytorch.utilities.rank_zero import rank_zero_deprecation, rank_zero_warn
from lightning.pytorch.utilities.signature_utils import is_param_in_hook_signature
@@ -43,10 +42,7 @@ def _verify_loop_configurations(trainer: "pl.Trainer") -> None:
elif trainer.state.fn == TrainerFn.PREDICTING:
__verify_eval_loop_configuration(model, "predict")
- __verify_batch_transfer_support(trainer)
-
__verify_configure_model_configuration(model)
-
__warn_dataloader_iter_limitations(model)
@@ -120,22 +116,6 @@ def __verify_eval_loop_configuration(model: "pl.LightningModule", stage: str) ->
)
-def __verify_batch_transfer_support(trainer: "pl.Trainer") -> None:
- batch_transfer_hooks = ("transfer_batch_to_device", "on_after_batch_transfer")
- datahook_selector = trainer._data_connector._datahook_selector
- assert datahook_selector is not None
- for hook in batch_transfer_hooks:
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator
-
- # TODO: This code could be done in a hook in the IPUAccelerator as it's a simple error check
- # through the Trainer. It doesn't need to stay in Lightning
- if isinstance(trainer.accelerator, IPUAccelerator) and (
- is_overridden(hook, datahook_selector.model) or is_overridden(hook, datahook_selector.datamodule)
- ):
- raise MisconfigurationException(f"Overriding `{hook}` is not supported with IPUs.")
-
-
def __verify_manual_optimization_support(trainer: "pl.Trainer", model: "pl.LightningModule") -> None:
if model.automatic_optimization:
return
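The removed check relied on ``is_overridden`` to detect user-overridden hooks; a hedged, standalone sketch of just that detection (``MyModel`` is a made-up subclass of the demo ``BoringModel``):

```python
from lightning.pytorch.demos.boring_classes import BoringModel
from lightning.pytorch.utilities.model_helpers import is_overridden


class MyModel(BoringModel):
    def transfer_batch_to_device(self, batch, device, dataloader_idx):
        return super().transfer_batch_to_device(batch, device, dataloader_idx)


model = MyModel()
# True, because the hook is redefined on the subclass
print(is_overridden("transfer_batch_to_device", model))
```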
diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py
index 8940b82749..fd2a5d413b 100644
--- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py
+++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py
@@ -64,7 +64,6 @@ from lightning.pytorch.strategies.ddp import _DDP_FORK_ALIASES
from lightning.pytorch.utilities.exceptions import MisconfigurationException
from lightning.pytorch.utilities.imports import (
_LIGHTNING_COLOSSALAI_AVAILABLE,
- _graphcore_available_and_importable,
_habana_available_and_importable,
)
from lightning.pytorch.utilities.rank_zero import rank_zero_info, rank_zero_warn
@@ -338,11 +337,6 @@ class _AcceleratorConnector:
"""Choose the accelerator type (str) based on availability."""
if XLAAccelerator.is_available():
return "tpu"
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator
-
- if IPUAccelerator.is_available():
- return "ipu"
if _habana_available_and_importable():
from lightning_habana import HPUAccelerator
@@ -420,16 +414,6 @@ class _AcceleratorConnector:
return LightningEnvironment()
def _choose_strategy(self) -> Union[Strategy, str]:
- if self._accelerator_flag == "ipu":
- if not _graphcore_available_and_importable():
- raise ImportError(
- "You have passed `accelerator='ipu'` but the IPU integration is not installed."
- " Please run `pip install lightning-graphcore` or check out"
- " https://github.com/Lightning-AI/lightning-Graphcore for instructions"
- )
- from lightning_graphcore import IPUStrategy
-
- return IPUStrategy.strategy_name
if self._accelerator_flag == "hpu":
if not _habana_available_and_importable():
raise ImportError(
@@ -500,16 +484,6 @@ class _AcceleratorConnector:
if isinstance(self._precision_plugin_flag, Precision):
return self._precision_plugin_flag
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator, IPUPrecision
-
- # TODO: For the strategies that have a fixed precision class, we don't really need this logic
- # in the accelerator. Since the strategy owns the precision plugin, the strategy.precision_plugin
- # could be a no-op and then we wouldn't need this.
-
- if isinstance(self.accelerator, IPUAccelerator):
- return IPUPrecision(self._precision_flag)
-
if _habana_available_and_importable():
from lightning_habana import HPUAccelerator, HPUPrecisionPlugin
@@ -691,12 +665,3 @@ def _register_external_accelerators_and_strategies() -> None:
HPUParallelStrategy.register_strategies(StrategyRegistry)
if "hpu_single" not in StrategyRegistry:
SingleHPUStrategy.register_strategies(StrategyRegistry)
-
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator, IPUStrategy
-
- # TODO: Prevent registering multiple times
- if "ipu" not in AcceleratorRegistry:
- IPUAccelerator.register_accelerators(AcceleratorRegistry)
- if "ipu_strategy" not in StrategyRegistry:
- IPUStrategy.register_strategies(StrategyRegistry)
diff --git a/src/lightning/pytorch/trainer/connectors/data_connector.py b/src/lightning/pytorch/trainer/connectors/data_connector.py
index 1bc63c62c5..eb1beccec8 100644
--- a/src/lightning/pytorch/trainer/connectors/data_connector.py
+++ b/src/lightning/pytorch/trainer/connectors/data_connector.py
@@ -34,7 +34,6 @@ from lightning.pytorch.trainer.states import RunningStage, TrainerFn
from lightning.pytorch.utilities.combined_loader import CombinedLoader
from lightning.pytorch.utilities.data import _is_dataloader_shuffled, _update_dataloader
from lightning.pytorch.utilities.exceptions import MisconfigurationException
-from lightning.pytorch.utilities.imports import _graphcore_available_and_importable
from lightning.pytorch.utilities.model_helpers import is_overridden
from lightning.pytorch.utilities.rank_zero import WarningCache, rank_zero_warn
from lightning.pytorch.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
@@ -165,19 +164,11 @@ class _DataConnector:
datamodule.trainer = trainer
def _requires_distributed_sampler(self, dataloader: DataLoader) -> bool:
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator
-
- # `DistributedSampler` is never used with `poptorch.DataLoader`
- is_ipu = isinstance(self.trainer.accelerator, IPUAccelerator)
- else:
- is_ipu = False
return (
self.trainer._accelerator_connector.use_distributed_sampler
and self.trainer._accelerator_connector.is_distributed
and not isinstance(dataloader.sampler, DistributedSampler)
and not has_iterable_dataset(dataloader)
- and not is_ipu
)
def _prepare_dataloader(self, dataloader: object, shuffle: bool, mode: RunningStage) -> object:
@@ -190,18 +181,9 @@ class _DataConnector:
# don't do anything if it's not a dataloader
if not isinstance(dataloader, DataLoader):
return dataloader
-
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator
-
- # IPUs use a custom `poptorch.DataLoader` which we might need to convert to
- is_ipu = isinstance(self.trainer.accelerator, IPUAccelerator)
- else:
- is_ipu = False
if (
self._requires_distributed_sampler(dataloader) # sets the distributed sampler
or mode == RunningStage.PREDICTING # to track indices for the predictions
- or is_ipu
):
sampler = self._resolve_sampler(dataloader, shuffle=shuffle, mode=mode)
return _update_dataloader(dataloader, sampler, mode=mode)
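Conceptually, when ``_requires_distributed_sampler`` is true the connector rebuilds the dataloader around a ``DistributedSampler``; a hedged standalone sketch of that substitution with a made-up world size and rank:

```python
import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

dataset = TensorDataset(torch.randn(64, 4))
loader = DataLoader(dataset, batch_size=8, shuffle=True)

# Roughly what the connector does in distributed runs: swap in a distributed
# sampler while keeping the other dataloader settings (world size/rank are made up).
sampler = DistributedSampler(dataset, num_replicas=2, rank=0, shuffle=True)
distributed_loader = DataLoader(dataset, batch_size=8, sampler=sampler)
```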
diff --git a/src/lightning/pytorch/trainer/setup.py b/src/lightning/pytorch/trainer/setup.py
index 2dd5af675a..00b546b252 100644
--- a/src/lightning/pytorch/trainer/setup.py
+++ b/src/lightning/pytorch/trainer/setup.py
@@ -28,7 +28,7 @@ from lightning.pytorch.profilers import (
XLAProfiler,
)
from lightning.pytorch.utilities.exceptions import MisconfigurationException
-from lightning.pytorch.utilities.imports import _graphcore_available_and_importable, _habana_available_and_importable
+from lightning.pytorch.utilities.imports import _habana_available_and_importable
from lightning.pytorch.utilities.rank_zero import rank_zero_info, rank_zero_warn
@@ -158,16 +158,6 @@ def _log_device_info(trainer: "pl.Trainer") -> None:
num_tpu_cores = trainer.num_devices if isinstance(trainer.accelerator, XLAAccelerator) else 0
rank_zero_info(f"TPU available: {XLAAccelerator.is_available()}, using: {num_tpu_cores} TPU cores")
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator
-
- num_ipus = trainer.num_devices if isinstance(trainer.accelerator, IPUAccelerator) else 0
- ipu_available = IPUAccelerator.is_available()
- else:
- num_ipus = 0
- ipu_available = False
- rank_zero_info(f"IPU available: {ipu_available}, using: {num_ipus} IPUs")
-
if _habana_available_and_importable():
from lightning_habana import HPUAccelerator
@@ -192,12 +182,6 @@ def _log_device_info(trainer: "pl.Trainer") -> None:
if XLAAccelerator.is_available() and not isinstance(trainer.accelerator, XLAAccelerator):
rank_zero_warn("TPU available but not used. You can set it by doing `Trainer(accelerator='tpu')`.")
- if _graphcore_available_and_importable():
- from lightning_graphcore import IPUAccelerator
-
- if IPUAccelerator.is_available() and not isinstance(trainer.accelerator, IPUAccelerator):
- rank_zero_warn("IPU available but not used. You can set it by doing `Trainer(accelerator='ipu')`.")
-
if _habana_available_and_importable():
from lightning_habana import HPUAccelerator
diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py
index d0988d9d38..74b409a18b 100644
--- a/src/lightning/pytorch/trainer/trainer.py
+++ b/src/lightning/pytorch/trainer/trainer.py
@@ -136,7 +136,7 @@ class Trainer:
r"""Customize every aspect of training via flags.
Args:
- accelerator: Supports passing different accelerator types ("cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto")
+ accelerator: Supports passing different accelerator types ("cpu", "gpu", "tpu", "hpu", "mps", "auto")
as well as custom accelerator instances.
strategy: Supports different training strategies with aliases as well custom strategies.
@@ -151,7 +151,7 @@ class Trainer:
precision: Double precision (64, '64' or '64-true'), full precision (32, '32' or '32-true'),
16bit mixed precision (16, '16', '16-mixed') or bfloat16 mixed precision ('bf16', 'bf16-mixed').
- Can be used on CPU, GPU, TPUs, HPUs or IPUs.
+ Can be used on CPU, GPU, TPUs, or HPUs.
Default: ``'32-true'``.
logger: Logger (or iterable collection of loggers) for experiment tracking. A ``True`` value uses
diff --git a/src/lightning/pytorch/utilities/imports.py b/src/lightning/pytorch/utilities/imports.py
index 2723eb2ccc..eabc1c1469 100644
--- a/src/lightning/pytorch/utilities/imports.py
+++ b/src/lightning/pytorch/utilities/imports.py
@@ -41,15 +41,6 @@ def _try_import_module(module_name: str) -> bool:
return False
-_LIGHTNING_GRAPHCORE_AVAILABLE = RequirementCache("lightning-graphcore>=0.1.0")
-
-
-def _graphcore_available_and_importable() -> bool:
- # This is defined as a function instead of a constant to avoid circular imports, because `lightning_graphcore`
- # also imports Lightning
- return bool(_LIGHTNING_GRAPHCORE_AVAILABLE) and _try_import_module("lightning_graphcore")
-
-
_LIGHTNING_HABANA_AVAILABLE = RequirementCache("lightning-habana>=1.2.0")
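The remaining Habana check follows the same availability pattern the removed Graphcore helper used; a hedged sketch with a hypothetical package name and a simplified stand-in for the file's ``_try_import_module`` helper:

```python
import importlib

from lightning_utilities.core.imports import RequirementCache

_MY_EXTENSION_AVAILABLE = RequirementCache("my-extension>=1.0")  # hypothetical requirement


def _try_import_module(module_name: str) -> bool:
    # Simplified stand-in for the helper defined earlier in this file.
    try:
        importlib.import_module(module_name)
        return True
    except ImportError:
        return False


def _my_extension_available_and_importable() -> bool:
    # Check the pinned requirement first, then confirm the module actually imports,
    # which avoids circular-import surprises at collection time.
    return bool(_MY_EXTENSION_AVAILABLE) and _try_import_module("my_extension")
```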
diff --git a/src/pytorch_lightning/README.md b/src/pytorch_lightning/README.md
index 067038614a..f1e1d5dfc7 100644
--- a/src/pytorch_lightning/README.md
+++ b/src/pytorch_lightning/README.md
@@ -62,7 +62,7 @@ Lightning forces the following structure to your code which makes it reusable an
- Non-essential research code (logging, etc... this goes in Callbacks).
- Data (use PyTorch DataLoaders or organize them into a LightningDataModule).
-Once you do this, you can train on multiple-GPUs, TPUs, CPUs, IPUs, HPUs and even in 16-bit precision without changing your code!
+Once you do this, you can train on multiple GPUs, TPUs, CPUs, HPUs and even in 16-bit precision without changing your code!
[Get started in just 15 minutes](https://lightning.ai/docs/pytorch/latest/starter/introduction.html)
diff --git a/tests/tests_fabric/conftest.py b/tests/tests_fabric/conftest.py
index d5e20338c1..64eb5c1f1f 100644
--- a/tests/tests_fabric/conftest.py
+++ b/tests/tests_fabric/conftest.py
@@ -63,7 +63,6 @@ def restore_env_variables():
"PL_GLOBAL_SEED",
"PL_SEED_WORKERS",
"RANK", # set by DeepSpeed
- "POPLAR_ENGINE_OPTIONS", # set by IPUStrategy
"CUDA_MODULE_LOADING", # leaked by PyTorch
"CRC32C_SW_MODE", # set by tensorboardX
"OMP_NUM_THREADS", # set by our launchers
diff --git a/tests/tests_pytorch/conftest.py b/tests/tests_pytorch/conftest.py
index 96554ff24b..f42f53a56a 100644
--- a/tests/tests_pytorch/conftest.py
+++ b/tests/tests_pytorch/conftest.py
@@ -83,7 +83,6 @@ def restore_env_variables():
"WANDB_REQUIRE_SERVICE",
"WANDB_SERVICE",
"RANK", # set by DeepSpeed
- "POPLAR_ENGINE_OPTIONS", # set by IPUStrategy
"CUDA_MODULE_LOADING", # leaked by PyTorch
"KMP_INIT_AT_FORK", # leaked by PyTorch
"KMP_DUPLICATE_LIB_OK", # leaked by PyTorch
diff --git a/tests/tests_pytorch/graveyard/test_ipu.py b/tests/tests_pytorch/graveyard/test_ipu.py
deleted file mode 100644
index 520729f80c..0000000000
--- a/tests/tests_pytorch/graveyard/test_ipu.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from importlib import import_module
-
-import pytest
-
-
-@pytest.mark.parametrize(
- ("import_path", "name"),
- [
- ("lightning.pytorch.accelerators", "IPUAccelerator"),
- ("lightning.pytorch.accelerators.ipu", "IPUAccelerator"),
- ("lightning.pytorch.strategies", "IPUStrategy"),
- ("lightning.pytorch.strategies.ipu", "IPUStrategy"),
- ("lightning.pytorch.plugins.precision", "IPUPrecisionPlugin"),
- ("lightning.pytorch.plugins.precision.ipu", "IPUPrecisionPlugin"),
- ],
-)
-def test_extracted_ipu(import_path, name):
- module = import_module(import_path)
- cls = getattr(module, name)
- with pytest.raises(NotImplementedError, match=f"{name}` class has been moved to an external package.*"):
- cls()
diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py
index a9c5306815..b1cafbf9dc 100644
--- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py
+++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py
@@ -58,7 +58,6 @@ from lightning.pytorch.trainer.connectors.accelerator_connector import _Accelera
from lightning.pytorch.utilities.exceptions import MisconfigurationException
from lightning.pytorch.utilities.imports import (
_LIGHTNING_HABANA_AVAILABLE,
- _graphcore_available_and_importable,
)
from lightning_utilities.core.imports import package_available
@@ -580,16 +579,6 @@ def test_unsupported_tpu_choice(xla_available, tpu_available):
Trainer(accelerator="tpu", precision="16-true", strategy="ddp")
-def mock_ipu_available(monkeypatch, value=True):
- # TODO: this isn't really mocking. it should be implemented and used as `mock_hpu_count`
- try:
- import lightning_graphcore
- except ModuleNotFoundError:
- return
- monkeypatch.setattr(lightning_graphcore.accelerator, "_IPU_AVAILABLE", value)
- monkeypatch.setattr(lightning_graphcore.strategy, "_IPU_AVAILABLE", value)
-
-
if _LIGHTNING_HABANA_AVAILABLE:
from lightning_habana import HPUAccelerator, HPUParallelStrategy, SingleHPUStrategy
else:
@@ -657,7 +646,6 @@ def mock_hpu_count(monkeypatch, n=1):
def test_devices_auto_choice_cpu(monkeypatch, cuda_count_0):
mock_hpu_count(monkeypatch, 0)
- mock_ipu_available(monkeypatch, False)
mock_xla_available(monkeypatch, False)
trainer = Trainer(accelerator="auto", devices="auto")
assert trainer.num_devices == 1
@@ -915,7 +903,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 0)
mock_mps_count(monkeypatch, 0)
mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, False)
trainer = Trainer()
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.strategy, SingleDeviceStrategy)
@@ -927,7 +914,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 1)
mock_mps_count(monkeypatch, 0)
mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, False)
trainer = Trainer()
assert isinstance(trainer.accelerator, CUDAAccelerator)
assert isinstance(trainer.strategy, SingleDeviceStrategy)
@@ -939,7 +925,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 4)
mock_mps_count(monkeypatch, 0)
mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, False)
trainer = Trainer()
assert isinstance(trainer.accelerator, CUDAAccelerator)
assert isinstance(trainer.strategy, (SingleDeviceStrategy if is_interactive else DDPStrategy))
@@ -955,7 +940,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 0)
mock_mps_count(monkeypatch, 1)
mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, False)
connector = _AcceleratorConnector()
assert isinstance(connector.accelerator, MPSAccelerator)
assert isinstance(connector.strategy, SingleDeviceStrategy)
@@ -965,7 +949,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
with monkeypatch.context():
mock_cuda_count(monkeypatch, 0)
mock_mps_count(monkeypatch, 0)
- mock_ipu_available(monkeypatch, False)
_mock_tpu_available(True)
monkeypatch.setattr(lightning.pytorch.accelerators.XLAAccelerator, "auto_device_count", lambda *_: 1)
monkeypatch.setattr(torch, "device", DeviceMock())
@@ -982,7 +965,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 0)
mock_mps_count(monkeypatch, 0)
_mock_tpu_available(True)
- mock_ipu_available(monkeypatch, False)
connector = _AcceleratorConnector()
assert isinstance(connector.accelerator, XLAAccelerator)
assert isinstance(connector.strategy, XLAStrategy)
@@ -991,28 +973,11 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
assert connector.strategy._start_method == "fork"
assert connector.strategy.launcher.is_interactive_compatible
- # Single/Multi IPU: strategy is the same
- if _graphcore_available_and_importable():
- with monkeypatch.context():
- mock_cuda_count(monkeypatch, 0)
- mock_mps_count(monkeypatch, 0)
- mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, True)
- from lightning_graphcore import IPUAccelerator, IPUStrategy
-
- connector = _AcceleratorConnector()
- assert isinstance(connector.accelerator, IPUAccelerator)
- assert isinstance(connector.strategy, IPUStrategy)
- assert connector._devices_flag == 4
- assert isinstance(connector.strategy.cluster_environment, LightningEnvironment)
- assert connector.strategy.launcher is None
-
# Single HPU
with monkeypatch.context():
mock_cuda_count(monkeypatch, 0)
mock_mps_count(monkeypatch, 0)
mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, False)
mock_hpu_count(monkeypatch, 1)
connector = _AcceleratorConnector()
assert isinstance(connector.accelerator, HPUAccelerator)
@@ -1029,7 +994,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 0)
mock_mps_count(monkeypatch, 0)
mock_tpu_available(monkeypatch, False)
- mock_ipu_available(monkeypatch, False)
mock_hpu_count(monkeypatch, 8)
connector = _AcceleratorConnector()
assert isinstance(connector.accelerator, HPUAccelerator)
@@ -1047,7 +1011,6 @@ def test_connector_auto_selection(monkeypatch, is_interactive):
mock_cuda_count(monkeypatch, 2)
mock_mps_count(monkeypatch, 0)
_mock_tpu_available(True)
- mock_ipu_available(monkeypatch, False)
connector = _AcceleratorConnector()
assert isinstance(connector.accelerator, XLAAccelerator)
assert isinstance(connector.strategy, XLAStrategy)