2023-03-17 08:42:58 +00:00
|
|
|
.. toctree::
|
|
|
|
:maxdepth: 1
|
|
|
|
:hidden:
|
|
|
|
|
|
|
|
<../advanced/gradient_accumulation>
|
|
|
|
<../advanced/distributed_communication>
|
|
|
|
<../advanced/multiple_setup>
|
2024-01-31 19:57:07 +00:00
|
|
|
<../advanced/compile>
|
2023-07-19 16:39:07 +00:00
|
|
|
<../advanced/model_parallel/fsdp>
|
2024-01-16 13:44:10 +00:00
|
|
|
<../guide/checkpoint/distributed_checkpoint>
|
2023-03-17 08:42:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
###############
|
|
|
|
Advanced skills
|
|
|
|
###############
|
|
|
|
|
|
|
|
.. raw:: html
|
|
|
|
|
|
|
|
<div class="display-card-container">
|
|
|
|
<div class="row">
|
|
|
|
|
|
|
|
.. displayitem::
|
2023-03-20 22:11:26 +00:00
|
|
|
:header: Use efficient gradient accumulation
|
2023-03-17 08:42:58 +00:00
|
|
|
:description: Learn how to perform efficient gradient accumulation in distributed settings
|
|
|
|
:button_link: ../advanced/gradient_accumulation.html
|
|
|
|
:col_css: col-md-4
|
2023-07-19 16:39:07 +00:00
|
|
|
:height: 170
|
2023-03-17 08:42:58 +00:00
|
|
|
:tag: advanced
|
|
|
|
|
|
|
|
.. displayitem::
|
2023-03-20 22:11:26 +00:00
|
|
|
:header: Use distributed communication
|
2023-03-17 08:42:58 +00:00
|
|
|
:description: Learn all about communication primitives for distributed operations: gather, reduce, broadcast, etc.
|
|
|
|
:button_link: ../advanced/distributed_communication.html
|
|
|
|
:col_css: col-md-4
|
2023-07-19 16:39:07 +00:00
|
|
|
:height: 170
|
2023-03-17 08:42:58 +00:00
|
|
|
:tag: advanced
|
|
|
|
|
|
|
|
.. displayitem::
|
2023-03-20 22:11:26 +00:00
|
|
|
:header: Use multiple models and optimizers
|
2023-03-17 08:42:58 +00:00
|
|
|
:description: See how flexible Fabric is when working with multiple models and optimizers!
|
|
|
|
:button_link: ../advanced/multiple_setup.html
|
|
|
|
:col_css: col-md-4
|
2023-07-19 16:39:07 +00:00
|
|
|
:height: 170
|
|
|
|
:tag: advanced
|
|
|
|
|
2024-01-31 19:57:07 +00:00
|
|
|
.. displayitem::
|
|
|
|
:header: Speed up models by compiling them
|
|
|
|
:description: Use torch.compile to speed up models on modern hardware
|
|
|
|
:button_link: ../advanced/compile.html
|
|
|
|
:col_css: col-md-4
|
|
|
|
:height: 170
|
|
|
|
:tag: advanced
|
|
|
|
|
2023-07-19 16:39:07 +00:00
|
|
|
.. displayitem::
|
|
|
|
:header: Train models with billions of parameters
|
2024-05-22 10:20:40 +00:00
|
|
|
:description: Train the largest models with FSDP/TP across multiple GPUs and machines
|
|
|
|
:button_link: ../advanced/model_parallel/index.html
|
2023-07-19 16:39:07 +00:00
|
|
|
:col_css: col-md-4
|
|
|
|
:height: 170
|
2023-03-17 08:42:58 +00:00
|
|
|
:tag: advanced
|
|
|
|
|
2024-01-16 13:44:10 +00:00
|
|
|
.. displayitem::
|
|
|
|
:header: Save and load very large models
|
|
|
|
:description: Save and load very large models efficiently with distributed checkpoints
|
|
|
|
:button_link: ../guide/checkpoint/distributed_checkpoint.html
|
|
|
|
:col_css: col-md-4
|
|
|
|
:height: 170
|
|
|
|
:tag: advanced
|
|
|
|
|
2023-03-17 08:42:58 +00:00
|
|
|
.. raw:: html
|
|
|
|
|
|
|
|
</div>
|
|
|
|
</div>
|