diff --git a/.codecov.yml b/.codecov.yml index 36ffcaf1bb..0102d5269a 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -9,7 +9,7 @@ codecov: strict_yaml_branch: "yaml-config" require_ci_to_pass: yes notify: - after_n_builds: 19 + # after_n_builds: 2 wait_for_ci: yes # https://docs.codecov.io/docs/codecov-yaml#section-expired-reports max_report_age: off @@ -50,4 +50,4 @@ comment: layout: header, diff require_changes: false behavior: default # update if exists else create new - after_n_builds: 19 + # after_n_builds: 2 diff --git a/.drone.yml b/.drone.yml index 8a022ca2eb..a5338de29f 100644 --- a/.drone.yml +++ b/.drone.yml @@ -44,9 +44,7 @@ steps: #- cd docs; make doctest; make coverage - coverage report # see: https://docs.codecov.io/docs/merging-reports - - codecov --token $CODECOV_TOKEN --flags=gpu,pytest --name="GPU-coverage" --env=linux --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --commit $DRONE_COMMIT - # --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG --pr $DRONE_PULL_REQUEST - # - codecov --token $CODECOV_TOKEN --flags=gpu,pytest --build $DRONE_BUILD_NUMBER + - codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG - python tests/collect_env_details.py trigger: diff --git a/.github/workflows/ci-test-base.yml b/.github/workflows/ci-test-base.yml index ba4411fe70..c5fa458ec1 100644 --- a/.github/workflows/ci-test-base.yml +++ b/.github/workflows/ci-test-base.yml @@ -75,7 +75,8 @@ jobs: - name: Test Package [only] run: | # NOTE: run coverage on tests does not propagare faler status for Win, https://github.com/nedbat/coveragepy/issues/1003 - coverage run --source pytorch_lightning -m pytest pytorch_lightning -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml --ignore=pytorch_lightning/loggers/comet.py --ignore=pytorch_lightning/loggers/mlflow.py --ignore=pytorch_lightning/loggers/neptune.py --ignore=pytorch_lightning/loggers/test_tube.py --ignore=pytorch_lightning/loggers/wandb.py --ignore=pytorch_lightning/metrics/sklearns.py + python -m pytest pytorch_lightning -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml --ignore=pytorch_lightning/loggers/comet.py --ignore=pytorch_lightning/loggers/mlflow.py --ignore=pytorch_lightning/loggers/neptune.py --ignore=pytorch_lightning/loggers/test_tube.py --ignore=pytorch_lightning/loggers/wandb.py --ignore=pytorch_lightning/metrics/sklearns.py + # coverage report - name: Upload pytest test results uses: actions/upload-artifact@master @@ -83,21 +84,4 @@ jobs: name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml # Use always() to always run this step to publish test results when there are test failures - if: always() - - - name: Statistics - if: success() - run: | - coverage report - coverage xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - if: always() - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage.xml - flags: cpu,pytest - env_vars: ${{ runner.os }},python${{ matrix.python-version }} - name: Base-coverage - fail_ci_if_error: true + if: always() \ No newline at end of file diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index a19e666498..eaa40132b7 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -130,16 +130,4 @@ jobs: - name: Statistics if: success() run: | - coverage report - coverage xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - if: always() - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage.xml - flags: cpu,pytest - env_vars: ${{ runner.os }},python${{ matrix.python-version }} - name: CPU-coverage - fail_ci_if_error: true + coverage report diff --git a/.github/workflows/tpu-testing.yml b/.github/workflows/tpu-testing.yml index 686346d98f..610b6687c2 100644 --- a/.github/workflows/tpu-testing.yml +++ b/.github/workflows/tpu-testing.yml @@ -4,7 +4,6 @@ on: push: branches: - master - # TODO: temporal disable TPU testing until we find way how to pass credentials to forked PRs pull_request: branches: - master @@ -18,25 +17,20 @@ env: jobs: setup-build-publish-deploy: name: tpu-testing-job - runs-on: ubuntu-20.04 - # Timeout: https://stackoverflow.com/a/59076067/4521646 - timeout-minutes: 50 + runs-on: ubuntu-latest steps: - name: Install Go uses: actions/setup-go@v2 with: go-version: 1.14.x - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: 3.7 - name: Checkout Pytorch Lightning uses: actions/checkout@v2 with: repository: PyTorchLightning/pytorch-lightning ref: ${{ github.event.pull_request.head.sha }} + path: main - name: Checkout ml-testing-accelerators uses: actions/checkout@v2 @@ -60,7 +54,7 @@ jobs: shell: bash - name: Build and Push Docker Image run: | - cd docker/tpu + cd main/docker/tpu docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" . docker push "$IMAGE:$GITHUB_RUN_ID" shell: bash @@ -77,7 +71,7 @@ jobs: - name: Deploy the job on the kubernetes cluster run: |- - job_name=$(jsonnet -J ml-testing-accelerators/ docker/tpu/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \ + job_name=$(jsonnet -J ml-testing-accelerators/ main/docker/tpu/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \ job_name=${job_name#job.batch/} && \ job_name=${job_name% created} && \ echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \ @@ -105,28 +99,15 @@ jobs: exit $status_code shell: bash - - name: Statistics - if: success() - run: | - mv ./xx01 coverage - # TODO: add human readable report - cat coverage - # sudo pip install pycobertura - # pycobertura show coverage.xml + # todo: to be used after enable merging reports from different CIs + #- name: Upload coverage to Codecov + # uses: codecov/codecov-action@v1 + # if: always() + # with: + # token: ${{ secrets.CODECOV_TOKEN }} + # file: ./xx01 + # flags: tpu,pytest + # # env_vars: OS,PYTHON + # # name: codecov-umbrella + # fail_ci_if_error: true - - name: Upload coverage results - uses: actions/upload-artifact@master - with: - name: coverage-TPU - path: coverage - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - if: always() - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage - flags: tpu,pytest - env_vars: ${{ runner.os }},python - name: TPU-coverage - fail_ci_if_error: true diff --git a/README.md b/README.md index b9d6a0d4c0..03b3624012 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ | :---: | :---: | :---: | :---: | | Conda py3.7 [linux] | ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg) | ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg) | ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg) | | Linux py3.7 [GPU] | - | - | [![Build Status](http://35.192.60.23/api/badges/PyTorchLightning/pytorch-lightning/status.svg)](http://35.192.60.23/PyTorchLightning/pytorch-lightning) | -| Linux py3.7 [TPU] | - | - | ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg) | +| Linux py3.6 [TPU] | - | - | ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg) | | Linux py3.6 / py3.7 / py3.8 | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | | OSX py3.6 / py3.7 | - | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | | Windows py3.6 / py3.7 / py3.8 | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) |[![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | diff --git a/docker/Dockerfile b/docker/Dockerfile index 1cd5d821a7..a4df9088da 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,7 +20,7 @@ WORKDIR /home/containeruser # install conda and python -RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \ +RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ chmod +x ~/miniconda.sh && \ ~/miniconda.sh -b -p /home/containeruser/conda && \ rm ~/miniconda.sh && \ @@ -31,8 +31,8 @@ RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12 ENV PATH /home/containeruser/conda/bin:$PATH # install dependencies -RUN pip install torch==$PYTORCH_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \ - git clone https://github.com/PyTorchLightning/pytorch-lightning.git --single-branch --branch $LIGHTNING_VERSION && \ +RUN pip install torch==$PYTORCH_VERSION -f https://download.pytorch.org/whl/torch_stable.html +RUN git clone https://github.com/PyTorchLightning/pytorch-lightning.git --single-branch --branch $LIGHTNING_VERSION && \ pip install ./pytorch-lightning && \ pip install -r pytorch-lightning/requirements/extra.txt && \ rm -rf pytorch-lightning diff --git a/docker/README.md b/docker/README.md index 72d924cef2..a3dfc37bbe 100644 --- a/docker/README.md +++ b/docker/README.md @@ -12,10 +12,10 @@ or with specific arguments ```bash git clone docker image build \ - -t pytorch-lightning:py38 \ + -t pytorch-lightning:py36 \ -f docker/Dockerfile \ - --build-arg PYTHON_VERSION=3.8 \ - --build-arg PYTORCH_VERSION=1.4 \ + --build-arg PYTHON_VERSION=3.6 \ + --build-arg PYTORCH_VERSION=1.3 \ . ``` diff --git a/docker/tpu/Dockerfile b/docker/tpu/Dockerfile index fcc870f4d3..33ef5f03f1 100644 --- a/docker/tpu/Dockerfile +++ b/docker/tpu/Dockerfile @@ -4,71 +4,66 @@ FROM google/cloud-sdk:slim ARG GITHUB_REF=refs/heads/master ARG TEST_IMAGE=0 -# This Dockerfile installs pytorch/xla 3.7 wheels. There are also 3.6 wheels available; see below. -ENV PYTHON_VERSION=3.7 - -SHELL ["/bin/bash", "-c"] +# This Dockerfile installs pytorch/xla 3.7 wheels. There are also 3.6 +# wheels available; see below. +ENV PYTHON_VERSION=3.6 RUN apt-get update && apt-get install -y --no-install-recommends \ - python${PYTHON_VERSION} \ - python${PYTHON_VERSION}-dev \ - $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \ - build-essential \ - cmake \ - git \ - curl \ - wget \ - ca-certificates \ - libomp5 \ - && \ + build-essential \ + cmake \ + git \ + curl \ + ca-certificates -# install python dependencies - wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \ - python${PYTHON_VERSION} get-pip.py && \ - rm get-pip.py && \ +# Install conda and python. +# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385 +RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \ + chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b && \ + rm ~/miniconda.sh -# Set the default python and install PIP packages - update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 +ENV PATH=/root/miniconda3/bin:$PATH -RUN py_version=${PYTHON_VERSION/./} && \ +RUN conda create -y --name container python=$PYTHON_VERSION + +# Run the rest of commands within the new conda env. +# Use absolute path to appease Codefactor. +SHELL ["/root/miniconda3/bin/conda", "run", "-n", "container", "/bin/bash", "-c"] +RUN conda install -y python=$PYTHON_VERSION mkl + +RUN pip uninstall -y torch && \ # Python 3.7 wheels are available. Replace cp36-cp36m with cp37-cp37m - gsutil cp "gs://tpu-pytorch/wheels/torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ - gsutil cp "gs://tpu-pytorch/wheels/torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ - gsutil cp "gs://tpu-pytorch/wheels/torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ - pip install "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - pip install "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - pip install "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - rm "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - rm "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - rm "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - pip install mkl + gsutil cp 'gs://tpu-pytorch/wheels/torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \ + gsutil cp 'gs://tpu-pytorch/wheels/torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \ + gsutil cp 'gs://tpu-pytorch/wheels/torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \ + pip install 'torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ + pip install 'torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ + pip install 'torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ + rm 'torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ + rm 'torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ + rm 'torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ + apt-get install -y libomp5 -ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" - -RUN python -c "import torch; print(torch.__version__)" +ENV LD_LIBRARY_PATH=root/miniconda3/envs/container/lib # Install pytorch-lightning at the current PR, plus dependencies. RUN git clone https://github.com/PyTorchLightning/pytorch-lightning.git && \ cd pytorch-lightning && \ git fetch origin $GITHUB_REF:CI && \ git checkout CI && \ - pip install --requirement ./requirements/base.txt + cd .. && \ + pip install ./pytorch-lightning -# If using this image for tests, intall more dependencies and don"t delete +# If using this image for tests, intall more dependencies and don't delete # the source code where the tests live. -RUN \ - # TODO: use conda sources if possible - # drop Horovod - # python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [ln for ln in open(fname).readlines() if not ln.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \ - # pip install -r pytorch-lightning/requirements/extra.txt ; && \ - if [ $TEST_IMAGE -eq 1 ] ; then \ +RUN if [ $TEST_IMAGE -eq 1 ] ; then \ pip install -r pytorch-lightning/requirements/test.txt ; \ else \ rm -rf pytorch-lightning ; \ fi -#RUN python -c "import pytorch_lightning as pl; print(pl.__version__)" +RUN conda init bash +RUN python -c "import pytorch_lightning as pl; print(pl.__version__)" COPY docker-entrypoint.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/docker-entrypoint.sh diff --git a/docker/tpu/docker-entrypoint.sh b/docker/tpu/docker-entrypoint.sh index 57abc703c8..103242b942 100644 --- a/docker/tpu/docker-entrypoint.sh +++ b/docker/tpu/docker-entrypoint.sh @@ -1,7 +1,7 @@ #!/bin/bash -# source ~/.bashrc +source ~/.bashrc echo "running docker-entrypoint.sh" -# conda activate container +conda activate container echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS echo "printed TPU info" export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" diff --git a/docker/tpu/tpu_test_cases.jsonnet b/docker/tpu/tpu_test_cases.jsonnet index 9294727f42..1f5cad0822 100644 --- a/docker/tpu/tpu_test_cases.jsonnet +++ b/docker/tpu/tpu_test_cases.jsonnet @@ -20,8 +20,7 @@ local tputests = base.BaseTest { command: utils.scriptCommand( ||| - cd pytorch-lightning - coverage run --source=pytorch_lightning -m pytest tests/models/test_tpu.py -v + coverage run --source=pytorch_lightning -m pytest pytorch-lightning/tests/models/test_tpu.py -v test_exit_code=$? echo "\n||| END PYTEST LOGS |||\n" coverage xml