From 39a6435726e7a110df1189efe63094888e0c23be Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sat, 4 Jul 2020 17:31:12 +0200 Subject: [PATCH] Revert "Revert "join coverage (#2460)" (#2499)" (#2500) This reverts commit 355918af8dcee6bb21cbdc497b7b33243fec8db3. --- .codecov.yml | 4 +- .drone.yml | 4 +- .github/workflows/ci-test-base.yml | 22 ++++++-- .github/workflows/ci-testing.yml | 14 ++++- .github/workflows/tpu-testing.yml | 49 +++++++++++------ README.md | 2 +- docker/Dockerfile | 6 +-- docker/README.md | 6 +-- docker/tpu/Dockerfile | 85 ++++++++++++++++-------------- docker/tpu/docker-entrypoint.sh | 4 +- docker/tpu/tpu_test_cases.jsonnet | 3 +- 11 files changed, 127 insertions(+), 72 deletions(-) diff --git a/.codecov.yml b/.codecov.yml index 0102d5269a..36ffcaf1bb 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -9,7 +9,7 @@ codecov: strict_yaml_branch: "yaml-config" require_ci_to_pass: yes notify: - # after_n_builds: 2 + after_n_builds: 19 wait_for_ci: yes # https://docs.codecov.io/docs/codecov-yaml#section-expired-reports max_report_age: off @@ -50,4 +50,4 @@ comment: layout: header, diff require_changes: false behavior: default # update if exists else create new - # after_n_builds: 2 + after_n_builds: 19 diff --git a/.drone.yml b/.drone.yml index a5338de29f..8a022ca2eb 100644 --- a/.drone.yml +++ b/.drone.yml @@ -44,7 +44,9 @@ steps: #- cd docs; make doctest; make coverage - coverage report # see: https://docs.codecov.io/docs/merging-reports - - codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG + - codecov --token $CODECOV_TOKEN --flags=gpu,pytest --name="GPU-coverage" --env=linux --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --commit $DRONE_COMMIT + # --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG --pr $DRONE_PULL_REQUEST + # - codecov --token $CODECOV_TOKEN --flags=gpu,pytest --build $DRONE_BUILD_NUMBER - python tests/collect_env_details.py trigger: diff --git a/.github/workflows/ci-test-base.yml b/.github/workflows/ci-test-base.yml index c5fa458ec1..ba4411fe70 100644 --- a/.github/workflows/ci-test-base.yml +++ b/.github/workflows/ci-test-base.yml @@ -75,8 +75,7 @@ jobs: - name: Test Package [only] run: | # NOTE: run coverage on tests does not propagare faler status for Win, https://github.com/nedbat/coveragepy/issues/1003 - python -m pytest pytorch_lightning -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml --ignore=pytorch_lightning/loggers/comet.py --ignore=pytorch_lightning/loggers/mlflow.py --ignore=pytorch_lightning/loggers/neptune.py --ignore=pytorch_lightning/loggers/test_tube.py --ignore=pytorch_lightning/loggers/wandb.py --ignore=pytorch_lightning/metrics/sklearns.py - # coverage report + coverage run --source pytorch_lightning -m pytest pytorch_lightning -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml --ignore=pytorch_lightning/loggers/comet.py --ignore=pytorch_lightning/loggers/mlflow.py --ignore=pytorch_lightning/loggers/neptune.py --ignore=pytorch_lightning/loggers/test_tube.py --ignore=pytorch_lightning/loggers/wandb.py --ignore=pytorch_lightning/metrics/sklearns.py - name: Upload pytest test results uses: actions/upload-artifact@master @@ -84,4 +83,21 @@ jobs: name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml # Use always() to always run this step to publish test results when there are test failures - if: always() \ No newline at end of file + if: always() + + - name: Statistics + if: success() + run: | + coverage report + coverage xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + if: always() + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: coverage.xml + flags: cpu,pytest + env_vars: ${{ runner.os }},python${{ matrix.python-version }} + name: Base-coverage + fail_ci_if_error: true diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index eaa40132b7..a19e666498 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -130,4 +130,16 @@ jobs: - name: Statistics if: success() run: | - coverage report + coverage report + coverage xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + if: always() + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: coverage.xml + flags: cpu,pytest + env_vars: ${{ runner.os }},python${{ matrix.python-version }} + name: CPU-coverage + fail_ci_if_error: true diff --git a/.github/workflows/tpu-testing.yml b/.github/workflows/tpu-testing.yml index 610b6687c2..686346d98f 100644 --- a/.github/workflows/tpu-testing.yml +++ b/.github/workflows/tpu-testing.yml @@ -4,6 +4,7 @@ on: push: branches: - master + # TODO: temporal disable TPU testing until we find way how to pass credentials to forked PRs pull_request: branches: - master @@ -17,20 +18,25 @@ env: jobs: setup-build-publish-deploy: name: tpu-testing-job - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 + # Timeout: https://stackoverflow.com/a/59076067/4521646 + timeout-minutes: 50 steps: - name: Install Go uses: actions/setup-go@v2 with: go-version: 1.14.x + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 - name: Checkout Pytorch Lightning uses: actions/checkout@v2 with: repository: PyTorchLightning/pytorch-lightning ref: ${{ github.event.pull_request.head.sha }} - path: main - name: Checkout ml-testing-accelerators uses: actions/checkout@v2 @@ -54,7 +60,7 @@ jobs: shell: bash - name: Build and Push Docker Image run: | - cd main/docker/tpu + cd docker/tpu docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" . docker push "$IMAGE:$GITHUB_RUN_ID" shell: bash @@ -71,7 +77,7 @@ jobs: - name: Deploy the job on the kubernetes cluster run: |- - job_name=$(jsonnet -J ml-testing-accelerators/ main/docker/tpu/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \ + job_name=$(jsonnet -J ml-testing-accelerators/ docker/tpu/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \ job_name=${job_name#job.batch/} && \ job_name=${job_name% created} && \ echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \ @@ -99,15 +105,28 @@ jobs: exit $status_code shell: bash - # todo: to be used after enable merging reports from different CIs - #- name: Upload coverage to Codecov - # uses: codecov/codecov-action@v1 - # if: always() - # with: - # token: ${{ secrets.CODECOV_TOKEN }} - # file: ./xx01 - # flags: tpu,pytest - # # env_vars: OS,PYTHON - # # name: codecov-umbrella - # fail_ci_if_error: true + - name: Statistics + if: success() + run: | + mv ./xx01 coverage + # TODO: add human readable report + cat coverage + # sudo pip install pycobertura + # pycobertura show coverage.xml + - name: Upload coverage results + uses: actions/upload-artifact@master + with: + name: coverage-TPU + path: coverage + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + if: always() + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: coverage + flags: tpu,pytest + env_vars: ${{ runner.os }},python + name: TPU-coverage + fail_ci_if_error: true diff --git a/README.md b/README.md index 03b3624012..b9d6a0d4c0 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ | :---: | :---: | :---: | :---: | | Conda py3.7 [linux] | ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg) | ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg) | ![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg) | | Linux py3.7 [GPU] | - | - | [![Build Status](http://35.192.60.23/api/badges/PyTorchLightning/pytorch-lightning/status.svg)](http://35.192.60.23/PyTorchLightning/pytorch-lightning) | -| Linux py3.6 [TPU] | - | - | ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg) | +| Linux py3.7 [TPU] | - | - | ![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg) | | Linux py3.6 / py3.7 / py3.8 | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | | OSX py3.6 / py3.7 | - | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | | Windows py3.6 / py3.7 / py3.8 | [![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) |[![CI testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20testing/badge.svg?event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | diff --git a/docker/Dockerfile b/docker/Dockerfile index a4df9088da..1cd5d821a7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,7 +20,7 @@ WORKDIR /home/containeruser # install conda and python -RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ +RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \ chmod +x ~/miniconda.sh && \ ~/miniconda.sh -b -p /home/containeruser/conda && \ rm ~/miniconda.sh && \ @@ -31,8 +31,8 @@ RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest ENV PATH /home/containeruser/conda/bin:$PATH # install dependencies -RUN pip install torch==$PYTORCH_VERSION -f https://download.pytorch.org/whl/torch_stable.html -RUN git clone https://github.com/PyTorchLightning/pytorch-lightning.git --single-branch --branch $LIGHTNING_VERSION && \ +RUN pip install torch==$PYTORCH_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \ + git clone https://github.com/PyTorchLightning/pytorch-lightning.git --single-branch --branch $LIGHTNING_VERSION && \ pip install ./pytorch-lightning && \ pip install -r pytorch-lightning/requirements/extra.txt && \ rm -rf pytorch-lightning diff --git a/docker/README.md b/docker/README.md index a3dfc37bbe..72d924cef2 100644 --- a/docker/README.md +++ b/docker/README.md @@ -12,10 +12,10 @@ or with specific arguments ```bash git clone docker image build \ - -t pytorch-lightning:py36 \ + -t pytorch-lightning:py38 \ -f docker/Dockerfile \ - --build-arg PYTHON_VERSION=3.6 \ - --build-arg PYTORCH_VERSION=1.3 \ + --build-arg PYTHON_VERSION=3.8 \ + --build-arg PYTORCH_VERSION=1.4 \ . ``` diff --git a/docker/tpu/Dockerfile b/docker/tpu/Dockerfile index 33ef5f03f1..fcc870f4d3 100644 --- a/docker/tpu/Dockerfile +++ b/docker/tpu/Dockerfile @@ -4,66 +4,71 @@ FROM google/cloud-sdk:slim ARG GITHUB_REF=refs/heads/master ARG TEST_IMAGE=0 -# This Dockerfile installs pytorch/xla 3.7 wheels. There are also 3.6 -# wheels available; see below. -ENV PYTHON_VERSION=3.6 +# This Dockerfile installs pytorch/xla 3.7 wheels. There are also 3.6 wheels available; see below. +ENV PYTHON_VERSION=3.7 + +SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git \ - curl \ - ca-certificates + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-dev \ + $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \ + build-essential \ + cmake \ + git \ + curl \ + wget \ + ca-certificates \ + libomp5 \ + && \ -# Install conda and python. -# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385 -RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \ - chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b && \ - rm ~/miniconda.sh +# install python dependencies + wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \ + python${PYTHON_VERSION} get-pip.py && \ + rm get-pip.py && \ -ENV PATH=/root/miniconda3/bin:$PATH +# Set the default python and install PIP packages + update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 -RUN conda create -y --name container python=$PYTHON_VERSION - -# Run the rest of commands within the new conda env. -# Use absolute path to appease Codefactor. -SHELL ["/root/miniconda3/bin/conda", "run", "-n", "container", "/bin/bash", "-c"] -RUN conda install -y python=$PYTHON_VERSION mkl - -RUN pip uninstall -y torch && \ +RUN py_version=${PYTHON_VERSION/./} && \ # Python 3.7 wheels are available. Replace cp36-cp36m with cp37-cp37m - gsutil cp 'gs://tpu-pytorch/wheels/torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \ - gsutil cp 'gs://tpu-pytorch/wheels/torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \ - gsutil cp 'gs://tpu-pytorch/wheels/torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' . && \ - pip install 'torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ - pip install 'torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ - pip install 'torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ - rm 'torch-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ - rm 'torch_xla-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ - rm 'torchvision-nightly-cp${PYTHON_VERSION/./}-cp${PYTHON_VERSION/./}m-linux_x86_64.whl' && \ - apt-get install -y libomp5 + gsutil cp "gs://tpu-pytorch/wheels/torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ + gsutil cp "gs://tpu-pytorch/wheels/torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ + gsutil cp "gs://tpu-pytorch/wheels/torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ + pip install "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ + pip install "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ + pip install "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ + rm "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ + rm "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ + rm "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ + pip install mkl -ENV LD_LIBRARY_PATH=root/miniconda3/envs/container/lib +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" + +RUN python -c "import torch; print(torch.__version__)" # Install pytorch-lightning at the current PR, plus dependencies. RUN git clone https://github.com/PyTorchLightning/pytorch-lightning.git && \ cd pytorch-lightning && \ git fetch origin $GITHUB_REF:CI && \ git checkout CI && \ - cd .. && \ - pip install ./pytorch-lightning + pip install --requirement ./requirements/base.txt -# If using this image for tests, intall more dependencies and don't delete +# If using this image for tests, intall more dependencies and don"t delete # the source code where the tests live. -RUN if [ $TEST_IMAGE -eq 1 ] ; then \ +RUN \ + # TODO: use conda sources if possible + # drop Horovod + # python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [ln for ln in open(fname).readlines() if not ln.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \ + # pip install -r pytorch-lightning/requirements/extra.txt ; && \ + if [ $TEST_IMAGE -eq 1 ] ; then \ pip install -r pytorch-lightning/requirements/test.txt ; \ else \ rm -rf pytorch-lightning ; \ fi -RUN conda init bash -RUN python -c "import pytorch_lightning as pl; print(pl.__version__)" +#RUN python -c "import pytorch_lightning as pl; print(pl.__version__)" COPY docker-entrypoint.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/docker-entrypoint.sh diff --git a/docker/tpu/docker-entrypoint.sh b/docker/tpu/docker-entrypoint.sh index 103242b942..57abc703c8 100644 --- a/docker/tpu/docker-entrypoint.sh +++ b/docker/tpu/docker-entrypoint.sh @@ -1,7 +1,7 @@ #!/bin/bash -source ~/.bashrc +# source ~/.bashrc echo "running docker-entrypoint.sh" -conda activate container +# conda activate container echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS echo "printed TPU info" export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}" diff --git a/docker/tpu/tpu_test_cases.jsonnet b/docker/tpu/tpu_test_cases.jsonnet index 1f5cad0822..9294727f42 100644 --- a/docker/tpu/tpu_test_cases.jsonnet +++ b/docker/tpu/tpu_test_cases.jsonnet @@ -20,7 +20,8 @@ local tputests = base.BaseTest { command: utils.scriptCommand( ||| - coverage run --source=pytorch_lightning -m pytest pytorch-lightning/tests/models/test_tpu.py -v + cd pytorch-lightning + coverage run --source=pytorch_lightning -m pytest tests/models/test_tpu.py -v test_exit_code=$? echo "\n||| END PYTEST LOGS |||\n" coverage xml