integrate with CircleCI (#2486)
* add circleCI * wip * CircleCI setup that worked on my private repo. Use a working pytorch-lightning commit * Fix the orb imports * Update circleci header comment * Try to pull the GITHUB_REF from the CI_PULL_REQUEST * Use null instead of space for 'sed' * Add TODO for codecov * Remove echo of GKE_CLUSTER since it will be redacted by CircleCI. * Try running codecov upload. * Try using codecov orb * Use pip install codecov * Use codecov orb again since it should be approved * dockers/tpu-tests/Dockerfile * action * suggestions * drop suggestion * suggestion Co-authored-by: Jirka <jirka@pytorchlightning.ai>
This commit is contained in:
parent
1e68968ed7
commit
d0b8e850a4
|
@ -1,43 +1,118 @@
|
|||
# Python CircleCI 2.0 configuration file
|
||||
#
|
||||
# Check https://circleci.com/docs/2.0/language-python/ for more details
|
||||
#
|
||||
version: 2.0
|
||||
# Python CircleCI 2.1 configuration file.
|
||||
version: 2.1
|
||||
orbs:
|
||||
gcp-gke: circleci/gcp-gke@1.0.4
|
||||
go: circleci/go@1.3.0
|
||||
codecov: codecov/codecov@1.1.0
|
||||
|
||||
references:
|
||||
|
||||
install_deps: &install_deps
|
||||
run:
|
||||
name: Install Dependences
|
||||
command: |
|
||||
sudo apt-get update && sudo apt-get install -y cmake
|
||||
pip install -r requirements/base.txt -q
|
||||
pip install -r requirements/test.txt -q
|
||||
|
||||
tests: &tests
|
||||
run:
|
||||
name: Testing
|
||||
command: |
|
||||
python --version ; pip --version ; pip list
|
||||
python -m pytest pytorch_lightning -v --junitxml=test-reports/pytest_junit.xml --ignore=pytorch_lightning/loggers/comet.py --ignore=pytorch_lightning/loggers/mlflow.py --ignore=pytorch_lightning/loggers/neptune.py --ignore=pytorch_lightning/loggers/test_tube.py --ignore=pytorch_lightning/loggers/wandb.py --ignore=pytorch_lightning/metrics/sklearns.py
|
||||
no_output_timeout: 15m
|
||||
|
||||
make_docs: &make_docs
|
||||
run:
|
||||
name: Make Documentation
|
||||
command: |
|
||||
# First run the same pipeline as Read-The-Docs
|
||||
# apt-get update && apt-get install -y cmake
|
||||
# using: https://hub.docker.com/r/readthedocs/build
|
||||
# we need to use py3.7 or higher because of an issue with metaclass inheritance
|
||||
pyenv global 3.7.3
|
||||
python --version
|
||||
pip install -r requirements/docs.txt
|
||||
cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W"
|
||||
|
||||
checkout_ml_testing: &checkout_ml_testing
|
||||
run:
|
||||
name: Make Documentation
|
||||
name: Checkout ml-testing-accelerators
|
||||
command: |
|
||||
# First run the same pipeline as Read-The-Docs
|
||||
# apt-get update && apt-get install -y cmake
|
||||
# using: https://hub.docker.com/r/readthedocs/build
|
||||
# we need to use py3.7 or higher because of an issue with metaclass inheritance
|
||||
pyenv global 3.7.3
|
||||
python --version
|
||||
pip install -r requirements/docs.txt
|
||||
cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W"
|
||||
git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
|
||||
cd ml-testing-accelerators
|
||||
git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
|
||||
git checkout stable
|
||||
cd ..
|
||||
|
||||
build_push_docker: &build_push_docker
|
||||
run:
|
||||
name: Build and push Docker image
|
||||
command: |
|
||||
gcloud --quiet auth configure-docker
|
||||
cd dockers/tpu-tests
|
||||
# TODO: How to find the GITHUB_REF in CircleCI?
|
||||
# $CI_PULL_REQUEST seems to be of form: https://github.com/org/repo-name/pull/11.
|
||||
# Grab the last bit, e.g. pull/11, convert to pull/11/head, and use it
|
||||
# for the GITHUB_REF so Docker can pull the latest pending code in PR.
|
||||
git_ref=$(echo "$CI_PULL_REQUEST" | sed "s/.*pytorch-lightning\///")/head
|
||||
docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=$git_ref" .
|
||||
#docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" .
|
||||
docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
|
||||
|
||||
deploy_cluster: &deploy_cluster
|
||||
run:
|
||||
name: Deploy the job on the kubernetes cluster
|
||||
command: |
|
||||
go get github.com/google/go-jsonnet/cmd/jsonnet
|
||||
export PATH=$PATH:$HOME/go/bin
|
||||
job_name=$(jsonnet -J ml-testing-accelerators/ dockers/tpu-tests/tpu_test_cases.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -)
|
||||
job_name=${job_name#job.batch/}
|
||||
job_name=${job_name% created}
|
||||
echo "Waiting on kubernetes job: $job_name"
|
||||
i=0 && \
|
||||
# MAX_CHECKS checks spaced 30s apart (MAX_CHECKS=60 gives 1800s total).
|
||||
status_code=2 && \
|
||||
# Check on the job periodically. Set the status code depending on what
|
||||
# happened to the job in Kubernetes. If we try MAX_CHECKS times and
|
||||
# still the job hasn't finished, give up and return the starting
|
||||
# non-zero status code.
|
||||
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
|
||||
echo "Done waiting. Job status code: $status_code" && \
|
||||
# Allow time for logs to flush.
|
||||
sleep 30 && \
|
||||
echo "JOB_NAME: $job_name" && \
|
||||
gcloud logging read "resource.type=k8s_container resource.labels.project_id=$GOOGLE_PROJECT_ID resource.labels.location=$GOOGLE_COMPUTE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$GOOGLE_PROJECT_ID > /tmp/full_output.txt && \
|
||||
if grep -q '<?xml version="1.0" ?>' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/<?xml version="1.0" ?>/'; else mv /tmp/full_output.txt xx00; fi && \
|
||||
# First portion is the test logs. Print these to the CircleCI job's stdout.
|
||||
cat xx00 && \
|
||||
echo "Done with log retrieval attempt." && \
|
||||
gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
|
||||
exit $status_code
|
||||
|
||||
stats: &stats
|
||||
run:
|
||||
name: Statistics
|
||||
command: |
|
||||
mv ./xx01 coverage.xml
|
||||
# TODO: add human readable report
|
||||
cat coverage.xml
|
||||
sudo pip install pycobertura
|
||||
pycobertura show coverage.xml
|
||||
|
||||
jobs:
|
||||
|
||||
Build-Docs:
|
||||
TPU-tests:
|
||||
docker:
|
||||
- image: circleci/python:3.7
|
||||
environment:
|
||||
- MAX_CHECKS: 60
|
||||
steps:
|
||||
- checkout
|
||||
- go/install
|
||||
- *checkout_ml_testing
|
||||
- gcp-gke/install
|
||||
- gcp-gke/update-kubeconfig-with-credentials:
|
||||
cluster: $GKE_CLUSTER
|
||||
perform-login: true
|
||||
- setup_remote_docker
|
||||
- *build_push_docker
|
||||
- *deploy_cluster
|
||||
- *stats
|
||||
- codecov/upload:
|
||||
file: coverage.xml
|
||||
flags: tpu,pytest
|
||||
upload_name: TPU-coverage
|
||||
|
||||
- store_artifacts:
|
||||
path: coverage.xml
|
||||
|
||||
build-Docs:
|
||||
docker:
|
||||
- image: readthedocs/build:latest
|
||||
steps:
|
||||
|
@ -48,24 +123,9 @@ jobs:
|
|||
path: docs/build/html/
|
||||
destination: html
|
||||
|
||||
CPU-Tests:
|
||||
# todo: to be replaced by TPU tests
|
||||
docker:
|
||||
- image: circleci/python:3.6
|
||||
environment:
|
||||
- TORCH_VERSION: "torch"
|
||||
steps: &steps
|
||||
- checkout
|
||||
- *install_deps
|
||||
- *tests
|
||||
- store_test_results:
|
||||
path: test-reports
|
||||
- store_artifacts:
|
||||
path: test-reports
|
||||
|
||||
workflows:
|
||||
version: 2
|
||||
build:
|
||||
tpu-tests:
|
||||
jobs:
|
||||
- Build-Docs
|
||||
- CPU-Tests
|
||||
- build-Docs
|
||||
- TPU-tests
|
||||
|
|
|
@ -5,9 +5,9 @@ on:
|
|||
branches:
|
||||
- master
|
||||
# TODO: temporarily disable TPU testing until we find a way to pass credentials to forked PRs
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
# pull_request:
|
||||
# branches:
|
||||
# - master
|
||||
|
||||
env:
|
||||
PROJECT_ID: ${{ secrets.GKE_PROJECT }}
|
||||
|
|
|
@ -13,6 +13,7 @@ ARG TEST_IMAGE=0
|
|||
# Install pytorch-lightning at the current PR, plus dependencies.
|
||||
RUN git clone https://github.com/PyTorchLightning/pytorch-lightning.git && \
|
||||
cd pytorch-lightning && \
|
||||
echo $GITHUB_REF && \
|
||||
git fetch origin $GITHUB_REF:CI && \
|
||||
git checkout CI && \
|
||||
pip install --requirement ./requirements/base.txt --no-cache-dir
|
||||
|
|
Loading…
Reference in New Issue