Use kubectl to get logs from TPU CI instead of gcloud logging. (#2918)
* Use kubectl to get logs from TPU CI instead of gcloud logging. * Update GitHub Action to read logs from kubectl rather than gcloud logging.
This commit is contained in:
parent
69d241c82e
commit
580a5bd1df
|
@ -63,10 +63,9 @@ references:
|
|||
printf "Waiting for job to finish: " && \
|
||||
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "."; fi; sleep $CHECK_SPEEP; done && \
|
||||
echo "Done waiting. Job status code: $status_code" && \
|
||||
# Allow time for logs to flush.
|
||||
sleep 30 && \
|
||||
echo "JOB_NAME: $job_name" && \
|
||||
gcloud logging read "resource.type=k8s_container resource.labels.project_id=$GOOGLE_PROJECT_ID resource.labels.location=$GOOGLE_COMPUTE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$GOOGLE_PROJECT_ID > /tmp/full_output.txt && \
|
||||
pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
|
||||
echo "GKE pod name: $pod_name" && \
|
||||
kubectl logs -f $pod_name --container=train > /tmp/full_output.txt
|
||||
if grep -q '<?xml version="1.0" ?>' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/<?xml version="1.0" ?>/'; else mv /tmp/full_output.txt xx00; fi && \
|
||||
# First portion is the test logs. Print these to GitHub Actions stdout.
|
||||
cat xx00 && \
|
||||
|
|
|
@ -93,12 +93,9 @@ jobs:
|
|||
printf "Waiting for job to finish: " && \
|
||||
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \
|
||||
echo "Done waiting. Job status code: $status_code" && \
|
||||
# Allow time for logs to flush.
|
||||
sleep 30 && \
|
||||
echo "JOB_NAME: $job_name" && \
|
||||
echo "GKE_CLUSTER: $GKE_CLUSTER" && \
|
||||
echo "GKE_ZONE: $GKE_ZONE" && \
|
||||
gcloud logging read "resource.type=k8s_container resource.labels.project_id=$PROJECT_ID resource.labels.location=$GKE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$PROJECT_ID > /tmp/full_output.txt && \
|
||||
pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
|
||||
echo "GKE pod name: $pod_name" && \
|
||||
kubectl logs -f $pod_name --container=train > /tmp/full_output.txt
|
||||
if grep -q '<?xml version="1.0" ?>' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/<?xml version="1.0" ?>/'; else mv /tmp/full_output.txt xx00; fi && \
|
||||
# First portion is the test logs. Print these to GitHub Actions stdout.
|
||||
cat xx00 && \
|
||||
|
|
Loading…
Reference in New Issue