#!/bin/bash
# Exits 0 when GPUs have been locked, 1 if they are still busy after 12 checks
# spaced 10s apart (roughly two minutes) or if taking the lock fails

# Author: Christian Kastner <ckk@kvr.at>
# License: MIT

# Abort on the first unhandled command failure; this includes the GPU listing
# helper invoked below.
set -e

# Pick the GPU listing helper: a copy relative to the current directory wins
# (this is what makes local testing possible), otherwise use the system-wide
# path.
if [[ -e "util/list-worker-gpus" ]]
then
    list_cmd="util/list-worker-gpus"
else
    list_cmd="/usr/share/debci/util/list-worker-gpus"
fi

# The helper's --csv output is handed as a single argument to gpuenv-cli
# further down, so it is captured verbatim here.
gpus="$("$list_cmd" --csv)"

# Poll until all GPUs in $gpus are reported free, then lock them.
#
# The retry window (12 checks, 10s apart, i.e. up to ~110s of waiting) was
# originally conceived to allow for a device to settle between job
# invocations, which was an issue on one of our workers.
#
# Exit status: 0 once the lock has been taken; 1 if the GPUs are still busy
# after the last check, if the lock cannot be taken, or if the PID to lock by
# cannot be determined.

# The probe below discards all output, so a missing gpuenv-cli would be
# indistinguishable from "busy" and only fail after the full retry window.
# Fail fast instead.
if ! command -v gpuenv-cli >/dev/null 2>&1
then
	echo "${0##*/}: gpuenv-cli not found in PATH" >&2
	exit 1
fi

max_attempts=12
retry_delay=10

for ((attempt = 1; attempt <= max_attempts; attempt++))
do
	if gpuenv-cli is_free_multi "$gpus" >/dev/null 2>&1
	then
		# We lock by our grandparent's PID because we want gpuenv to
		# implicitly release this lock when the grandparent (debci-test) exits.
		# The lookup happens at lock time, not up front, so it reflects the
		# process tree as it is right now.
		lock_pid="$(ps -o ppid= -p "$PPID")" || lock_pid=""

		# ps right-aligns the column, so the value usually carries leading
		# blanks; drop them before handing the PID on.
		lock_pid="${lock_pid//[[:space:]]/}"

		# Never attempt a lock with an empty or malformed PID.
		case "$lock_pid" in
			''|*[!0-9]*)
				echo "${0##*/}: cannot determine grandparent PID of $PPID" >&2
				exit 1
				;;
		esac

		gpuenv-cli lock_multi --pid="$lock_pid" "$gpus" || exit 1
		exit 0
	fi

	# No point in sleeping after the final check.
	if [ "$attempt" -lt "$max_attempts" ]
	then
		sleep "$retry_delay"
	fi
done
exit 1
