#!/bin/bash
# Helper for running a command in a podman container with GPU support
#
# Author: Christian Kastner <ckk@kvr.at>
# License: MIT

# Print the help text for this wrapper.
#
# Arguments: none
# Outputs:   the help text, written to stderr (stdout stays empty)
# Notes:     "$0" is expanded inside the here-document so the synopsis and
#            examples show the name the script was invoked as; "\$" yields
#            a literal dollar sign (the example shell prompt).
#            The here-document uses "<<-", so its lines MUST be indented
#            with tabs, not spaces.
usage() {
    cat >&2 <<-EOF

	Run a command in a podman container with GPU support

	This is a thin wrapper around podman-run(1) that takes care of adding
	all of the arguments necessary for using a GPU in the container. If the
	container isn't tagged STACK/..., then the first argument must be the
	name of the stack to use. All remaining arguments are passed on straight
	to podman-run.

	Currently supported stacks are: rocm, cuda.

	Synopsis:
	  $0 -h | --help

	  $0 [podman-run args]

	  $0 STACK [podman-run args]

	Options:
	  -h, --help  Show this help

	Examples:

	  # Configure the system for GPU-in-container use with AMD GPUs

	  \$ gpuisol-podman-setup -u <user> rocm

	  # Create an image first, if needed

	  \$ gpuisol-podman-create rocm myimage

	  # Call as if you would call 'podman run', but prepending the stack name

	  \$ $0 rocm --rm -it myimage

	  # If this image had been tagged 'rocm/myimage', then prepending the
	  # stack name could have been omitted

	  \$ $0 --rm -it rocm/myimage

	EOF
}

# Command-line handling.
#
# Results:
#   stack    - GPU stack to use, either "rocm" or "cuda"
#   tagname  - the last command-line argument as originally given
#   "$@"     - the arguments to hand to 'podman run' (an explicit STACK
#              argument, if present, has been shifted away)
#
# The stack is determined as follows:
#   1. If the first argument is exactly "rocm" or "cuda", it is the stack and
#      is consumed. This takes precedence over tag detection so that the
#      stack name is never forwarded to podman as if it were an image name.
#   2. Otherwise the LAST argument is inspected; if it starts with "rocm/" or
#      "cuda/", the stack is derived from that prefix and nothing is shifted.
#   3. Otherwise the first argument is reported as an unsupported stack.
#
# NOTE: only the last argument is inspected in step 2, so when a command
# follows the image name, the explicit STACK form has to be used.
if [ "$#" -lt 1 ]; then
    echo "Not enough arguments." >&2
    exit 1
fi

case "$1" in
    -h|--help)
        usage
        exit 0
        ;;
esac

# ${!#} is indirect expansion of "$#", i.e. the last positional parameter.
tagname="${!#}"
stack=
case "$1" in
    rocm|cuda)
        stack="$1"
        shift 1
        ;;
    *)
        case "$tagname" in
            rocm/*)
                stack=rocm
                ;;
            cuda/*)
                stack=cuda
                ;;
            *)
                echo "Unsupported GPU stack argument value: $1" >&2
                exit 1
                ;;
        esac
        ;;
esac

# Facts about the invoking user and the GPU-related groups.
#   userNAME  - name of the user running this script
#   renderGID - numeric GID of group 'render' (empty if the group is missing)
#   videoGID  - numeric GID of group 'video'; fixed by policy
userNAME=$(whoami)
renderGID="$(getent group render | cut -d: -f3)"
# By policy
videoGID=44

# Print every argument as one line on stderr, then abort with status 1.
sanity_fail() {
    printf '%s\n' "$@" >&2
    exit 1
}

# Succeed if user $1 is a member of the group named exactly $2.
# The group list is padded with spaces so that only whole names match
# (a user called 'render' or a group called 'render-foo' does not count).
user_in_group() {
    local memberships
    memberships=" $(id -nG "$1") "
    [[ "$memberships" == *" $2 "* ]]
}

# Succeed if /etc/subgid contains the exact line "$1:$2:1", i.e. a
# subordinate GID range of length one starting at GID $2 for user $1.
# -x/-F: whole-line, fixed-string match, so neither other users whose name
# merely ends in $1 nor longer numbers sharing a prefix can match.
has_single_subgid() {
    grep -qxF -- "$1:$2:1" /etc/subgid
}

# Succeed if /etc/subgid has an entry for user $1 whose start GID has at
# least six digits and whose count is 65534, 65535 or 65536.
# Fields are compared individually so the user name is never interpreted
# as a regular expression.
has_large_subgid_range() {
    awk -F: -v user="$1" '
        $1 == user && $2 ~ /^[0-9]+$/ && length($2) >= 6 && $3 ~ /^6553[4-6]$/ {
            found = 1
        }
        END { exit !found }
    ' /etc/subgid
}

# Sanity checks; the first failing check aborts the script.
if [ -z "$renderGID" ]; then
    sanity_fail \
        "Group 'render' does not exist on this system. Are you sure that you are on" \
        "the right system? This group should have been automatically created by the" \
        "udev package."
fi

user_in_group "$userNAME" render \
    || sanity_fail "'$userNAME' is not in group 'render'."
user_in_group "$userNAME" video \
    || sanity_fail "'$userNAME' is not in group 'video'."

if [ "$(cat /proc/sys/kernel/unprivileged_userns_clone)" != "1" ]; then
    sanity_fail "unprivileged_userns_clone not enabled."
fi

# Stack-specific device checks.
case "$stack" in
    rocm)
        [ -c /dev/kfd ] \
            || sanity_fail "Device /dev/kfd does not exist - is the 'amdgpu' module loaded?"
        [ -w /dev/kfd ] \
            || sanity_fail "No write permissions for /dev/kfd."
        ;;
    cuda)
        # No write check here: /dev/nvidiactl is always world-writable
        [ -c /dev/nvidiactl ] \
            || sanity_fail "Device /dev/nvidiactl does not exist - is the 'nvidia' module loaded?"
        ;;
esac

# Subordinate GID ranges that the --gidmap arguments of 'podman run' rely on.
has_single_subgid "$userNAME" "$renderGID" \
    || sanity_fail "No subgid mapping for group 'render'. Run gpuisol-podman-setup"
has_single_subgid "$userNAME" "$videoGID" \
    || sanity_fail "No subgid mapping for group 'video'. Run gpuisol-podman-setup"
has_large_subgid_range "$userNAME" \
    || sanity_fail "No large subgid mapping for '$userNAME'. Run gpuisol-podman-setup"

# Build the 'podman run' command line and replace this process with it.
#
# The only difference between the stacks is the set of --device arguments,
# so those are collected per stack in an array while the --gidmap arguments
# are shared.
#
# Reads: stack, userNAME, renderGID, videoGID and the remaining "$@".

# Count field of the user's large subordinate GID range in /etc/subgid
# (start GID of six or more digits, count 65534..65536). Fields are compared
# individually so partial user names cannot match, and only the first
# matching entry is used so the result is always a single number.
subgidCount="$(awk -F: -v user="$userNAME" '
    $1 == user && $2 ~ /^[0-9]+$/ && length($2) >= 6 && $3 ~ /^6553[4-6]$/ {
        print $3
        exit
    }
' /etc/subgid)"
if [ -z "$subgidCount" ]; then
    echo "No large subgid mapping for '$userNAME'. Run gpuisol-podman-setup" >&2
    exit 1
fi
# Size of the final mapping, which covers the GIDs above renderGID.
maxsubGID=$((subgidCount - renderGID - 1))

# GID mappings, each in the form container_gid:intermediate_gid:amount.
#   0           -> 0                  (1 GID)
#   videoGID    -> 1                  (1 GID)
#   renderGID   -> 2                  (1 GID)
#   1..videoGID-1           -> 3...   (the GIDs below 'video')
#   videoGID+1..renderGID-1 -> videoGID+2...
#   renderGID+1...          -> renderGID+2...  (maxsubGID GIDs)
# The arithmetic assumes renderGID > videoGID + 1.
# NOTE(review): intermediate GIDs 1 and 2 presumably resolve to the single
# 'video' and 'render' entries in /etc/subgid - confirm against podman-run(1).
gidmaps=(
    --gidmap=0:0:1
    --gidmap="$videoGID":1:1
    --gidmap="$renderGID":2:1
    --gidmap=1:3:$((videoGID - 1))
    --gidmap=$((videoGID + 1)):$((videoGID + 2)):$((renderGID - videoGID - 1))
    --gidmap=$((renderGID + 1)):$((renderGID + 2)):"$maxsubGID"
)

case "$stack" in
    rocm)
        devices=(
            --device=/dev/dri
            --device=/dev/kfd
        )
        ;;
    cuda)
        # This creates /dev/nvidia-uvm if it does not exist yet
        [ -c /dev/nvidia-uvm ] || nvidia-modprobe -c 0 -u
        devices=(
            --device=/dev/dri
            --device=/dev/nvidiactl
            --device=/dev/nvidia-modeset
            --device=/dev/nvidia-uvm
            --device=/dev/nvidia-uvm-tools
            --device=/dev/nvidia0
        )
        ;;
    *)
        # Do not fall off the end of the script with a success status.
        echo "Unsupported GPU stack argument value: $stack" >&2
        exit 1
        ;;
esac

exec podman run "${devices[@]}" "${gidmaps[@]}" "$@"
