Advertisement
xosski

Startup script

Mar 21st, 2025
11
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.97 KB | None | 0 0
  1. #!/bin/bash
  # Redirect all output (stdout and stderr) to a log file for debugging.
  # '>' truncates the file, so it only ever holds the output of the latest run.
  exec > /var/log/startup-script.log 2>&1
  4.  
  # Helper logging functions for clarity.
  #
  # log MESSAGE...
  #   Prints "<YYYY-mm-dd HH:MM:SS> [INFO] <message>" on stdout; all arguments
  #   are joined with single spaces. printf '%s' is used instead of echo so the
  #   message is printed verbatim (no backslash interpretation) regardless of
  #   the shell's echo settings.
  log() { printf '%s [INFO] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"; }
  # log_err MESSAGE...
  #   Prints "<YYYY-mm-dd HH:MM:SS> [ERROR] <message>" on stderr; all arguments
  #   are joined with single spaces. printf '%s' keeps the message verbatim
  #   (no backslash interpretation) regardless of the shell's echo settings.
  log_err() { printf '%s [ERROR] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2; }
  8.  
  log "Startup script initiated."

  # 1. Install required packages if not present
  log "Checking and installing prerequisites (Python3, curl, Docker, NVIDIA drivers, NVIDIA container toolkit)..."
  # Refresh the package index first. A failure is logged but not fatal: the
  # individual install steps below report their own errors.
  # DEBIAN_FRONTEND=noninteractive: nobody can answer a prompt during boot.
  DEBIAN_FRONTEND=noninteractive apt-get update -y \
    || log_err "apt-get update failed; package installation below may fail"
  14.  
  # Install Python3 if missing (best effort: a failure is logged, not fatal).
  if command -v python3 >/dev/null 2>&1; then
    log "Python3 is already installed."
  else
    log "Python3 not found. Installing Python3..."
    # noninteractive: a debconf prompt would otherwise hang the boot script.
    DEBIAN_FRONTEND=noninteractive apt-get install -y python3 \
      || log_err "Failed to install Python3"
  fi
  22.  
  # Install curl if missing (best effort: a failure is logged, not fatal).
  if command -v curl >/dev/null 2>&1; then
    log "curl is already installed."
  else
    log "curl not found. Installing curl..."
    # noninteractive: a debconf prompt would otherwise hang the boot script.
    DEBIAN_FRONTEND=noninteractive apt-get install -y curl \
      || log_err "Failed to install curl"
  fi
  30.  
  # Install NVIDIA GPU driver if not present (using default driver package).
  # Presence is detected by nvidia-smi being on PATH.
  if command -v nvidia-smi >/dev/null 2>&1; then
    log "NVIDIA GPU driver already installed."
  else
    log "NVIDIA GPU driver not detected. Installing NVIDIA driver..."
    # Explicit if/else instead of 'a && b || c' so the error branch is tied
    # only to the two install steps.
    if DEBIAN_FRONTEND=noninteractive apt-get install -y ubuntu-drivers-common \
      && ubuntu-drivers autoinstall; then
      log "NVIDIA GPU driver installed."
    else
      log_err "Failed to install NVIDIA GPU driver"
    fi
    # Optionally, you could use Google's provided installer script or specific driver version as needed
    # NOTE(review): a freshly installed driver may need a reboot before the
    # GPU is usable - confirm on the target image.
  fi
  39.  
  # Install Docker if not present.

  # install_docker
  #   Adds Docker's apt repository (key stored in /etc/apt/keyrings, referenced
  #   via signed-by) and installs docker-ce, docker-ce-cli and containerd.io.
  #   Returns non-zero at the first step that fails.
  #   Replaces the previous 'apt-key add' + 'add-apt-repository' approach:
  #   apt-key is deprecated, and add-apt-repository was never installed by this
  #   script (it ships in software-properties-common).
  install_docker() {
    local keyring=/etc/apt/keyrings/docker.asc
    local arch codename

    DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl || return 1
    install -m 0755 -d /etc/apt/keyrings || return 1
    curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o "$keyring" || return 1
    chmod a+r "$keyring" || return 1

    # Derive architecture and release codename from the system instead of
    # hard-coding amd64 / depending on lsb_release.
    arch=$(dpkg --print-architecture) || return 1
    codename=$(. /etc/os-release && printf '%s' "${UBUNTU_CODENAME:-$VERSION_CODENAME}") || return 1
    [[ -n "$codename" ]] || return 1

    printf 'deb [arch=%s signed-by=%s] https://download.docker.com/linux/ubuntu %s stable\n' \
      "$arch" "$keyring" "$codename" > /etc/apt/sources.list.d/docker.list || return 1

    apt-get update -y || return 1
    DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io
  }

  if command -v docker >/dev/null 2>&1; then
    log "Docker is already installed."
  else
    log "Docker not found. Installing Docker..."
    # Report success only when every install step actually succeeded.
    if install_docker; then
      log "Docker installed."
    else
      log_err "Failed to install Docker"
    fi
  fi

  # Ensure Docker service is running (best effort: log, do not abort).
  systemctl start docker || log_err "Could not start the Docker service"
  55.  
  # Install NVIDIA Container Toolkit if not present.

  # install_nvidia_container_toolkit
  #   Adds NVIDIA's libnvidia-container apt repository (key stored as a
  #   keyring referenced via signed-by) and installs nvidia-container-toolkit.
  #   Returns non-zero at the first step that fails.
  #   curl runs with -f so an HTTP error can never be written into the apt
  #   sources list as if it were a repository definition.
  install_nvidia_container_toolkit() {
    local keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
    local repo_base=https://nvidia.github.io/libnvidia-container
    local repo_list

    DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg || return 1

    # If curl fails, gpg receives no key material and fails too.
    curl -fsSL "$repo_base/gpgkey" \
      | gpg --batch --yes --dearmor -o "$keyring" || return 1

    repo_list=$(curl -fsSL "$repo_base/stable/deb/nvidia-container-toolkit.list") || return 1
    sed "s#deb https://#deb [signed-by=${keyring}] https://#g" <<< "$repo_list" \
      > /etc/apt/sources.list.d/nvidia-container-toolkit.list || return 1

    apt-get update -y || return 1
    DEBIAN_FRONTEND=noninteractive apt-get install -y nvidia-container-toolkit
  }

  # Ask the package database whether the toolkit is installed. The previous
  # check grepped 'docker info' for the literal "Runtimes: nvidia", which only
  # matches when nvidia happens to be the first runtime listed, so the toolkit
  # was reinstalled (and Docker restarted) on practically every boot.
  if dpkg-query -W -f='${Status}' nvidia-container-toolkit 2>/dev/null \
    | grep -q 'install ok installed'; then
    log "NVIDIA Container Toolkit is already installed."
  else
    log "NVIDIA Container Toolkit not found. Installing..."
    if install_nvidia_container_toolkit; then
      # Register the nvidia runtime with Docker, then restart Docker so the
      # change takes effect.
      if command -v nvidia-ctk >/dev/null 2>&1; then
        nvidia-ctk runtime configure --runtime=docker \
          || log_err "Failed to configure the NVIDIA runtime for Docker"
      fi
      if systemctl restart docker; then
        log "NVIDIA Container Toolkit installed and Docker restarted."
      else
        log_err "NVIDIA Container Toolkit installed, but restarting Docker failed"
      fi
    else
      log_err "Failed to install NVIDIA Container Toolkit"
    fi
  fi
  70.  
  # 2. Retrieve Docker image name and env vars from metadata
  METADATA_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes"
  log "Fetching 'docker-image' and environment variables from instance metadata..."

  # Fetch Docker image from metadata (required). -f turns HTTP errors into a
  # non-zero exit; -sS hides the progress meter but still prints the reason
  # for a failure into the log.
  docker_image=$(curl -fsS -H "Metadata-Flavor: Google" "$METADATA_URL/docker-image") || {
    log_err "Metadata 'docker-image' not found! Aborting."
    exit 1
  }
  # An attribute that exists but is empty is just as unusable as a missing one.
  if [[ -z "$docker_image" ]]; then
    log_err "Metadata 'docker-image' is empty! Aborting."
    exit 1
  fi
  log "Docker image to run: $docker_image"
  79.  
  # Fetch all env- variables from metadata (if any).
  # They are collected in an array (one "-e", one "NAME=value" element per
  # variable) so that values containing spaces or glob characters reach
  # 'docker run' intact.
  env_args=()
  # List all attributes as "key value" lines; a fetch failure is logged and
  # the container is then started without extra environment variables.
  metadata_entries=$(curl -fsS -H "Metadata-Flavor: Google" "$METADATA_URL/?recursive=true&alt=text") \
    || log_err "Failed to fetch metadata entries"
  # Loop through lines to find env- keys
  while IFS= read -r line; do
    case "$line" in
      env-*)
        # Split "env-KEY value" into key and value. A line without a space
        # has no value; treat it as empty instead of reusing the key text.
        key="${line%% *}"
        if [[ "$line" == *" "* ]]; then
          value="${line#* }"
        else
          value=""
        fi
        env_name="${key#env-}"
        if [[ -z "$env_name" ]]; then
          log_err "Ignoring metadata key 'env-' with empty variable name"
          continue
        fi
        env_args+=(-e "${env_name}=${value}")
        # Only the name is logged: values may be credentials.
        log "Found metadata env var: $env_name"
        ;;
    esac
  done <<< "$metadata_entries"

  # 3. Pull the Docker image with retry logic (max 4 attempts)
  max_attempts=4
  attempt=1
  until docker pull "$docker_image"; do
    if (( attempt >= max_attempts )); then
      log_err "Docker image pull failed after $attempt attempts. Exiting."
      exit 1
    fi
    log_err "Docker pull failed (attempt $attempt of $max_attempts). Retrying in 10 seconds..."
    attempt=$((attempt + 1))
    sleep 10
  done
  log "Successfully pulled Docker image: $docker_image"

  # 4. Run the Docker container (with any retrieved env variables) with GPU access
  # Use --restart unless-stopped to ensure the container restarts if the VM reboots.
  # A container named "app" left over from an earlier run of this script would
  # make 'docker run --name app' fail with a name conflict, so remove it first;
  # the replacement uses the image and env vars fetched above.
  if docker container inspect app >/dev/null 2>&1; then
    log "Removing existing 'app' container before relaunch..."
    docker rm -f app >/dev/null || log_err "Failed to remove existing 'app' container"
  fi

  log "Launching Docker container from image..."
  docker run -d --name app --restart unless-stopped --gpus all "${env_args[@]}" "$docker_image" || {
    log_err "Failed to start Docker container."
    exit 1
  }
  log "Docker container started successfully."

  log "Startup script completed."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement