import time

from tqdm import tqdm


# Seconds to wait between retries when no suitable instance is available.
WAIT_TIME = 1

# Pod / deployment configuration.
NAME = 'icons-train'
IMAGE_NAME = 'runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04'
GPU_TYPE_ID = 'NVIDIA A100 80GB PCIe'
OS_DISK_SIZE_GB = 100
PERSISTENT_DISK_SIZE_GB = 0
CLOUD_TYPE = 'SECURE'
MIN_DOWNLOAD_SPEED = 700  # Mbps
DATA_CENTER_ID = 'EU-RO-1'
NETWORK_VOLUME_ID = '54epb6rtc4'
TEMPLATE_ID = 'ue50iblx66'
CUDA_VERSION = '12.2'
GPU_COUNT = 8

# Error messages RunPod returns when no matching instance is currently available.
ERRORS = {
    "specs": (
        "There are no longer any instances available with "
        "the requested specifications. Please refresh and try again."
    ),
    "disk": (
        "There are no longer any instances available with "
        "enough disk space."
    ),
}
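
# The `API` client used below is not included in this paste. The class that
# follows is only a minimal sketch under the assumption that it is a thin
# wrapper around RunPod's GraphQL endpoint, sending the pod configuration
# fields built in create_pod() as a `podFindAndDeployOnDemand` mutation.
# The class name and method signature come from the paste; the body is an
# assumption and should be replaced with the real helper if you have it.
import requests


class API:
    GRAPHQL_URL = 'https://api.runpod.io/graphql'

    def __init__(self, api_key):
        self.api_key = api_key

    def create_on_demand_pod(self, pod_config):
        # Wrap the raw field string in the podFindAndDeployOnDemand mutation and
        # return the raw requests.Response so callers can inspect status and JSON.
        query = (
            'mutation { podFindAndDeployOnDemand(input: { '
            + pod_config +
            ' }) { id imageName machineId } }'
        )
        return requests.post(
            self.GRAPHQL_URL,
            params={'api_key': self.api_key},
            json={'query': query},
        )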
def create_pod(api, bar):
    """Try to deploy an on-demand pod, retrying until an instance is available."""
    # GraphQL input fields for RunPod's on-demand pod deployment.
    pod_config = f"""
        cudaVersion: "{CUDA_VERSION}",
        templateId: "{TEMPLATE_ID}",
        networkVolumeId: "{NETWORK_VOLUME_ID}",
        dataCenterId: "{DATA_CENTER_ID}",
        minDownload: {MIN_DOWNLOAD_SPEED},
        gpuCount: {GPU_COUNT},
        volumeInGb: {PERSISTENT_DISK_SIZE_GB},
        containerDiskInGb: {OS_DISK_SIZE_GB},
        gpuTypeId: "{GPU_TYPE_ID}",
        cloudType: {CLOUD_TYPE},
        supportPublicIp: true,
        name: "{NAME}",
        dockerArgs: "",
        volumeMountPath: "/workspace",
        imageName: "{IMAGE_NAME}",
        startJupyter: true,
        startSsh: true,
    """

    # Retry in a loop (rather than recursing) so long waits for capacity
    # do not hit Python's recursion limit.
    while True:
        bar.update(1)
        response = api.create_on_demand_pod(pod_config)
        if response.status_code != 200:
            print(f'ERROR: request failed with HTTP status {response.status_code}')
            return None

        resp_json = response.json()
        if 'errors' not in resp_json:
            # Pod was deployed successfully.
            return resp_json

        # Availability errors are retryable; anything else aborts.
        messages = [error['message'] for error in resp_json['errors']]
        if ERRORS['disk'] in messages:
            print(f'No instances with enough disk space available, '
                  f'sleeping for {WAIT_TIME} second(s)')
        elif ERRORS['specs'] not in messages:
            for message in messages:
                print('ERROR: ' + message)
            return None
        time.sleep(WAIT_TIME)

bar = tqdm(desc='deployment attempts')
api = API("XXX")  # RunPod API key (redacted in the paste).
res = create_pod(api, bar)
bar.close()
print(res)