Model Endpoint
Serving model endpoints from containers is an efficient and reliable approach. Containerization packages the model together with its dependencies into a single portable unit, which keeps environments consistent and simplifies scaling, making production endpoints more reliable, maintainable, and efficient.
SKU Listing
To get the list of available SKUs, send a GET request to the SKU endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/?apikey={{tapi_key}}&service=inference_service&framework=Phi-3-mini-128k-instruct
import requests
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/?apikey={{tapi_key}}&service=inference_service&framework=Phi-3-mini-128k-instruct"
headers = {
    'Authorization': 'Bearer {{Token}}',
    'Content-Type': 'application/json',
}

# Listing calls take no request body, so a plain GET is enough.
response = requests.get(url, headers=headers)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/?apikey={{tapi_key}}&service=inference_service&framework=Phi-3-mini-128k-instruct' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
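Every call on this page returns the ratelimit-* headers shown above. A small hedged sketch, repeating the GET from the Python example, that fails fast on HTTP errors and reports the remaining request budget before issuing further calls:

import requests

url = ("https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/"
       "?apikey={{tapi_key}}&service=inference_service"
       "&framework=Phi-3-mini-128k-instruct")
headers = {"Authorization": "Bearer {{Token}}"}

response = requests.get(url, headers=headers)
# Raise immediately on a non-2xx status instead of printing an error body.
response.raise_for_status()
# ratelimit-remaining (shown above) reports the calls left in the current window.
print("rate-limit remaining:", response.headers.get("ratelimit-remaining"))
print(response.json())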
Model Endpoint Listing
To get the list of Model Endpoints, send a GET request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference?apikey={{tapi_key}}&page_no=1&per_page=5
import requests
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference?apikey={{tapi_key}}&page_no=1&per_page=5"
headers = {
    'Authorization': 'Bearer {{Token}}',
    'Content-Type': 'application/json',
}

# Listing calls take no request body, so a plain GET is enough.
response = requests.get(url, headers=headers)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference?apikey={{tapi_key}}&page_no=1&per_page=5' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
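The listing endpoint is paginated via the page_no and per_page query parameters shown above. A minimal sketch that walks all pages; the shape of the paginated response body (a "data" list that becomes empty past the last page) is an assumption, since the body is not reproduced in this document:

import requests

base_url = ("https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}"
            "/projects/{{project_id}}/serving/inference"
            "?apikey={{tapi_key}}&per_page=5&page_no=")
headers = {"Authorization": "Bearer {{Token}}"}

page_no = 1
while True:
    response = requests.get(base_url + str(page_no), headers=headers)
    response.raise_for_status()
    body = response.json()
    # Assumption: endpoints live under a "data" key when the body is an object;
    # verify against a real response before relying on this.
    items = body.get("data", []) if isinstance(body, dict) else body
    if not items:
        break
    for endpoint in items:
        print(endpoint)
    page_no += 1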
Create StarCoder2 7B Model
To create a StarCoder2 7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "star-coder-7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "starcoder2-7b",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "bigcode/starcoder2-7b",
"tokenizer": "bigcode/starcoder2-7b"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "star-coder-7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "starcoder2-7b",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "bigcode/starcoder2-7b",
"tokenizer": "bigcode/starcoder2-7b"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "star-coder-7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "starcoder2-7b",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "bigcode/starcoder2-7b",
"tokenizer": "bigcode/starcoder2-7b"
},
"model_load_integration_id": 170
}
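All of the create requests on this page share one payload skeleton and differ only in a few fields (name, sku_id, replica, framework, container image, and the Hugging Face identifiers). The following is an illustrative sketch, not part of any official SDK: a helper that rebuilds the request body above from those fields, with every default copied verbatim from this document.

import json

def build_inference_payload(name, sku_id, framework, hugging_face_id="",
                            tokenizer=None, replica=1,
                            container_name="vllm/vllm-openai:latest",
                            model_load_integration_id=170):
    # Probe settings shared by every request body on this page.
    probe = {
        "protocol": "http",
        "initial_delay_seconds": 10,
        "success_threshold": 1,
        "failure_threshold": 3,
        "port": 8080,
        "period_seconds": 10,
        "timeout_seconds": 10,
        "path": "/metrics",
        "grpc_service": "",
        "commands": "",
    }
    return json.dumps({
        "name": name,
        "server_version": "",
        "world_size": 1,
        "custom_endpoint_details": {
            "service_port": False,
            "metric_port": False,
            "container": {
                "container_name": container_name,
                "container_type": "public",
                "private_image_details": {},
                "advance_config": {
                    "image_pull_policy": "Always",
                    "is_readiness_probe_enabled": False,
                    "is_liveness_probe_enabled": False,
                    "readiness_probe": dict(probe),
                    "liveness_probe": dict(probe),
                },
            },
            "resource_details": {
                "disk_size": 50,
                "mount_path": "",
                "env_variables": [],
            },
            "public_ip": "no",
        },
        "model_id": None,
        "sku_id": sku_id,
        "replica": replica,
        "path": "",
        "framework": framework,
        "is_auto_scale_enabled": False,
        "detailed_info": {
            "commands": "",
            "args": "",
            # Most models on this page reuse the model id as the tokenizer.
            "hugging_face_id": hugging_face_id,
            "tokenizer": hugging_face_id if tokenizer is None else tokenizer,
        },
        "model_load_integration_id": model_load_integration_id,
    })

For example, build_inference_payload("star-coder-7bbbb", sku_id=118, framework="starcoder2-7b", hugging_face_id="bigcode/starcoder2-7b") reproduces the request body above, and the result can be posted exactly as in the Python example with requests.post(url, headers=headers, data=payload). Later sections reuse this sketch.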
Create Llama 2 Model
To create a Llama 2 model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
}
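With the helper sketched in the StarCoder2 section, this request reduces to the per-model fields (note this example runs two replicas):

payload = build_inference_payload(
    "llamda-two",
    sku_id=128,
    replica=2,
    framework="llma",
    hugging_face_id="meta-llama/Llama-2-7b-chat-hf",
)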
Create Llama 3 Model
To create a Llama 3 model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "llama-threeee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "llama-3-8b-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Meta-Llama-3-8B-Instruct",
"tokenizer": "meta-llama/Meta-Llama-3-8B-Instruct"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "llama-threeee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "llama-3-8b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Meta-Llama-3-8B-Instruct",
"tokenizer": "meta-llama/Meta-Llama-3-8B-Instruct"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
}
Create Gemma 2B Model
To create a Gemma 2B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "gemmad-twooo",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-2b-it",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-2b-it",
"tokenizer": "google/gemma-2b-it"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "gemmad-twooo",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-2b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-2b-it",
"tokenizer": "google/gemma-2b-it"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "gemmad-twooo",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-2b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-2b-it",
"tokenizer": "google/gemma-2b-it"
},
"model_load_integration_id": 170
}
Create Gemma 7B Model
To create a Gemma 7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "gemma-sevennnn",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-7b-it",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-7b-it",
"tokenizer": "google/gemma-7b-it"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "gemma-sevennnn",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-7b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-7b-it",
"tokenizer": "google/gemma-7b-it"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "gemma-sevennnn",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-7b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-7b-it",
"tokenizer": "google/gemma-7b-it"
},
"model_load_integration_id": 170
}
Create CodeLlama 7B Model
To create a CodeLlama 7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "codellama-sevenbbbbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "codellama",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/CodeLlama-7b-Instruct-hf",
"tokenizer": "meta-llama/CodeLlama-7b-Instruct-hf"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "codellama-sevenbbbbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "codellama",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/CodeLlama-7b-Instruct-hf",
"tokenizer": "meta-llama/CodeLlama-7b-Instruct-hf"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "codellama-sevenbbbbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "codellama",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/CodeLlama-7b-Instruct-hf",
"tokenizer": "meta-llama/CodeLlama-7b-Instruct-hf"
},
"model_load_integration_id": 170
}
Create Mistral 7B Instruct Model
To create a Mistral 7B Instruct model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "mistral-7b-instructtttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mistral-7b-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.1"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "mistral-7b-instructtttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mistral-7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "mistral-7b-instructtttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mistral-7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}
Create Mixtral 8x7B Model
To create a Mixtral 8x7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "mistral-8x7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mixtral-8x7b-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "mistral-8x7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mixtral-8x7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "mistral-8x7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mixtral-8x7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}
Create Stable Diffusion v2.1 Model
To create a Stable Diffusion v2.1 model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "stable-diffusuon-v2222",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "stable-diffusuon-v2222",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "stable-diffusuon-v2222",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}
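Image-served models such as Stable Diffusion differ from the Hugging Face-served ones only in the container image and the empty hugging_face_id/tokenizer fields. With the helper sketched in the StarCoder2 section, which defaults both Hugging Face fields to empty strings:

payload = build_inference_payload(
    "stable-diffusuon-v2222",
    sku_id=52,
    framework="stable_diffusion",
    container_name="registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
)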
Create Stable Diffusion XL Base Model
To create a Stable Diffusion XL Base model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "stable-diffusion-xl-baseee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-xl-base-1.0:hf",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion_xl",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "stable-diffusion-xl-baseee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-xl-base-1.0:hf",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion_xl",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "stable-diffusion-xl-baseee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-xl-base-1.0:hf",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion_xl",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}
Create MPT Model
To create an MPT model endpoint, send a POST request to the Model Inference Endpoint. Note that MPT-7B uses the EleutherAI/gpt-neox-20b tokenizer, so the tokenizer field below differs from the hugging_face_id:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "mpttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mpt",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mosaicml/mpt-7b-instruct",
"tokenizer": "EleutherAI/gpt-neox-20b"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "mpttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mpt",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mosaicml/mpt-7b-instruct",
"tokenizer": "EleutherAI/gpt-neox-20b"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "mpttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mpt",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mosaicml/mpt-7b-instruct",
"tokenizer": "EleutherAI/gpt-neox-20b"
},
"model_load_integration_id": 170
}
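MPT is the only model on this page whose tokenizer differs from its hugging_face_id, so the helper sketched in the StarCoder2 section needs the tokenizer passed explicitly:

payload = build_inference_payload(
    "mpttt",
    sku_id=52,
    framework="mpt",
    hugging_face_id="mosaicml/mpt-7b-instruct",
    tokenizer="EleutherAI/gpt-neox-20b",
)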
Create Phi-3 Mini Model
To create a Phi-3 Mini model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/2494/projects/3005/serving/inference/?apikey={{tapi_key}}prefix=models%2F"
payload = json.dumps({
"name": "tir-endpoint-060511131818",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "Phi-3-mini-128k-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "microsoft/Phi-3-mini-128k-instruct",
"tokenizer": "microsoft/Phi-3-mini-128k-instruct"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "tir-endpoint-060511131818",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "Phi-3-mini-128k-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "microsoft/Phi-3-mini-128k-instruct",
"tokenizer": "microsoft/Phi-3-mini-128k-instruct"
},
"model_load_integration_id": 170
}'
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
{
"name": "tir-endpoint-060511131818",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "Phi-3-mini-128k-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "microsoft/Phi-3-mini-128k-instruct",
"tokenizer": "microsoft/Phi-3-mini-128k-instruct"
},
"model_load_integration_id": 170
}
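Since the endpoint runs the vllm/vllm-openai image, the deployed service exposes vLLM's OpenAI-compatible chat completions route. A sketch of querying it once the endpoint is running; the inference base URL below is hypothetical and must be replaced with the URL reported for your endpoint, and the model name assumes vLLM serves the model under its Hugging Face id.
import requests

# Hypothetical inference URL; substitute the URL shown for your endpoint.
INFER_URL = "https://<your-endpoint-host>/v1/chat/completions"

payload = {
    # vLLM typically serves the model under its Hugging Face id (assumption).
    "model": "microsoft/Phi-3-mini-128k-instruct",
    "messages": [{"role": "user", "content": "What does a readiness probe do?"}],
    "max_tokens": 128,
}
headers = {
    "Authorization": "Bearer {{Token}}",
    "Content-Type": "application/json",
}
response = requests.post(INFER_URL, headers=headers, json=payload)
print(response.json()["choices"][0]["message"]["content"])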
Disable Autoscaling and Set Replica Count
To disable autoscaling and pin the endpoint to a fixed number of replicas, send a PUT request to the Model Inference Endpoint
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}"
payload = json.dumps({
"action": "disable_auto_scale",
"replicas": 3
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("PUT", url, headers=headers, data=payload)
print(response.text)
curl --location -g --request PUT 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"action": "disable_auto_scale",
"replicas": 3
}'
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
{
"action": "disable_auto_scale",
"replicas": 3
}
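The same PUT route carries every endpoint action, so a small wrapper keeps the calls uniform and surfaces HTTP errors instead of printing raw text. update_endpoint is a hypothetical helper sketched under the same placeholders used above, not part of any SDK.
import requests

def update_endpoint(team_id, project_id, endpoint_id, api_key, token, body):
    # Hypothetical wrapper around the PUT route used in these examples.
    url = (
        f"https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{team_id}"
        f"/projects/{project_id}/serving/inference/{endpoint_id}/?apikey={api_key}"
    )
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    response = requests.put(url, headers=headers, json=body)
    response.raise_for_status()  # raise on 4xx/5xx rather than returning silently
    return response.json()

# Disable autoscaling and pin the endpoint at three replicas:
# update_endpoint(team_id, project_id, 1522, api_key, token,
#                 {"action": "disable_auto_scale", "replicas": 3})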
Enable Autoscaling
To enable autoscaling, send a PUT request to the Model Inference Endpoint
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}"
payload = json.dumps({
"auto_scale_policy": {
"min_replicas": 1,
"max_replicas": 5,
"rules": [
{
"metric": "cpu",
"condition_type": "limit",
"value": 12,
"watch_period": 60
}
],
"stability_period": 300
},
"action": "update_auto_scale"
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("PUT", url, headers=headers, data=payload)
print(response.text)
curl --location -g --request PUT 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"auto_scale_policy": {
"min_replicas": 1,
"max_replicas": 5,
"rules": [
{
"metric": "cpu",
"condition_type": "limit",
"value": 12,
"watch_period": 60
}
],
"stability_period": 300
},
"action": "update_auto_scale"
}'
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
{
"auto_scale_policy": {
"min_replicas": 1,
"max_replicas": 5,
"rules": [
{
"metric": "cpu",
"condition_type": "limit",
"value": 12,
"watch_period": 60
}
],
"stability_period": 300
},
"action": "update_auto_scale"
}
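Under this policy the endpoint scales between 1 and 5 replicas when average CPU crosses the limit of 12 over a 60-second watch period, with a 300-second stability period between scaling decisions. A minimal client-side sanity check before sending a policy; the constraints encoded here are reasonable assumptions, not documented server-side rules.
def validate_auto_scale_policy(policy):
    # Hypothetical client-side checks; the server may enforce different rules.
    assert 1 <= policy["min_replicas"] <= policy["max_replicas"], "bad replica bounds"
    for rule in policy["rules"]:
        assert rule["metric"] and rule["condition_type"], "incomplete rule"
        assert rule["value"] > 0 and rule["watch_period"] > 0, "bad rule thresholds"
    assert policy["stability_period"] >= 0, "bad stability period"

# Validate the policy from the example above before submitting it.
validate_auto_scale_policy({
    "min_replicas": 1,
    "max_replicas": 5,
    "rules": [
        {"metric": "cpu", "condition_type": "limit", "value": 12, "watch_period": 60}
    ],
    "stability_period": 300,
})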