Model Endpoint
Serving model endpoints from containers is an efficient and reliable approach. Containerization packages the model together with its dependencies into a single portable unit, which keeps environments consistent and simplifies scaling, making production endpoints more reliable, maintainable, and efficient.
SKU Listing
To get the list of available SKUs, send a GET request to the SKU endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/?apikey={{tapi_key}}&service=inference_service&framework=Phi-3-mini-128k-instruct
import requests
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/?apikey={{tapi_key}}&service=inference_service&framework=Phi-3-mini-128k-instruct"
headers = {
    'Authorization': 'Bearer {{Token}}',
    'Content-Type': 'application/json',
}

# Listing calls take no request body, so a plain GET is enough.
response = requests.get(url, headers=headers)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/?apikey={{tapi_key}}&service=inference_service&framework=Phi-3-mini-128k-instruct' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
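Every call on this page returns the ratelimit-* headers shown above. A small hedged sketch, repeating the GET from the Python example, that fails fast on HTTP errors and reports the remaining request budget before issuing further calls:

import requests

url = ("https://api.e2enetworks.com/myaccount/api/v1/gpu/gpu_service/sku/"
       "?apikey={{tapi_key}}&service=inference_service"
       "&framework=Phi-3-mini-128k-instruct")
headers = {"Authorization": "Bearer {{Token}}"}

response = requests.get(url, headers=headers)
# Raise immediately on a non-2xx status instead of printing an error body.
response.raise_for_status()
# ratelimit-remaining (shown above) reports the calls left in the current window.
print("rate-limit remaining:", response.headers.get("ratelimit-remaining"))
print(response.json())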
Model Endpoint Listing
To get the list of Model Endpoints, send a GET request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference?apikey={{tapi_key}}&page_no=1&per_page=5
import requests
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference?apikey={{tapi_key}}&page_no=1&per_page=5"
headers = {
    'Authorization': 'Bearer {{Token}}',
    'Content-Type': 'application/json',
}

# Listing calls take no request body, so a plain GET is enough.
response = requests.get(url, headers=headers)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference?apikey={{tapi_key}}&page_no=1&per_page=5' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
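The listing endpoint is paginated via the page_no and per_page query parameters shown above. A minimal sketch that walks all pages; the shape of the paginated response body (a "data" list that becomes empty past the last page) is an assumption, since the body is not reproduced in this document:

import requests

base_url = ("https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}"
            "/projects/{{project_id}}/serving/inference"
            "?apikey={{tapi_key}}&per_page=5&page_no=")
headers = {"Authorization": "Bearer {{Token}}"}

page_no = 1
while True:
    response = requests.get(base_url + str(page_no), headers=headers)
    response.raise_for_status()
    body = response.json()
    # Assumption: endpoints live under a "data" key when the body is an object;
    # verify against a real response before relying on this.
    items = body.get("data", []) if isinstance(body, dict) else body
    if not items:
        break
    for endpoint in items:
        print(endpoint)
    page_no += 1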
Create StarCoder2 7B Model
To create a StarCoder2 7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "star-coder-7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "starcoder2-7b",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "bigcode/starcoder2-7b",
"tokenizer": "bigcode/starcoder2-7b"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "star-coder-7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "starcoder2-7b",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "bigcode/starcoder2-7b",
"tokenizer": "bigcode/starcoder2-7b"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "star-coder-7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "starcoder2-7b",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "bigcode/starcoder2-7b",
"tokenizer": "bigcode/starcoder2-7b"
},
"model_load_integration_id": 170
}
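All of the create requests on this page share one payload skeleton and differ only in a few fields (name, sku_id, replica, framework, container image, and the Hugging Face identifiers). The following is an illustrative sketch, not part of any official SDK: a helper that rebuilds the request body above from those fields, with every default copied verbatim from this document.

import json

def build_inference_payload(name, sku_id, framework, hugging_face_id="",
                            tokenizer=None, replica=1,
                            container_name="vllm/vllm-openai:latest",
                            model_load_integration_id=170):
    # Probe settings shared by every request body on this page.
    probe = {
        "protocol": "http",
        "initial_delay_seconds": 10,
        "success_threshold": 1,
        "failure_threshold": 3,
        "port": 8080,
        "period_seconds": 10,
        "timeout_seconds": 10,
        "path": "/metrics",
        "grpc_service": "",
        "commands": "",
    }
    return json.dumps({
        "name": name,
        "server_version": "",
        "world_size": 1,
        "custom_endpoint_details": {
            "service_port": False,
            "metric_port": False,
            "container": {
                "container_name": container_name,
                "container_type": "public",
                "private_image_details": {},
                "advance_config": {
                    "image_pull_policy": "Always",
                    "is_readiness_probe_enabled": False,
                    "is_liveness_probe_enabled": False,
                    "readiness_probe": dict(probe),
                    "liveness_probe": dict(probe),
                },
            },
            "resource_details": {
                "disk_size": 50,
                "mount_path": "",
                "env_variables": [],
            },
            "public_ip": "no",
        },
        "model_id": None,
        "sku_id": sku_id,
        "replica": replica,
        "path": "",
        "framework": framework,
        "is_auto_scale_enabled": False,
        "detailed_info": {
            "commands": "",
            "args": "",
            # Most models on this page reuse the model id as the tokenizer.
            "hugging_face_id": hugging_face_id,
            "tokenizer": hugging_face_id if tokenizer is None else tokenizer,
        },
        "model_load_integration_id": model_load_integration_id,
    })

For example, build_inference_payload("star-coder-7bbbb", sku_id=118, framework="starcoder2-7b", hugging_face_id="bigcode/starcoder2-7b") reproduces the request body above, and the result can be posted exactly as in the Python example with requests.post(url, headers=headers, data=payload). Later sections reuse this sketch.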
Create Llama 2 Model
To create a Llama 2 model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
}
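With the helper sketched in the StarCoder2 section, this request reduces to the per-model fields (note this example runs two replicas):

payload = build_inference_payload(
    "llamda-two",
    sku_id=128,
    replica=2,
    framework="llma",
    hugging_face_id="meta-llama/Llama-2-7b-chat-hf",
)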
Create Llama 3 Model
To create a Llama 3 model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "llama-threeee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "llama-3-8b-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Meta-Llama-3-8B-Instruct",
"tokenizer": "meta-llama/Meta-Llama-3-8B-Instruct"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "llama-threeee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "llama-3-8b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Meta-Llama-3-8B-Instruct",
"tokenizer": "meta-llama/Meta-Llama-3-8B-Instruct"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "llamda-two",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 2,
"path": "",
"framework": "llma",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/Llama-2-7b-chat-hf",
"tokenizer": "meta-llama/Llama-2-7b-chat-hf"
},
"model_load_integration_id": 170
}
Create Gemma 2B Model
To create a Gemma 2B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "gemmad-twooo",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-2b-it",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-2b-it",
"tokenizer": "google/gemma-2b-it"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "gemmad-twooo",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-2b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-2b-it",
"tokenizer": "google/gemma-2b-it"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "gemmad-twooo",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-2b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-2b-it",
"tokenizer": "google/gemma-2b-it"
},
"model_load_integration_id": 170
}
Create Gemma 7B Model
To create a Gemma 7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "gemma-sevennnn",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-7b-it",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-7b-it",
"tokenizer": "google/gemma-7b-it"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "gemma-sevennnn",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-7b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-7b-it",
"tokenizer": "google/gemma-7b-it"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "gemma-sevennnn",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 128,
"replica": 1,
"path": "",
"framework": "gemma-7b-it",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "google/gemma-7b-it",
"tokenizer": "google/gemma-7b-it"
},
"model_load_integration_id": 170
}
Create CodeLlama 7B Model
To create a CodeLlama 7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "codellama-sevenbbbbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "codellama",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/CodeLlama-7b-Instruct-hf",
"tokenizer": "meta-llama/CodeLlama-7b-Instruct-hf"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "codellama-sevenbbbbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "codellama",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/CodeLlama-7b-Instruct-hf",
"tokenizer": "meta-llama/CodeLlama-7b-Instruct-hf"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "codellama-sevenbbbbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "codellama",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "meta-llama/CodeLlama-7b-Instruct-hf",
"tokenizer": "meta-llama/CodeLlama-7b-Instruct-hf"
},
"model_load_integration_id": 170
}
Create Mistral 7B Instruct Model
To create a Mistral 7B Instruct model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "mistral-7b-instructtttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mistral-7b-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.1"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "mistral-7b-instructtttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mistral-7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "mistral-7b-instructtttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mistral-7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.1",
"tokenizer": "mistralai/Mistral-7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}
Create Mixtral 8x7B Model
To create a Mixtral 8x7B model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "mistral-8x7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mixtral-8x7b-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "mistral-8x7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mixtral-8x7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "mistral-8x7bbbb",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mixtral-8x7b-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"model_load_integration_id": 170
}
Create Stable Diffusion v2.1 Model
To create a Stable Diffusion v2.1 model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "stable-diffusuon-v2222",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "stable-diffusuon-v2222",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "stable-diffusuon-v2222",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}
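Image-served models such as Stable Diffusion differ from the Hugging Face-served ones only in the container image and the empty hugging_face_id/tokenizer fields. With the helper sketched in the StarCoder2 section, which defaults both Hugging Face fields to empty strings:

payload = build_inference_payload(
    "stable-diffusuon-v2222",
    sku_id=52,
    framework="stable_diffusion",
    container_name="registry.e2enetworks.net/aimle2e/stable-diffusion-2-1:hf-v1",
)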
Create Stable Diffusion XL Base Model
To create a Stable Diffusion XL Base model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "stable-diffusion-xl-baseee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-xl-base-1.0:hf",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion_xl",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "stable-diffusion-xl-baseee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-xl-base-1.0:hf",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion_xl",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "stable-diffusion-xl-baseee",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "registry.e2enetworks.net/aimle2e/stable-diffusion-xl-base-1.0:hf",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "stable_diffusion_xl",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "",
"tokenizer": ""
},
"model_load_integration_id": 170
}
Create MPT Model
To create an MPT model endpoint, send a POST request to the Model Inference Endpoint. Note that MPT-7B uses the EleutherAI/gpt-neox-20b tokenizer, so the tokenizer field below differs from the hugging_face_id:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F"
payload = json.dumps({
"name": "mpttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mpt",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mosaicml/mpt-7b-instruct",
"tokenizer": "EleutherAI/gpt-neox-20b"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "mpttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mpt",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mosaicml/mpt-7b-instruct",
"tokenizer": "EleutherAI/gpt-neox-20b"
},
"model_load_integration_id": 170
}'
Request headers:
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...

Response headers:
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218

Response body:
{
"name": "mpttt",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 52,
"replica": 1,
"path": "",
"framework": "mpt",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "mosaicml/mpt-7b-instruct",
"tokenizer": "EleutherAI/gpt-neox-20b"
},
"model_load_integration_id": 170
}
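MPT is the only model on this page whose tokenizer differs from its hugging_face_id, so the helper sketched in the StarCoder2 section needs the tokenizer passed explicitly:

payload = build_inference_payload(
    "mpttt",
    sku_id=52,
    framework="mpt",
    hugging_face_id="mosaicml/mpt-7b-instruct",
    tokenizer="EleutherAI/gpt-neox-20b",
)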
Create Phi-3 Mini Model
To create a Phi-3 Mini model endpoint, send a POST request to the Model Inference Endpoint:
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/2494/projects/3005/serving/inference/?apikey={{tapi_key}}prefix=models%2F"
payload = json.dumps({
"name": "tir-endpoint-060511131818",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": False,
"metric_port": False,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": False,
"is_liveness_probe_enabled": False,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": None,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "Phi-3-mini-128k-instruct",
"is_auto_scale_enabled": False,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "microsoft/Phi-3-mini-128k-instruct",
"tokenizer": "microsoft/Phi-3-mini-128k-instruct"
},
"model_load_integration_id": 170
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
curl --location -g 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/?apikey={{tapi_key}}&prefix=models%2F' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"name": "tir-endpoint-060511131818",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "Phi-3-mini-128k-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "microsoft/Phi-3-mini-128k-instruct",
"tokenizer": "microsoft/Phi-3-mini-128k-instruct"
},
"model_load_integration_id": 170
}'
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
{
"name": "tir-endpoint-060511131818",
"server_version": "",
"world_size": 1,
"custom_endpoint_details": {
"service_port": false,
"metric_port": false,
"container": {
"container_name": "vllm/vllm-openai:latest",
"container_type": "public",
"private_image_details": {},
"advance_config": {
"image_pull_policy": "Always",
"is_readiness_probe_enabled": false,
"is_liveness_probe_enabled": false,
"readiness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
},
"liveness_probe": {
"protocol": "http",
"initial_delay_seconds": 10,
"success_threshold": 1,
"failure_threshold": 3,
"port": 8080,
"period_seconds": 10,
"timeout_seconds": 10,
"path": "/metrics",
"grpc_service": "",
"commands": ""
}
}
},
"resource_details": {
"disk_size": 50,
"mount_path": "",
"env_variables": []
},
"public_ip": "no"
},
"model_id": null,
"sku_id": 118,
"replica": 1,
"path": "",
"framework": "Phi-3-mini-128k-instruct",
"is_auto_scale_enabled": false,
"detailed_info": {
"commands": "",
"args": "",
"hugging_face_id": "microsoft/Phi-3-mini-128k-instruct",
"tokenizer": "microsoft/Phi-3-mini-128k-instruct"
},
"model_load_integration_id": 170
}
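Since the endpoint runs the vllm/vllm-openai image, the deployed service exposes vLLM's OpenAI-compatible chat completions route. A sketch of querying it once the endpoint is running; the inference base URL below is hypothetical and must be replaced with the URL reported for your endpoint, and the model name assumes vLLM serves the model under its Hugging Face id.
import requests

# Hypothetical inference URL; substitute the URL shown for your endpoint.
INFER_URL = "https://<your-endpoint-host>/v1/chat/completions"

payload = {
    # vLLM typically serves the model under its Hugging Face id (assumption).
    "model": "microsoft/Phi-3-mini-128k-instruct",
    "messages": [{"role": "user", "content": "What does a readiness probe do?"}],
    "max_tokens": 128,
}
headers = {
    "Authorization": "Bearer {{Token}}",
    "Content-Type": "application/json",
}
response = requests.post(INFER_URL, headers=headers, json=payload)
print(response.json()["choices"][0]["message"]["content"])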
Disable Autoscaling and Set Replica Count
To disable autoscaling and pin the endpoint to a fixed number of replicas, send a PUT request to the Model Inference Endpoint
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}"
payload = json.dumps({
"action": "disable_auto_scale",
"replicas": 3
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("PUT", url, headers=headers, data=payload)
print(response.text)
curl --location -g --request PUT 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"action": "disable_auto_scale",
"replicas": 3
}'
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
{
"action": "disable_auto_scale",
"replicas": 3
}
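The same PUT route carries every endpoint action, so a small wrapper keeps the calls uniform and surfaces HTTP errors instead of printing raw text. update_endpoint is a hypothetical helper sketched under the same placeholders used above, not part of any SDK.
import requests

def update_endpoint(team_id, project_id, endpoint_id, api_key, token, body):
    # Hypothetical wrapper around the PUT route used in these examples.
    url = (
        f"https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{team_id}"
        f"/projects/{project_id}/serving/inference/{endpoint_id}/?apikey={api_key}"
    )
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    response = requests.put(url, headers=headers, json=body)
    response.raise_for_status()  # raise on 4xx/5xx rather than returning silently
    return response.json()

# Disable autoscaling and pin the endpoint at three replicas:
# update_endpoint(team_id, project_id, 1522, api_key, token,
#                 {"action": "disable_auto_scale", "replicas": 3})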
Enable Autoscaling
To enable autoscaling, send a PUT request to the Model Inference Endpoint
https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}
import requests
import json
url = "https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}"
payload = json.dumps({
"auto_scale_policy": {
"min_replicas": 1,
"max_replicas": 5,
"rules": [
{
"metric": "cpu",
"condition_type": "limit",
"value": 12,
"watch_period": 60
}
],
"stability_period": 300
},
"action": "update_auto_scale"
})
headers = {
'Authorization': 'Bearer {{Token}}',
'Content-Type': 'application/json',
}
response = requests.request("PUT", url, headers=headers, data=payload)
print(response.text)
curl --location -g --request PUT 'https://api.e2enetworks.com/myaccount/api/v1/gpu/teams/{{team_id}}/projects/{{project_id}}/serving/inference/1522/?apikey={{tapi_key}}' \
--header 'Authorization: Bearer {{Token}}' \
--header 'Content-Type: application/json' \
--data '{
"auto_scale_policy": {
"min_replicas": 1,
"max_replicas": 5,
"rules": [
{
"metric": "cpu",
"condition_type": "limit",
"value": 12,
"watch_period": 60
}
],
"stability_period": 300
},
"action": "update_auto_scale"
}'
Content-Type: application/json
Authorization: Bearer eyJhbGciOiJSUzI1NiIsInR5cCIgOiAi...
content-type: application/json; charset=utf-8
status: 202 Accepted
ratelimit-limit: 1200
ratelimit-remaining: 965
ratelimit-reset: 1415984218
{
"auto_scale_policy": {
"min_replicas": 1,
"max_replicas": 5,
"rules": [
{
"metric": "cpu",
"condition_type": "limit",
"value": 12,
"watch_period": 60
}
],
"stability_period": 300
},
"action": "update_auto_scale"
}
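Under this policy the endpoint scales between 1 and 5 replicas when average CPU crosses the limit of 12 over a 60-second watch period, with a 300-second stability period between scaling decisions. A minimal client-side sanity check before sending a policy; the constraints encoded here are reasonable assumptions, not documented server-side rules.
def validate_auto_scale_policy(policy):
    # Hypothetical client-side checks; the server may enforce different rules.
    assert 1 <= policy["min_replicas"] <= policy["max_replicas"], "bad replica bounds"
    for rule in policy["rules"]:
        assert rule["metric"] and rule["condition_type"], "incomplete rule"
        assert rule["value"] > 0 and rule["watch_period"] > 0, "bad rule thresholds"
    assert policy["stability_period"] >= 0, "bad stability period"

# Validate the policy from the example above before submitting it.
validate_auto_scale_policy({
    "min_replicas": 1,
    "max_replicas": 5,
    "rules": [
        {"metric": "cpu", "condition_type": "limit", "value": 12, "watch_period": 60}
    ],
    "stability_period": 300,
})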