diff --git a/runpodctl/reference/runpodctl-serverless.mdx b/runpodctl/reference/runpodctl-serverless.mdx index a09b2440..fe0cfcce 100644 --- a/runpodctl/reference/runpodctl-serverless.mdx +++ b/runpodctl/reference/runpodctl-serverless.mdx @@ -65,6 +65,10 @@ Create a new Serverless endpoint from a template or from a Hub repo: # Create from a template runpodctl serverless create --name "my-endpoint" --template-id "tpl_abc123" +# Create from a template with model references +runpodctl serverless create --template-id "tpl_abc123" --gpu-id ADA_24 \ + --model-reference https://example.com/models/llama:v1 + # Create from a Hub repo runpodctl hub search vllm # Find the hub ID runpodctl serverless create --hub-id cm8h09d9n000008jvh2rqdsmb --name "my-vllm" @@ -159,6 +163,10 @@ Execution timeout in seconds. Jobs that exceed this duration are terminated. The Environment variable in `KEY=VALUE` format. Use multiple `--env` flags to set multiple variables. When deploying from `--hub-id`, these values override the Hub release defaults. + +Model reference URL to attach to the endpoint. Use multiple `--model-reference` flags to attach multiple models. Only supported with `--template-id` (not `--hub-id`) and requires a GPU compute type. + + ### Update an endpoint Update endpoint configuration: