runpod · promptless · May 7, 2026 · May 8, 2026 · promptless · May 8, 2026
diff --git a/runpodctl/reference/runpodctl-serverless.mdx b/runpodctl/reference/runpodctl-serverless.mdx
@@ -65,6 +65,10 @@ Create a new Serverless endpoint from a template or from a Hub repo:
 # Create from a template
 runpodctl serverless create --name "my-endpoint" --template-id "tpl_abc123"
 
+# Create from a template with a model reference
+runpodctl serverless create --template-id "tpl_abc123" --gpu-id ADA_24 \
+  --model-reference https://example.com/models/llama:v1
+
 # Create from a Hub repo
 runpodctl hub search vllm                                         # Find the hub ID
 runpodctl serverless create --hub-id cm8h09d9n000008jvh2rqdsmb --name "my-vllm"
@@ -159,6 +163,10 @@ Execution timeout in seconds. Jobs that exceed this duration are terminated. The
 Environment variable in `KEY=VALUE` format. Use multiple `--env` flags to set multiple variables. When deploying from `--hub-id`, these values override the Hub release defaults.
 </ResponseField>
 
+<ResponseField name="--model-reference" type="string">
+Model reference URL to attach to the endpoint. Use multiple `--model-reference` flags to attach multiple models. This flag is supported only with `--template-id` (not `--hub-id`) and requires the GPU compute type.
+</ResponseField>
+
 ### Update an endpoint
 
 Update endpoint configuration: