Add device/nvidia-agx-thor/cyankiwi-gemma-4-26B-A4B-it-AWQ-4bit.md
This commit is contained in:
parent
ae5fe10662
commit
f7859d5848
1 changed files with 51 additions and 0 deletions
|
|
@ -0,0 +1,51 @@
|
|||
- 20260622
|
||||
|
||||
## serve
|
||||
```bash
|
||||
vllm serve "/workspace/thor-wm/cyankiwi-gemma-4-26B-A4B-it-AWQ-4bit" \
|
||||
--port "8001" \
|
||||
--host "0.0.0.0" \
|
||||
--served-model-name "cyankiwi/gemma-4-26B-A4B-it-AWQ-4bit" \
|
||||
--max-model-len 262144 \
|
||||
--tensor-parallel-size 1 \
|
||||
--max-num-seqs 8 \
|
||||
--gpu-memory-utilization 0.01 \
|
||||
--kv-cache-memory-bytes 9g \
|
||||
--attention-backend TRITON_ATTN \
|
||||
--enable-chunked-prefill \
|
||||
--max-num-batched-tokens 16384 \
|
||||
--enable-prefix-caching \
|
||||
--reasoning-parser gemma4 \
|
||||
--enable-auto-tool-choice \
|
||||
--tool-call-parser gemma4 \
|
||||
--mm-processor-kwargs '{"max_soft_tokens": 1120}'
|
||||
```
|
||||
|
||||
## bench
|
||||
```bash
|
||||
vllm bench serve \
|
||||
--model "/workspace/thor-wm/cyankiwi-gemma-4-26B-A4B-it-AWQ-4bit" \
|
||||
--served-model-name "cyankiwi/gemma-4-26B-A4B-it-AWQ-4bit" \
|
||||
--host localhost \
|
||||
--port 8001 \
|
||||
--dataset-name random \
|
||||
--random-input-len 1024 \
|
||||
--random-output-len 1024 \
|
||||
--num-prompts 5 \
|
||||
--max-concurrency 1
|
||||
```
|
||||

|
||||
|
||||
```bash
|
||||
vllm bench serve \
|
||||
--model "/workspace/thor-wm/cyankiwi-gemma-4-26B-A4B-it-AWQ-4bit" \
|
||||
--served-model-name "cyankiwi/gemma-4-26B-A4B-it-AWQ-4bit" \
|
||||
--host localhost \
|
||||
--port 8001 \
|
||||
--dataset-name random \
|
||||
--random-input-len 1024 \
|
||||
--random-output-len 1024 \
|
||||
--num-prompts 32 \
|
||||
--max-concurrency 8
|
||||
```
|
||||

|
||||
Loading…
Add table
Add a link
Reference in a new issue