Update device/nvidia-agx-thor/nvidia-diffusiongemma-26B-A4B-it-NVFP4.md
This commit is contained in:
parent
f6f6106973
commit
df42d35102
1 changed file with 30 additions and 1 deletion
|
|
@@ -1,4 +1,6 @@
|
|||
- 20260622
|
||||
- 20260622: only for the vllm/vllm-openai:gemma container
|
||||
|
||||
## serve
|
||||
|
||||
```bash
|
||||
sudo docker run --rm -it --name=vllm-diffusiongemma --gpus all --runtime=nvidia --ipc=host --network host \
|
||||
|
|
@@ -31,4 +33,31 @@ vllm/vllm-openai:gemma \
|
|||
--override-generation-config '{"max_new_tokens": null}' \
|
||||
--mm-processor-kwargs '{"max_soft_tokens": 1120}' \
|
||||
--limit-mm-per-prompt '{"image": 7}'
|
||||
```
|
||||
|
||||
## bench
|
||||
```bash
|
||||
vllm bench serve \
|
||||
--model "/workspace/thor-wm/nvidia-diffusiongemma-26B-A4B-it-NVFP4" \
|
||||
--served-model-name "nvidia/diffusiongemma-26B-A4B-it-NVFP4" \
|
||||
--host localhost \
|
||||
--port 8002 \
|
||||
--dataset-name random \
|
||||
--random-input-len 1024 \
|
||||
--random-output-len 1024 \
|
||||
--num-prompts 5 \
|
||||
--max-concurrency 1
|
||||
```
|
||||
|
||||
```bash
|
||||
vllm bench serve \
|
||||
--model "/workspace/thor-wm/nvidia-diffusiongemma-26B-A4B-it-NVFP4" \
|
||||
--served-model-name "nvidia/diffusiongemma-26B-A4B-it-NVFP4" \
|
||||
--host localhost \
|
||||
--port 8002 \
|
||||
--dataset-name random \
|
||||
--random-input-len 1024 \
|
||||
--random-output-len 1024 \
|
||||
--num-prompts 32 \
|
||||
--max-concurrency 8
|
||||
```
|
||||
Loading…
Add table
Add a link
Reference in a new issue