llama.vscode/package.json

{
  "name": "llama-vscode",
  "displayName": "llama-vscode",
  "description": "Local LLM-assisted text completion using llama.cpp",
  "version": "0.0.32",
  "publisher": "ggml-org",
  "repository": "https://github.com/ggml-org/llama.vscode",
  "engines": {
    "vscode": "^1.100.0"
  },
  "icon": "llama.png",
  "activationEvents": [
    "onLanguage:plaintext",
    "onLanguage:javascript",
    "onLanguage:typescript",
    "onCommand:extension.acceptFirstLine"
  ],
  "main": "./dist/extension.js",
  "contributes": {
    "viewsContainers": {
      "activitybar": [
        {
          "id": "llama-vscode-sidebar",
          "title": "llama-vscode",
          "icon": "resources/llama-sidebar.svg"
        }
      ]
    },
    "languages": [
      {
        "id": "plaintext",
        "aliases": [
          "Plain Text"
        ],
        "extensions": [
          ".txt"
        ]
      }
    ],
    "commands": [
      {
        "command": "extension.triggerInlineCompletion",
        "title": "llama-vscode: Trigger Inline Completion"
      },
      {
        "command": "extension.triggerNoCacheCompletion",
        "title": "llama-vscode: Trigger No Cache Completion"
      },
      {
        "command": "extension.copyIntercept",
        "title": "llama-vscode: Copy Intercept"
      },
      {
        "command": "extension.cutIntercept",
        "title": "llama-vscode: Cut Intercept"
      },
      {
        "command": "extension.acceptFirstLine",
        "title": "llama-vscode: Accept First Line"
      },
      {
        "command": "extension.acceptFirstWord",
        "title": "llama-vscode: Accept First Word"
      },
      {
        "command": "extension.copyChunks",
        "title": "llama-vscode: Copy Chunks"
      },
      {
        "command": "extension.showMenu",
        "title": "llama-vscode: Show Menu"
      },
      {
        "command": "extension.askAi",
        "title": "llama-vscode: Ask AI"
      },
      {
        "command": "extension.askAiWithContext",
        "title": "llama-vscode: Ask AI With Context"
      },
      {
        "command": "extension.editSelectedText",
        "title": "llama-vscode: Edit Selected Text with AI"
      },
      {
        "command": "extension.acceptTextEdit",
        "title": "llama-vscode: Accept Text Edit Suggestion"
      },
      {
        "command": "extension.rejectTextEdit",
        "title": "llama-vscode: Reject Text Edit Suggestion"
      },
      {
        "command": "extension.killAgent",
        "title": "llama-vscode: Kill Agent Session"
      },
      {
        "command": "extension.generateGitCommitMessage",
        "title": "llama-vscode: Generate Commit Message",
        "icon": "$(sparkle)"
      },
      {
        "command": "extension.showLlamaWebview",
        "title": "llama-vscode: Show Llama Agent",
        "icon": "$(window)"
      }
    ],
    "views": {
      "llama-vscode-sidebar": [
        {
          "id": "llama-vscode.webview",
          "name": "llama-vscode",
          "when": "true",
          "type": "webview",
          "icon": "resources/llama-sidebar.svg"
        }
      ]
    },
    "keybindings": [
      {
        "key": "tab",
        "command": "editor.action.inlineSuggest.commit",
        "when": "inlineSuggestionVisible"
      },
      {
        "command": "extension.triggerInlineCompletion",
        "key": "ctrl+l",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.triggerNoCacheCompletion",
        "key": "ctrl+shift+l",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.copyChunks",
        "key": "ctrl+shift+,",
        "when": "true"
      },
      {
        "command": "extension.copyIntercept",
        "key": "ctrl+c",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.cutIntercept",
        "key": "ctrl+x",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.acceptFirstLine",
        "key": "shift+tab",
        "when": "editorTextFocus && inlineSuggestionVisible"
      },
      {
        "command": "extension.acceptFirstWord",
        "key": "ctrl+right",
        "when": "editorTextFocus && inlineSuggestionVisible"
      },
      {
        "command": "extension.showMenu",
        "key": "ctrl+shift+m",
        "when": "true"
      },
      {
        "command": "extension.showLlamaWebview",
        "key": "ctrl+shift+a",
        "when": "true"
      },
      {
        "command": "extension.askAi",
        "key": "ctrl+;",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.askAiWithContext",
        "key": "ctrl+Shift+;",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.askAiWithTools",
        "key": "ctrl+Shift+t",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.editSelectedText",
        "key": "ctrl+shift+e",
        "when": "editorHasSelection"
      },
      {
        "command": "extension.acceptTextEdit",
        "key": "tab",
        "when": "editorTextFocus && textEditSuggestionVisible && resourceScheme == 'llama-suggestion'"
      },
      {
        "command": "extension.killAgent",
        "key": "ctrl+k ctrl+a",
        "when": "editorTextFocus"
      },
      {
        "command": "extension.rejectTextEdit",
        "key": "escape",
        "when": "editorTextFocus && textEditSuggestionVisible && resourceScheme == 'llama-suggestion'"
      }
    ],
    "configuration": {
      "type": "object",
      "title": "llama.vscode Configuration",
      "properties": {
        "llama-vscode.launch_completion": {
          "type": "string",
          "default": "",
          "description": "Shell command for starting local fim llama.cpp server"
        },
        "llama-vscode.launch_chat": {
          "type": "string",
          "default": "",
          "description": "Shell command for starting local chat llama.cpp server"
        },
        "llama-vscode.launch_embeddings": {
          "type": "string",
          "default": "",
          "description": "Shell command for starting local embeddings llama.cpp server"
        },
        "llama-vscode.launch_tools": {
          "type": "string",
          "default": "",
          "description": "Shell command for starting local tools llama.cpp server"
        },
        "llama-vscode.launch_training_completion": {
          "type": "string",
          "default": "",
          "description": "Shell command for starting the training of a completion (fim) model from the menu"
        },
        "llama-vscode.launch_training_chat": {
          "type": "string",
          "default": "",
          "description": "Shell command for starting the training of a chat model from the menu"
        },
        "llama-vscode.lora_completion": {
          "type": "string",
          "default": "",
          "description": "Path to the lora adapter file for the completion model. If not empty it will be used (appends --lora lora_completion) on starting the completion server with launch_completion"
        },
        "llama-vscode.lora_chat": {
          "type": "string",
          "default": "",
          "description": "Path to the lora adapter file for the chat model. If not empty it will be used (appends --lora lora_chat) on starting the chat server with launch_chat"
        },
        "llama-vscode.endpoint": {
          "type": "string",
          "default": "",
          "description": "The URL to be used by the extension for code completion."
        },
        "llama-vscode.endpoint_chat": {
          "type": "string",
          "default": "",
          "description": "The URL to be used by the extension for chat with ai."
        },
        "llama-vscode.endpoint_tools": {
          "type": "string",
          "default": "",
          "description": "The URL to be used by the extension for chat with ai with tools use."
        },
        "llama-vscode.endpoint_embeddings": {
          "type": "string",
          "default": "",
          "description": "The URL to be used by the extension for creating embeddings."
        },
        "llama-vscode.new_completion_model_port": {
          "type": "number",
          "default": 8012,
          "description": "The port to be used when a new completion model is created."
        },
        "llama-vscode.new_chat_model_port": {
          "type": "number",
          "default": 8011,
          "description": "The port to be used when a new chat model is created."
        },
        "llama-vscode.new_embeddings_model_port": {
          "type": "number",
          "default": 8010,
          "description": "The port to be used when a new embeddings model is created."
        },
        "llama-vscode.new_tools_model_port": {
          "type": "number",
          "default": 8009,
          "description": "The port to be used when a new tools model is created."
        },
        "llama-vscode.new_completion_model_host": {
          "type": "string",
          "default": "127.0.0.1",
          "description": "The host to be used when a new completion model is created."
        },
        "llama-vscode.new_chat_model_host": {
          "type": "string",
          "default": "127.0.0.1",
          "description": "The host to be used when a new chat model is created."
        },
        "llama-vscode.new_embeddings_model_host": {
          "type": "string",
          "default": "127.0.0.1",
          "description": "The host to be used when a new embeddings model is created."
        },
        "llama-vscode.new_tools_model_host": {
          "type": "string",
          "default": "127.0.0.1",
          "description": "The host to be used when a new tools model is created."
        },
        "llama-vscode.ai_api_version": {
          "type": "string",
          "default": "v1",
          "description": "The version of the API of the model. It is appended to the endpoints for chat and tools"
        },
        "llama-vscode.ai_model": {
          "type": "string",
          "default": "google/gemini-2.5-flash",
          "description": "The model name. This is used in the request to the API. It is important when OpenRouter is used (for example google/gemini-2.5-flash)."
        },
        "llama-vscode.agents_list": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name for this agent to be shown to the user"
              },
              "description": {
                "type": "string",
                "description": "Description of the agent - what purposes it should be used for, what its strengths are, etc."
              },
              "systemInstruction": {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "description": "The system instructions for this agent",
                "default": []
              },
              "tools": {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "description": "Tools, which will be used by default by this agent",
                "default": []
              }
            },
            "required": [
              "name",
              "systemInstruction"
            ]
          },
          "default": [
            {
              "name": "llama-vscode help",
              "description": "This is an agent for helping how to use llama-vscode.",
              "systemInstruction": [
                "You are an agent for helping the user how to use llama-vscode.",
                "Use the available tools to get the help documentation for llama-vscode and answer the questions from the user.",
                "Base your answers on the help documentation from the tools."
              ],
              "tools": [
                "llama_vscode_help"
              ]
            },
            {
              "name": "default",
              "description": "This is the default agent.",
              "systemInstruction": [
                "You are an agent for software development - please keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user.",
                "Only terminate your turn when you are sure that the problem is solved.",
                "If you are not sure about anything pertaining to the user’s request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.",
                "You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.",
                "Read the file content or a section of the file before editing the file.",
                "",
                "# Workflow",
                "",
                "## High-Level Problem Solving Strategy",
                "",
                "1. Understand the problem deeply. Carefully read the issue and think critically about what is required.",
                "2. Investigate the codebase. Explore relevant files, search for key functions, and gather context.",
                "3. Develop a clear, step-by-step plan. Break down the fix into manageable, incremental steps.",
                "4. Implement the fix incrementally. Make small, testable code changes.",
                "5. Debug as needed. Use debugging techniques to isolate and resolve issues.",
                "6. Iterate until the root cause is fixed.",
                "7. Reflect and validate comprehensively.",
                "",
                "Refer to the detailed sections below for more information on each step.",
                "",
                "## 1. Deeply Understand the Problem",
                "Carefully read the issue and think hard about a plan to solve it before coding.",
                "",
                "## 2. Codebase Investigation",
                "- Explore relevant files and directories.",
                "- Search for key functions, classes, or variables related to the issue.",
                "- Read and understand relevant code snippets.",
                "- Identify the root cause of the problem.",
                "- Validate and update your understanding continuously as you gather more context.",
                "",
                "## 3. Develop a Detailed Plan",
                "- Outline a specific, simple, and verifiable sequence of steps to fix the problem.",
                "- Break down the fix into small, incremental changes.",
                "",
                "## 4. Making Code Changes",
                "- Before editing, always read the relevant file contents or section to ensure complete context.",
                "- If a patch is not applied correctly, attempt to reapply it.",
                "- Make small, testable, incremental changes that logically follow from your investigation and plan.",
                "",
                "## 5. Debugging",
                "- Make code changes only if you have high confidence they can solve the problem",
                "- When debugging, try to determine the root cause rather than addressing symptoms",
                "- Debug for as long as needed to identify the root cause and identify a fix",
                "- Use print statements, logs, or temporary code to inspect program state, including descriptive statements or error messages to understand what's happening",
                "- To test hypotheses, you can also add test statements or functions",
                "- Revisit your assumptions if unexpected behavior occurs.",
                "",
                "",
                "## 6. Final Verification",
                "- Confirm the root cause is fixed.",
                "- Review your solution for logic correctness and robustness.",
                "- Iterate until you are extremely confident the fix is complete.",
                "",
                "## 7. Final Reflection",
                "- If there are changed files, build the application to check for errors.",
                "- Reflect carefully on the original intent of the user and the problem statement.",
                "- Think about potential edge cases or scenarios.",
                "- Continue refining until you are confident the fix is robust and comprehensive.",
                ""
              ],
              "tools": [
                "run_terminal_command",
                "search_source",
                "read_file",
                "list_directory",
                "regex_search",
                "delete_file",
                "get_diff",
                "edit_file",
                "ask_user"
              ]
            }
          ],
          "description": "The list of the agents, which could be selected"
        },
        "llama-vscode.agent_commands": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name for this command to be shown to the user"
              },
              "description": {
                "type": "string",
                "description": "Description of the command - what purposes it should be used for, what its strengths are, etc."
              },
              "prompt": {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "description": "The prompt to be sent to the agent",
                "default": []
              },
              "context": {
                "type": "array",
                "items": {
                  "type": "string"
                },
                "description": "Key words, which will be transformed to additional context for the agent",
                "default": []
              }
            },
            "required": [
              "name",
              "prompt"
            ]
          },
          "default": [
            {
              "name": "about",
              "description": "Reviews the project and provides information about it.",
              "prompt": [
                "What is this project about?",
                "Provide an overview of the project - purpose, architecture, language, etc."
              ],
              "context": []
            },
            {
              "name": "explain",
              "description": "Explains the attached code/file.",
              "prompt": [
                "Explain the provided source code."
              ],
              "context": []
            }
          ],
          "description": "The list of agent commands, which could be selected by the user"
        },
        "llama-vscode.completion_models_list": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name for this model to be shown to the user"
              },
              "endpoint": {
                "type": "string",
                "description": "The endpoint, from where to access the model",
                "default": ""
              },
              "aiModel": {
                "type": "string",
                "description": "The name of the AI model as expected by the provider",
                "default": ""
              },
              "isKeyRequired": {
                "type": "boolean",
                "description": "Is a key required for the endpoint",
                "default": false
              },
              "localStartCommand": {
                "type": "string",
                "description": "Command to be used for starting the model locally.",
                "default": ""
              }
            },
            "required": [
              "name"
            ]
          },
          "default": [
            {
              "name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
              "localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
              "endpoint": "http://localhost:8012",
              "aiModel": "",
              "isKeyRequired": false
            },
            {
              "name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
              "localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
              "endpoint": "http://localhost:8012",
              "aiModel": "",
              "isKeyRequired": false
            },
            {
              "name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
              "localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
              "endpoint": "http://localhost:8012",
              "aiModel": "",
              "isKeyRequired": false
            },
            {
              "name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)",
              "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 --port 8012",
              "endpoint": "http://localhost:8012",
              "aiModel": "",
              "isKeyRequired": false
            }
          ],
          "description": "The list of the completion (FIM) models, which could be selected"
        },
        "llama-vscode.chat_models_list": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name for this model to be shown to the user"
              },
              "endpoint": {
                "type": "string",
                "description": "The endpoint, from where to access the model",
                "default": ""
              },
              "aiModel": {
                "type": "string",
                "description": "The name of the AI model as expected by the provider",
                "default": ""
              },
              "isKeyRequired": {
                "type": "boolean",
                "description": "Is a key required for the endpoint",
                "default": false
              },
              "localStartCommand": {
                "type": "string",
                "description": "Command to be used for starting the model locally.",
                "default": ""
              }
            },
            "required": [
              "name"
            ]
          },
          "default": [
            {
              "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
              "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
              "endpoint": "http://127.0.0.1:8011"
            },
            {
              "name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
              "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
              "endpoint": "http://127.0.0.1:8011"
            },
            {
              "name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
              "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
              "endpoint": "http://127.0.0.1:8011"
            },
            {
              "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (CPU Only)",
              "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
              "endpoint": "http://127.0.0.1:8011"
            },
            {
              "name": "gemini qat tools",
              "localStartCommand": "llama-server -m c:\\ai\\gemma-3-4B-it-QAT-Q4_0.gguf --port 8011",
              "endpoint": "http://localhost:8011",
              "aiModel": "",
              "isKeyRequired": false
            },
            {
              "name": "OpenAI gpt-oss 20B",
              "localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8011",
              "endpoint": "http://localhost:8011",
              "aiModel": "",
              "isKeyRequired": false
            }
          ],
          "description": "The list of chat models, which could be selected"
        },
        "llama-vscode.embeddings_models_list": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name for this model to be shown to the user"
              },
              "endpoint": {
                "type": "string",
                "description": "The endpoint, from where to access the model",
                "default": ""
              },
              "aiModel": {
                "type": "string",
                "description": "The name of the AI model as expected by the provider",
                "default": ""
              },
              "isKeyRequired": {
                "type": "boolean",
                "description": "Is a key required for the endpoint",
                "default": false
              },
              "localStartCommand": {
                "type": "string",
                "description": "Command to be used for starting the model locally.",
                "default": ""
              }
            },
            "required": [
              "name"
            ]
          },
          "default": [
            {
              "name": "Nomic-Embed-Text-V2-GGUF",
              "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
              "endpoint": "http://127.0.0.1:8010"
            }
          ],
          "description": "The list of embeddings models, which could be selected"
        },
        "llama-vscode.tools_models_list": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name for this model to be shown to the user"
              },
              "endpoint": {
                "type": "string",
                "description": "The endpoint, from where to access the model",
                "default": ""
              },
              "aiModel": {
                "type": "string",
                "description": "The name of the AI model as expected by the provider",
                "default": ""
              },
              "isKeyRequired": {
                "type": "boolean",
                "description": "Is a key required for the endpoint",
                "default": false
              },
              "localStartCommand": {
                "type": "string",
                "description": "Command to be used for starting the model locally.",
                "default": ""
              }
            },
            "required": [
              "name"
            ]
          },
          "default": [
            {
              "name": "OpenAI gpt-oss 20B (LOCAL) (> 19GB VRAM)",
              "localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
              "endpoint": "http://localhost:8009",
              "aiModel": "",
              "isKeyRequired": false
            },
            {
              "name": "xAI: Grok 4 Fast (free for limited period), context: 2 000 000",
              "localStartCommand": "",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "x-ai/grok-4-fast:free"
            },
            {
              "name": "Sonoma Sky - 2,000,000 context $0/M input tokens $0/M output tokens as of 19.09.25 (OpenRouter)",
              "localStartCommand": "",
              "endpoint": "https://openrouter.ai/api",
              "aiModel": "openrouter/sonoma-sky-alpha",
              "isKeyRequired": true
            },
            {
              "name": "Sonoma Dusk - 2,000,000 context $0/M input tokens $0/M output tokens as of 19.09.25 (OpenRouter)",
              "localStartCommand": "",
              "endpoint": "https://openrouter.ai/api",
              "aiModel": "openrouter/sonoma-dusk-alpha",
              "isKeyRequired": true
            },
            {
              "name": "Z.AI: GLM 4.5 - 128000 context $0.60/M input tokens $2.20/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "z-ai/glm-4.5"
            },
            {
              "name": "Z.AI: GLM 4.5 Air - 128.000 context $0.20/M input tokens $1.10/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "z-ai/glm-4.5-air"
            },
            {
              "name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
            },
            {
              "name": "Qwen: Qwen3 Coder - 262K context $0.30/M input tokens $1.20/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "qwen/qwen3-coder"
            },
            {
              "name": "Qwen: Qwen3 235B A22B Instruct 2507 - 262K context $0.12/M input tokens $0.59/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "qwen/qwen3-235b-a22b-2507"
            },
            {
              "name": "MoonshotAI: Kimi K2 - 131K context $0.55/M input tokens $2.20/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "moonshotai/kimi-k2"
            },
            {
              "name": "Google: Gemini 2.5 Flash Lite - 1.05M context $0.10/M input tokens $0.40/M output tokens (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "google/gemini-2.5-flash-lite"
            },
            {
              "name": "Google: Gemini 2.5 Flash - 1.05M context $0.30/M input tokens $2.50/M output tokens $1.238/K input imgs (OpenRouter)",
              "endpoint": "https://openrouter.ai/api",
              "isKeyRequired": true,
              "aiModel": "google/gemini-2.5-flash"
            },
            {
              "name": "openai/gpt-oss-20b - 131K context, $0,04/M input tokens, $0,16/M output tokens (OpenRouter)",
              "localStartCommand": "",
              "endpoint": "https://openrouter.ai/api",
              "aiModel": "openai/gpt-oss-20b",
              "isKeyRequired": true
            },
            {
              "name": "OpenAI gpt-oss 120B - 131K context, $0,09/M input tokens, $0,45/M output tokens (OpenRouter)",
              "localStartCommand": "",
              "endpoint": "https://openrouter.ai/api",
              "aiModel": "openai/gpt-oss-120b",
              "isKeyRequired": true
            }
          ],
          "description": "The list of tools models, which could be selected"
        },
        "llama-vscode.envs_list": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "name": {
                "type": "string",
                "description": "Name of the env"
              },
              "description": {
                "type": "string",
                "description": "Description of the env"
              },
              "completion": {
                "type": "object",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Name for this model to be shown to the user"
                  },
                  "endpoint": {
                    "type": "string",
                    "description": "The endpoint, from where to access the model",
                    "default": ""
                  },
                  "aiModel": {
                    "type": "string",
                    "description": "The name of the AI model as expected by the provider",
                    "default": ""
                  },
                  "isKeyRequired": {
                    "type": "boolean",
                    "description": "Is key requried for the endpoint",
                    "default": false
                  },
                  "localStartCommand": {
                    "type": "string",
                    "description": "Command to be used for sterting the model locally.",
                    "default": ""
                  }
                },
                "required": [
                  "name"
                ]
              },
              "chat": {
                "type": "object",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Name for this model to be shown to the user"
                  },
                  "endpoint": {
                    "type": "string",
                    "description": "The endpoint, from where to access the model",
                    "default": ""
                  },
                  "aiModel": {
                    "type": "string",
                    "description": "The name of the AI model as expected by the provider",
                    "default": ""
                  },
                  "isKeyRequired": {
                    "type": "boolean",
                    "description": "Is key requried for the endpoint",
                    "default": false
                  },
                  "localStartCommand": {
                    "type": "string",
                    "description": "Command to be used for sterting the model locally.",
                    "default": ""
                  }
                },
                "required": [
                  "name"
                ]
              },
              "embeddings": {
                "type": "object",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Name for this model to be shown to the user"
                  },
                  "endpoint": {
                    "type": "string",
                    "description": "The endpoint, from where to access the model",
                    "default": ""
                  },
                  "aiModel": {
                    "type": "string",
                    "description": "The name of the AI model as expected by the provider",
                    "default": ""
                  },
                  "isKeyRequired": {
                    "type": "boolean",
                    "description": "Is key requried for the endpoint",
                    "default": false
                  },
                  "localStartCommand": {
                    "type": "string",
                    "description": "Command to be used for sterting the model locally.",
                    "default": ""
                  }
                },
                "required": [
                  "name"
                ]
              },
              "tools": {
                "type": "object",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Name for this model to be shown to the user"
                  },
                  "endpoint": {
                    "type": "string",
                    "description": "The endpoint, from where to access the model",
                    "default": ""
                  },
                  "aiModel": {
                    "type": "string",
                    "description": "The name of the AI model as expected by the provider",
                    "default": ""
                  },
                  "isKeyRequired": {
                    "type": "boolean",
                    "description": "Is key requried for the endpoint",
                    "default": false
                  },
                  "localStartCommand": {
                    "type": "string",
                    "description": "Command to be used for sterting the model locally.",
                    "default": ""
                  }
                },
                "required": [
                  "name"
                ]
              },
              "agent": {
                "type": "object",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "Name for this agent to be shown to the user"
                  },
                  "description": {
                    "type": "string",
                    "description": "Description of the model - for what purposes should be used, what are his strengths, etc."
                  },
                  "systemInstruction": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "The system instructions for this agent",
                    "default": ""
                  },
                  "tools": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Tools, which will be used by default by this agent",
                    "default": []
                  }
                },
                "required": [
                  "name"
                ]
              },
              "ragEnabled": {
                "type": "boolean",
                "default": true
              },
              "envStartLastUsed": {
                "type": "boolean",
                "default": false,
                "description": "If true - starts the last used env on startup."
              },
              "complEnabled": {
                "type": "boolean",
                "default": true,
                "description": "Enable/disable completions"
              }
            }
          },
          "default": [
            {
              "name": "Local, full package - min, gpt-oss 20B ( > 24GB VRAM | HD: 16 GB)",
              "description": "Everything local, gpt-oss 20B for agent",
              "completion": {
                "name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "OpenAI gpt-oss 20B",
                "localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
                "endpoint": "http://localhost:8009",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, full package - medium, gpt-oss 20B (> 34 GB VRAM | HD: 20 GB)",
              "description": "Everything local, gpt-oss 20B for agent",
              "completion": {
                "name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "OpenAI gpt-oss 20B",
                "localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
                "endpoint": "http://localhost:8009",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, full package - max, gpt-oss 20B (>48GB VRAM | HD: 30 GB)",
              "description": "Everything local, gpt-oss 20B for agent",
              "completion": {
                "name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "OpenAI gpt-oss 20B",
                "localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
                "endpoint": "http://localhost:8009",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, only completions - CPU (HD: 1.6 GB)",
              "description": "For laptops only with CPU, lightweight model for completion ",
              "completion": {
                "name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "embeddings": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "tools": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, only completions (<= 8GB VRAM | HD: 1.6 GB) ",
              "description": "Only for code completions model Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
              "completion": {
                "name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "embeddings": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "tools": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, only completions (<= 16GB VRAM | HD: 3,2 GB)",
              "description": "Only for completions, model Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM | HD: 3,2 GB)",
              "completion": {
                "name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "embeddings": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "tools": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, only completions (> 16GB VRAM)",
              "description": "Only for code completions, model Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
              "completion": {
                "name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM | HD: 8.1 GB)",
                "localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "embeddings": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "tools": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              }
            },
            {
              "name": "Local, only chat & edit (CPU Only | HD: 2.2 GB)",
              "description": "Only for chat with AI, model Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (CPU Only)",
              "completion": {
                "name": "",
                "localStartCommand": ""
              },
              "chat": {
                "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (CPU Only)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "",
                "localStartCommand": ""
              },
              "tools": {
                "name": "",
                "localStartCommand": ""
              }
            },
            {
              "name": "Local, only chat, chat with project context & edit (<= 16GB VRAM | HD: 4 GB)",
              "description": "Could be used for edit with AI, chat with AI, chat with AI with project context Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF + embeddings model (<= 16GB VRAM)",
              "completion": {
                "name": "",
                "localStartCommand": ""
              },
              "chat": {
                "name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "",
                "localStartCommand": ""
              }
            },
            {
              "name": "Local, only chat & edit (<= 8GB VRAM | HD: 1.65)",
              "description": "Only for chat with AI and edit with AI, Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
              "completion": {
                "name": "",
                "localStartCommand": ""
              },
              "chat": {
                "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "",
                "localStartCommand": ""
              },
              "tools": {
                "name": "",
                "localStartCommand": ""
              }
            },
            {
              "name": "Local, only chat, chat with project context & edit (> 16GB VRAM | HD: 8.6 GB)",
              "description": "Good for chat with AI, chat with AI with project context, edit Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF + embeddings model  (> 16GB VRAM)",
              "completion": {
                "name": "",
                "localStartCommand": ""
              },
              "chat": {
                "name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "",
                "localStartCommand": ""
              }
            },
            {
              "name": "Agent & chat (<= 16GB VRAM | HD: 3.8 GB) (requires OpenRouter API key)",
              "description": "Agent qwen 3 from OpenRouter (requires OpenRouter API key),  chat and edit with small models (<= 16GB VRAM) ",
              "completion": {
                "name": "",
                "localStartCommand": ""
              },
              "chat": {
                "name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
                "endpoint": "https://openrouter.ai/api",
                "isKeyRequired": true,
                "aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
              }
            },
            {
              "name": "Full package - min (<= 16GB VRAM | HD: 4 GB) (requires OpenRouter API key)",
              "description": "The minimal configuration for completions (local), chat (local) and agent (remote - OpenRouter), requires OpenRouter API key for agent",
              "completion": {
                "name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
                "endpoint": "https://openrouter.ai/api",
                "isKeyRequired": true,
                "aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
              }
            },
            {
              "name": "Full package - medium (<= 32GB VRAM | HD: 7.1 GB) (requires OpenRouter API key)",
              "description": "Agent qwen 3 from OpenRouter, completions & chat - medium size models, embeddings (<= 32GB VRAM))",
              "completion": {
                "name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
                "endpoint": "https://openrouter.ai/api",
                "isKeyRequired": true,
                "aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
              }
            },
            {
              "name": "Full package - max (>32 GB VRAM | HD: 17 GB) (requires OpenRouter API key)",
              "description": "Agent - qwen 3 from OpenRouter (API key required), completions, chat (>32 GB VRAM) ",
              "completion": {
                "name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
                "localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
                "endpoint": "http://localhost:8012",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
                "endpoint": "https://openrouter.ai/api",
                "isKeyRequired": true,
                "aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
              }
            },
            {
              "name": "OpenAI gpt-oss,  20B agent, chat - ( < 8GB VRAM | HD: 2.2 GB) (requires OpenRouter API key)",
              "description": "agent - Open AI gpt-oss 20GB from OpenRouter (requires API key), chat - small model (< 8GB VRAM)",
              "completion": {
                "name": "",
                "localStartCommand": ""
              },
              "chat": {
                "name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
                "localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
                "endpoint": "http://127.0.0.1:8011"
              },
              "embeddings": {
                "name": "Nomic-Embed-Text-V2-GGUF",
                "localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
                "endpoint": "http://127.0.0.1:8010"
              },
              "tools": {
                "name": "openai/gpt-oss-20b",
                "localStartCommand": "",
                "endpoint": "https://openrouter.ai/api",
                "aiModel": "openai/gpt-oss-20b",
                "isKeyRequired": true
              }
            },
            {
              "name": "Empty - no models",
              "description": "For cases when the settings (endpoint*, Launch_*, Api_key*,  Ai_model) are used for configuring which servers to be used by llama-vscode instead of env.",
              "completion": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "chat": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "embeddings": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              },
              "tools": {
                "name": "",
                "localStartCommand": "",
                "endpoint": "",
                "aiModel": "",
                "isKeyRequired": false
              }
            }
          ],
          "description": "The list of envs, which could be selected"
        },
        "llama-vscode.agent_rules": {
          "type": "string",
          "default": "",
          "description": "Path to a file (md format or plain text) with user defined rules for the agent, which will be appended to the system instructions. Path could be absolute or relative to the workspace root. Example: coding-rules.md (provided the file is in the project root) or /home/user/coding-rules.md"
        },
        "llama-vscode.env_start_last_used": {
          "type": "boolean",
          "default": false,
          "description": "If true - starts the last used env on startup."
        },
        "llama-vscode.ask_install_llamacpp": {
          "type": "boolean",
          "default": true,
          "description": "If true, on starting VS Code - installation of llama.cpp will be suggested."
        },
        "llama-vscode.ask_upgrade_llamacpp_hours": {
          "type": "number",
          "default": 24,
          "description": "How offen to ask the user to upgrade llama.cpp in hours."
        },
        "llama-vscode.env_start_last_used_confirm": {
          "type": "boolean",
          "default": true,
          "description": "If true, before starting the last used env, the user is asked for confirmation. Used only if env_start_last_used = true"
        },
        "llama-vscode.auto": {
          "type": "boolean",
          "default": true,
          "description": "If code completion should be triggered automatically (true) or only by pressing Ctrl+l."
        },
        "llama-vscode.api_key": {
          "type": "string",
          "default": "",
          "description": "llama.cpp completion server api key or OpenAI endpoint API key (optional)"
        },
        "llama-vscode.api_key_chat": {
          "type": "string",
          "default": "",
          "description": "llama.cpp chat server api key"
        },
        "llama-vscode.api_key_tools": {
          "type": "string",
          "default": "",
          "description": "llama.cpp AI with tools server api key"
        },
        "llama-vscode.api_key_embeddings": {
          "type": "string",
          "default": "",
          "description": "llama.cpp embeddings server api key"
        },
        "llama-vscode.self_signed_certificate": {
          "type": "string",
          "default": "",
          "description": "self-signed certificate file - path/to/cert.pem"
        },
        "llama-vscode.n_prefix": {
          "default": 256,
          "type": "number",
          "description": "number of lines before the cursor location to include in the local prefix"
        },
        "llama-vscode.n_suffix": {
          "type": "number",
          "default": 64,
          "description": "number of lines after  the cursor location to include in the local suffix"
        },
        "llama-vscode.n_predict": {
          "type": "number",
          "default": 128,
          "description": "max number of tokens to predict"
        },
        "llama-vscode.t_max_prompt_ms": {
          "type": "number",
          "default": 500,
          "description": "max alloted time for the prompt processing (TODO: not yet supported)"
        },
        "llama-vscode.t_max_predict_ms": {
          "type": "number",
          "default": 500,
          "description": "max alloted time for the prediction"
        },
        "llama-vscode.show_info": {
          "type": "boolean",
          "default": true,
          "description": "show extra info about the inference (false - disabled, true - show extra info in status line)"
        },
        "llama-vscode.max_line_suffix": {
          "type": "number",
          "default": 8,
          "description": "do not auto-trigger FIM completion if there are more than this number of characters to the right of the cursor"
        },
        "llama-vscode.max_cache_keys": {
          "type": "number",
          "default": 250,
          "description": "max number of cached completions to keep in result_cache"
        },
        "llama-vscode.ring_n_chunks": {
          "type": "number",
          "default": 16,
          "description": "max number of chunks to pass as extra context to the server (0 to disable)"
        },
        "llama-vscode.ring_chunk_size": {
          "type": "number",
          "default": 64,
          "description": "max size of the chunks (in number of lines). Note: adjust these numbers so that you don't overrun your context at ring_n_chunks = 64 and ring_chunk_size = 64 you need ~32k context"
        },
        "llama-vscode.ring_scope": {
          "type": "number",
          "default": 1024,
          "description": "the range around the cursor position (in number of lines) for gathering chunks after FIM"
        },
        "llama-vscode.ring_update_ms": {
          "type": "number",
          "default": 1000,
          "description": "how often to process queued chunks in normal mode"
        },
        "llama-vscode.rag_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable rag features - i.e. chat with AI with project context"
        },
        "llama-vscode.rag_chunk_max_chars": {
          "type": "number",
          "default": 2000,
          "description": "Max number of chars per RAG chunk"
        },
        "llama-vscode.rag_max_lines_per_chunk": {
          "type": "number",
          "default": 60,
          "description": "Max number of lines per RAG chunk"
        },
        "llama-vscode.rag_max_chars_per_chunk_line": {
          "type": "number",
          "default": 300,
          "description": "max chars for a chunk line, the rest of the line is cut"
        },
        "llama-vscode.rag_max_files": {
          "type": "number",
          "default": 10000,
          "description": "max files to index for RAG search, 0 to switch off indexing"
        },
        "llama-vscode.rag_max_chunks": {
          "type": "number",
          "default": 30000,
          "description": "max cunks for the RAG search"
        },
        "llama-vscode.rag_max_bm25_filter_chunks": {
          "type": "number",
          "default": 47,
          "description": "max RAG chunks to filter with BM25 algorithm"
        },
        "llama-vscode.rag_max_embedding_filter_chunks": {
          "type": "number",
          "default": 5,
          "description": "max RAG chunks to provide as context to the LLM"
        },
        "llama-vscode.rag_max_context_files": {
          "type": "number",
          "default": 3,
          "description": "max number of complete files to send as context to the LLM"
        },
        "llama-vscode.rag_max_context_file_chars": {
          "type": "number",
          "default": 5000,
          "description": "max chars for a context file. If the file is bigger it will be cut to avoid too big context."
        },
        "llama-vscode.tool_run_terminal_command_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool run_terminal_command"
        },
        "llama-vscode.tools_custom": {
          "type": "array",
          "description": "Array of tool definitions for REST requests to LLM",
          "items": {
            "type": "object",
            "description": "Tool details",
            "properties": {
              "enabled": {
                "type": "boolean",
                "default": true,
                "description": "If the tool is enabled or not"
              },
              "tool_function": {
                "type": "string",
                "default": "",
                "description": "The function, which will be executed on calling the tool."
              },
              "tool_function_desc": {
                "type": "string",
                "default": "",
                "description": "The function, which will generate the description to show when the tools is executed"
              },
              "tool": {
                "type": "object",
                "description": "Tool definition",
                "properties": {
                  "type": {
                    "type": "string",
                    "description": "Type of the tool",
                    "enum": [
                      "function"
                    ]
                  },
                  "function": {
                    "type": "object",
                    "description": "Function definition",
                    "properties": {
                      "name": {
                        "type": "string",
                        "description": "Name of the function"
                      },
                      "description": {
                        "type": "string",
                        "description": "Description of the function"
                      },
                      "parameters": {
                        "type": "object",
                        "description": "Function parameters schema",
                        "properties": {
                          "type": {
                            "type": "string",
                            "description": "Type of parameters object",
                            "enum": [
                              "object"
                            ]
                          },
                          "properties": {
                            "type": "object",
                            "description": "Function properties definition",
                            "additionalProperties": true
                          },
                          "required": {
                            "type": "array",
                            "description": "Required properties",
                            "items": {
                              "type": "string"
                            }
                          }
                        },
                        "required": [
                          "type",
                          "properties"
                        ]
                      },
                      "strict": {
                        "type": "boolean",
                        "description": "Whether to use strict validation"
                      }
                    },
                    "required": [
                      "name",
                      "description",
                      "parameters"
                    ]
                  }
                },
                "required": [
                  "type",
                  "function"
                ]
              }
            }
          },
          "default": []
        },
        "llama-vscode.context_custom": {
          "type": "object",
          "properties": {
            "get_list": {
              "type": "string",
              "default": "",
              "description": "The function to be executed to get the list of context objects (value | key). Function of type () => string[] "
            },
            "get_item_context": {
              "type": "string",
              "default": "",
              "description": "The function to get the item context from the stored item key/value. Async function of type (k:string, v:string) => string"
            }
          },
          "default": {
            "get_list": "",
            "get_item_context": ""
          }
        },
        "llama-vscode.tool_permit_some_terminal_commands": {
          "type": "boolean",
          "default": false,
          "description": "Permit AI to execute some safe terminal commands, which do not change the environment (no guarantee) "
        },
        "llama-vscode.tool_permit_file_changes": {
          "type": "boolean",
          "default": false,
          "description": "Permit AI to edit and delete files without user confirmation"
        },
        "llama-vscode.tool_search_source_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool search_source"
        },
        "llama-vscode.tool_read_file_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool read_file"
        },
        "llama-vscode.tool_list_directory_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool list_directory"
        },
        "llama-vscode.tool_regex_search_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool regex_search"
        },
        "llama-vscode.tool_delete_file_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool delete_file"
        },
        "llama-vscode.tool_get_diff_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool delete_file"
        },
        "llama-vscode.tool_edit_file_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool edit_file"
        },
        "llama-vscode.tool_ask_user_enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable tool ask_user"
        },
        "llama-vscode.tool_custom_tool_enabled": {
          "type": "boolean",
          "default": false,
          "description": "Enable/disable tool custom_tool"
        },
        "llama-vscode.tool_llama_vscode_help_enabled": {
          "type": "boolean",
          "default": false,
          "description": "Enable/disable tool llama-vscode_help"
        },
        "llama-vscode.tool_save_plan_enabled": {
          "type": "boolean",
          "default": false,
          "description": "Enable/disable tool llama-vscode_help"
        },
        "llama-vscode.tool_update_task_enabled": {
          "type": "boolean",
          "default": false,
          "description": "Enable/disable tool llama-vscode_help"
        },
        "llama-vscode.tool_custom_tool_description": {
          "type": "string",
          "default": "Use this tool to get information about ...",
          "description": "Description for the custom_tool, which will be sent to the AI. The result of the tool execution will be the content of file or web page from property custom_tool_source."
        },
        "llama-vscode.tool_custom_tool_source": {
          "type": "string",
          "default": "https://news.smol.ai/",
          "description": "The long name of a text file (for example c:\\ai\\llms_basics.txt) or URL of a web page (should start with 'http' i.e. https://news.test.com), which content will be returned by the custom_tool when called. Not all web pages are parsed correctly. "
        },
        "llama-vscode.tool_custom_eval_tool_enabled": {
          "type": "boolean",
          "default": false,
          "description": "Enable/disable tool custom_eval_tool"
        },
        "llama-vscode.tool_custom_eval_tool_description": {
          "type": "string",
          "default": "Use this tool to calculate an arithmetic expression.Example: '15 + 4' or '12/4'",
          "description": "Description for the custom_eval_tool, which will be sent to the AI. The result of the tool will be the result from the execution of the typescript code from setting custom_eval_tool_code. This is powerful, but could be security risk. Be careful."
        },
        "llama-vscode.tool_custom_eval_tool_property_description": {
          "type": "string",
          "default": "The arithmetic expression to be calculated. Example: '5 + 7' or '(3456*5678) - 256' ",
          "description": "The description of the property (input) for the custom_eval_tool. "
        },
        "llama-vscode.tool_custom_eval_tool_code": {
          "type": "string",
          "default": "function(input) { return eval(input); }",
          "description": "The javascript function to be executed when the tool is called. It should have one parameter of type string. When called, the parameter will be the value provided by the AI in the tool property. This is powerful, but could be security risk. Be careful."
        },
        "llama-vscode.tools_max_iterations": {
          "type": "number",
          "default": 20,
          "description": "Max number of iterations with AI when working with tools. If you are working with paid AI providers, big number here could result in higher costs."
        },
        "llama-vscode.chats_max_history": {
          "type": "number",
          "default": 50,
          "description": "Max number of chats to store in history. An old chat is removed if needed on adding a new chat"
        },
        "llama-vscode.chats_max_tokens": {
          "type": "number",
          "default": 64000,
          "description": "Max number of tokens per chat (1 token ~4 chars). If the chat is longer, the initial part will be summarized. This is approximate - the detection is by counting the chars in a chat (assuming 1 token is 4 chars)."
        },
        "llama-vscode.chats_summarize_old_msgs": {
          "type": "boolean",
          "default": false,
          "description": "If true - summarizes the old messages (keeps total chats_msgs_keep), when the chats_max_chars limit is reached, to reduce the context size. After the summarization, the first request could be very slow as the whole chat should be processed again (no chache reuse)."
        },
        "llama-vscode.chats_msgs_keep": {
          "type": "number",
          "default": 50,
          "description": "The number of messages to keep summarizing a chat."
        },
        "llama-vscode.tools_log_calls": {
          "type": "boolean",
          "default": false,
          "description": "Show the details about the tools calls in UI - arguments and results."
        },
        "llama-vscode.language": {
          "type": "string",
          "default": "en",
          "description": "language: bg - Bulgarian (Български), cn - Chinese (中文), en - English, fr - French (Français), de - German (Deutsch), ru - Russian (Русский), es - Spanish (Español)"
        },
        "llama-vscode.enabled": {
          "type": "boolean",
          "default": true,
          "description": "Enable/disable completions"
        },
        "llama-vscode.languageSettings": {
          "type": "object",
          "default": {
            "*": true
          },
          "additionalProperties": {
            "type": "boolean"
          },
          "description": "Enable/disable suggestions for specific languages"
        },
        "llama-vscode.use_openai_endpoint": {
          "type": "boolean",
          "default": false,
          "description": "[EXPERIMENTAL] Use OAI endpoint. Slow and poor quality - avoid using"
        },
        "llama-vscode.openai_client_model": {
          "type": "string",
          "default": "",
          "description": "The FIM friendly model supported by your OpenAI compatible endpoint to be used (e.g., Qwen2.5-Coder-14B-4-bit)"
        },
        "llama-vscode.openai_prompt_template": {
          "type": "string",
          "default": "<|fim_prefix|>{inputPrefix}{prompt}<|fim_suffix|>{inputSuffix}<|fim_middle|>",
          "description": "The prompt template to be used for the OpenAI compatible endpoint."
        }
      }
    },
    "menus": {
      "scm/title": [
        {
          "command": "extension.generateGitCommitMessage",
          "when": "scmProvider == git",
          "group": "navigation"
        }
      ],
      "editor/context": [
        {
          "command": "extension.editSelectedText",
          "when": "editorHasSelection",
          "group": "llama@1"
        },
        {
          "command": "extension.showLlamaWebview",
          "when": "editorHasSelection",
          "group": "llama@1"
        }
      ]
    }
  },
  "scripts": {
    "watch": "tsc -watch -p ./",
    "build-ui": "cd ui && npm install && npm run build",
    "dev-ui": "cd ui && npm install && npm run dev",
    "postinstall": "npm run build-ui",
    "test": "node ./dist/test/runTest.js",
    "compile": "tsc -p ./",
    "lint": "eslint --ext .ts,.tsx .",
    "format": "prettier --write --ignore-path .gitignore '**/*'"
  },
  "dependencies": {
    "axios": "^1.1.2",
    "globby": "^14.1.0",
    "ignore": "^7.0.4",
    "openai": "^4.80.1",
    "picomatch": "^4.0.2",
    "remark-gfm": "^4.0.1",
    "simple-git": "^3.28.0"
  },
  "devDependencies": {
    "@vscode/test-cli": "^0.0.11",
    "@babel/types": "^7.28.4",
    "@types/micromatch": "^4.0.9",
    "@types/mocha": "^10.0.10",
    "@types/node": "^18.0.0",
    "@types/picomatch": "^4.0.0",
    "@types/vscode": "^1.100.0",
    "@vscode/test-electron": "^2.5.2",
    "glob": "^11.0.3",
    "mocha": "^11.7.4",
    "typescript": "^4.8.0",
    "webpack": "^5.100.2",
    "webpack-cli": "^4.10.0"
  },
  "extensionDependencies": [
    "vscode.git"
  ]
}