v0.0.47

API key for getting models list, Multiline field for Edit with AI, Qwen3.5 models added
- API key is used (if needed and provided) when getting the list of models while adding an OpenAI-compatible provider - Multiline field for Edit with AI - Qwen3.5 models (2B, 4B, 9B) added to the predefined list - good for tools and chat
2026-05-07 01:15:23 +00:00 · 2026-05-04 08:42:41 +03:00 · 2026-05-04 08:42:01 +03:00 · 2026-04-30 07:59:35 +03:00 · 2026-04-29 21:06:11 +03:00 · 2026-04-29 21:04:33 +03:00
16 changed files with 678 additions and 55 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -1,12 +1,12 @@
 {
  "name": "llama-vscode",
-  "version": "0.0.39",
+  "version": "0.0.45",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "llama-vscode",
-      "version": "0.0.39",
+      "version": "0.0.45",
      "hasInstallScript": true,
      "dependencies": {
        "axios": "^1.1.2",
@ -24,7 +24,7 @@
        "@types/mocha": "^10.0.10",
        "@types/node": "^18.0.0",
        "@types/picomatch": "^4.0.0",
-        "@types/vscode": "^1.100.0",
+        "@types/vscode": "^1.109.0",
        "@vscode/test-cli": "^0.0.11",
        "@vscode/test-electron": "^2.5.2",
        "esbuild": "^0.27.0",
@ -829,9 +829,9 @@
      "license": "MIT"
    },
    "node_modules/@types/vscode": {
-      "version": "1.103.0",
-      "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.103.0.tgz",
-      "integrity": "sha512-o4hanZAQdNfsKecexq9L3eHICd0AAvdbLk6hA60UzGXbGH/q8b/9xv2RgR7vV3ZcHuyKVq7b37IGd/+gM4Tu+Q==",
+      "version": "1.109.0",
+      "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.109.0.tgz",
+      "integrity": "sha512-0Pf95rnwEIwDbmXGC08r0B4TQhAbsHQ5UyTIgVgoieDe4cOnf92usuR5dEczb6bTKEp7ziZH4TV1TRGPPCExtw==",
      "dev": true,
      "license": "MIT"
    },
@ -1182,7 +1182,6 @@
      "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "bin": {
        "acorn": "bin/acorn"
      },
@ -1231,7 +1230,6 @@
      "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "fast-deep-equal": "^3.1.3",
        "fast-uri": "^3.0.1",
@ -1432,7 +1430,6 @@
        }
      ],
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "caniuse-lite": "^1.0.30001669",
        "electron-to-chromium": "^1.5.41",
@ -5516,7 +5513,6 @@
      "integrity": "sha512-NLhDfH/h4O6UOy+0LSso42xvYypClINuMNBVVzX4vX98TmTaTUxwRbXdhucbFMd2qLaCTcLq/PdYrvi8onw90w==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "@discoveryjs/json-ext": "^0.5.0",
        "@webpack-cli/configtest": "^1.2.0",
--- a/package.json
+++ b/package.json
@ -2,11 +2,11 @@
  "name": "llama-vscode",
  "displayName": "llama-vscode",
  "description": "Local LLM-assisted text completion using llama.cpp",
-  "version": "0.0.43",
+  "version": "0.0.47",
  "publisher": "ggml-org",
  "repository": "https://github.com/ggml-org/llama.vscode",
  "engines": {
-    "vscode": "^1.100.0"
+    "vscode": "^1.109.0"
  },
  "icon": "llama.png",
  "activationEvents": [
@ -17,6 +17,13 @@
  ],
  "main": "./dist/extension.js",
  "contributes": {
+    "languageModelChatProviders": [
+      {
+        "vendor": "llama-vscode",
+        "displayName": "llama.vscode",
+        "managementCommand": "extension.showMenu"
+      }
+    ],
    "viewsContainers": {
      "activitybar": [
        {
@ -691,7 +698,6 @@
                "create_agent"
              ]
            }
-            
          ],
          "description": "The list of the agents, which could be selected"
        },
@ -1748,6 +1754,11 @@
          "default": true,
          "description": "If code completion should be triggered automatically (true) or only by pressing Ctrl+l."
        },
+        "llama-vscode.debounce_ms": {
+          "type": "number",
+          "default": 0,
+          "description": "Milliseconds to wait after the last keystroke before sending a completion request (0 = disabled). Useful on low-end hardware to avoid triggering inference on every keystroke."
+        },
        "llama-vscode.api_key": {
          "type": "string",
          "default": "",
@ -2259,7 +2270,7 @@
    "@types/mocha": "^10.0.10",
    "@types/node": "^18.0.0",
    "@types/picomatch": "^4.0.0",
-    "@types/vscode": "^1.100.0",
+    "@types/vscode": "^1.109.0",
    "@vscode/test-cli": "^0.0.11",
    "@vscode/test-electron": "^2.5.2",
    "esbuild": "^0.27.0",
--- a/resources/help.md
+++ b/resources/help.md
@ -51,7 +51,21 @@ https://github.com/user-attachments/assets/97bb1418-dcea-4a49-8332-13b2ab4da661



-![Code completion](https://private-user-images.githubusercontent.com/1991296/405712196-b19499d9-f50d-49d4-9dff-ff3e8ba23757.gif?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDY5NDc1NDEsIm5iZiI6MTc0Njk0NzI0MSwicGF0aCI6Ii8xOTkxMjk2LzQwNTcxMjE5Ni1iMTk0OTlkOS1mNTBkLTQ5ZDQtOWRmZi1mZjNlOGJhMjM3NTcuZ2lmP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI1MDUxMSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNTA1MTFUMDcwNzIxWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9NmZiMmI0NGYzNTkyZGZkMTM5Njk3M2NjZDFhMjFiNTFkMjVkMmY4MGQ5ZDQ2ZDQ0MDgzOWI2YjM5NTY0NzM2OSZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QifQ.P150YJh87_y1pin20aWIuKoPzivmDjZF0iAemQlk_ok)## Custom eval tool
+![Code completion](https://private-user-images.githubusercontent.com/1991296/405712196-b19499d9-f50d-49d4-9dff-ff3e8ba23757.gif?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDY5NDc1NDEsIm5iZiI6MTc0Njk0NzI0MSwicGF0aCI6Ii8xOTkxMjk2LzQwNTcxMjE5Ni1iMTk0OTlkOS1mNTBkLTQ5ZDQtOWRmZi1mZjNlOGJhMjM3NTcuZ2lmP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI1MDUxMSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNTA1MTFUMDcwNzIxWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9NmZiMmI0NGYzNTkyZGZkMTM5Njk3M2NjZDFhMjFiNTFkMjVkMmY4MGQ5ZDQ2ZDQ0MDgzOWI2YjM5NTY0NzM2OSZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QifQ.P150YJh87_y1pin20aWIuKoPzivmDjZF0iAemQlk_ok)
+
+## Copilot Chat Model Provider
+
+### Overview
+Llama-vscode can be used as a model provider for VS Code Copilot Chat. In other words, llama-vscode can provide models for Copilot. The provided models can come from local models, from openrouter.com, or from any other application that serves the tools models for llama-vscode. This way you can automatically download and start models locally with llama.cpp and llama-vscode and use them with Copilot for free.
+
+### How to use it
+1. Select/Start tools model from llama-vscode (local or external)  
+<img width="485" height="875" alt="copilotSelectToolsModel" src="https://github.com/user-attachments/assets/caa33531-22f4-46dd-b429-7498c45c93e9" />
+  
+2. In VS Code Copilot show the models list -> Other Models -> Manage Models  
+<img width="1404" height="754" alt="CopilotManageModels" src="https://github.com/user-attachments/assets/dc861aa1-db86-46ff-83c1-98c7a435ad06" />
+  
+3. Make the models (all models available by the application serving the tools model are shown) you want to use visible (click on the left of the model name)  
+4. Select the desired model from Copilot and start using it
+## Custom eval tool

 ### Overview
 llama-vscode provides to the users the posibility to partially create their own tool. Custom eval tool is a simple one - has one parameters and and uses the provided by the user javascript function to calculate the result.
@ -204,14 +218,28 @@ Settings:
 <img width="580" height="779" alt="image" src="https://github.com/user-attachments/assets/bb29e0c8-85b4-4e7a-a3d9-f2d9a1679d3d" />


-## Version 0.0.40 is released (05.01.2025)
+## Version 0.0.45 is released (04.03.2026)
 ## What is new

-Generation of multiple completions in parallel:
- Setting max_parallel_completions determines how many completions to generate in parallel (default 3)
- Shortcuts - Alt+] - next completion, Alt+[ - previous completion
- Requires llama.cpp after December, 6, 2025 (commit c42712b) but is backword compatible (generates one completion for older versions)
- [More details](https://github.com/ggml-org/llama.vscode/wiki/Parallel-completions)
+- Configurable debounce for inline completion requests - setting debounce_ms. 
+llama-vscode will wait debounce_ms after a keystroke before sending a request to the LLM for inline code completion. If in the meantime there is another keystroke, the request for the previous keystroke is cancelled. Useful on low end hardware to avoid triggering code completion on every keystroke.
+
+- Notification "Extension is updated" is shown only on version change, not on every setting change (as was before)
+
+
+## Version 0.0.44 is released (03.03.2026)
+## What is new
+
+- Subagents implemented (with tool delegate_task) - now each agent that has "Available as Subagent" checked can be used as a subagent
+
+- new agent - Unit Test Writer
+
+- new tool create_agent
+
+- new agent "Agent creator"
+
+- Files SOUL.md and USER.md (if available in the project root) will be added to the context
+

 ## Setup instructions for llama.cpp server

--- a/src/application.ts
+++ b/src/application.ts
@ -30,6 +30,7 @@ import { Agent, Chat, Env, LlmModel } from "./types";
 import { ModelType, PERSISTENCE_KEYS } from "./constants";
 import { ApiKeyService } from "./services/api-key-service";
 import { OpenAiCompModelStrategy } from "./services/openai-comp-model-strategy";
+import { LlamaChatModelProvider } from "./llama-chat-model-provider";

 export class Application {
    public static readonly emptyModel = {name: ""};
@ -63,6 +64,7 @@ export class Application {
    public agentCommandService: AgentCommandService
    public chatService: ChatService
    public apiKeyService: ApiKeyService
+    public llamaChatModelProvider: LlamaChatModelProvider

    private selectedComplModel: LlmModel = Application.emptyModel
    private selectedChatModel: LlmModel = Application.emptyModel
@ -105,6 +107,7 @@ export class Application {
        this.agentCommandService = new AgentCommandService(this)
        this.chatService = new ChatService(this) 
        this.apiKeyService = new ApiKeyService(this)
+        this.llamaChatModelProvider = new LlamaChatModelProvider(this);
    }

    public static getInstance(context: vscode.ExtensionContext): Application {
--- a/src/architect.ts
+++ b/src/architect.ts
@ -9,6 +9,7 @@ import { Utils } from './utils';
 import { Env, LlmModel } from './types';
 import { env } from 'process';
 import { PERSISTENCE_KEYS, SETTING_NAME_FOR_LIST, UiView } from './constants';
+import {LlamaChatModelProvider} from "./llama-chat-model-provider";

 export class Architect {
    private app: Application
@ -26,6 +27,14 @@ export class Architect {
            this.app.menu.showHowToUseLlamaVscode();
            this.app.persistence.setGlobalValue("isFirstStart", false)
        }
+        const currentVersion = vscode.extensions.getExtension('ggml-org.llama-vscode')?.packageJSON?.version as string | undefined;
+        const storedVersion = this.app.persistence.getGlobalValue(PERSISTENCE_KEYS.EXTENSION_VERSION) as string | undefined;
+        if (currentVersion && storedVersion && currentVersion !== storedVersion) {
+            vscode.window.showInformationMessage(this.app.configuration.getUiText(`llama-vscode extension is updated.`) ?? "");
+        }
+        if (currentVersion) {
+            this.app.persistence.setGlobalValue(PERSISTENCE_KEYS.EXTENSION_VERSION, currentVersion);
+        }
        await this.installUpgradeLlamaCpp(isFirstStart);
        if (this.app.configuration.env_start_last_used){
            let lastEnv = this.app.persistence.getValue("selectedEnv")
@ -70,7 +79,6 @@ export class Architect {
            if (this.app.configuration.isRagConfigChanged(event)) this.init();
            if (this.app.configuration.isToolChanged(event)) this.app.tools.init();
            if (this.app.configuration.isEnvViewSettingChanged(event)) this.app.llamaWebviewProvider.updateLlamaView();
-            vscode.window.showInformationMessage(this.app.configuration.getUiText(`llama-vscode extension is updated.`)??"");
        });
        context.subscriptions.push(configurationChangeDisp);
    }
@ -204,6 +212,22 @@ export class Architect {
        );
    }

+    /**
+     * Registers `this.app.llamaChatModelProvider` with VS Code under the vendor id
+     * 'llama-vscode' and subscribes to configuration changes so the provider can announce
+     * that its model list may have changed.
+     *
+     * Both the provider registration and the configuration listener are pushed to
+     * `context.subscriptions`.
+     *
+     * @param context Extension context that receives the two registrations.
+     */
+    registerLlavaVscodeModelProvider = (context: vscode.ExtensionContext) => {
+        // Register the llama.vscode language model chat provider for GitHub Copilot Chat
+        
+        context.subscriptions.push(vscode.lm.registerLanguageModelChatProvider(
+            'llama-vscode',
+            this.app.llamaChatModelProvider
+        ));
+        // Only these three settings trigger a model-list refresh notification.
+        context.subscriptions.push(vscode.workspace.onDidChangeConfiguration((event) => {
+            if (event.affectsConfiguration('llama-vscode.endpoint_chat')
+                || event.affectsConfiguration('llama-vscode.endpoint_tools')
+                || event.affectsConfiguration('llama-vscode.ai_api_version')) {
+                this.app.llamaChatModelProvider.notifyModelsChanged();
+            }
+        }));
+    }
+
    registerGenarateCommitMsg = (context: vscode.ExtensionContext) => {
        const generateCommitCommand = vscode.commands.registerCommand(
            'extension.generateGitCommitMessage',
--- a/src/completion.ts
+++ b/src/completion.ts
@ -30,6 +30,15 @@ export class Completion {
            return null;
        }

+        // Debounce: wait for the user to pause typing before hitting the backend
+        if (context.triggerKind == vscode.InlineCompletionTriggerKind.Automatic && this.app.configuration.debounce_ms > 0) {
+            await Utils.delay(this.app.configuration.debounce_ms);
+            if (token.isCancellationRequested) {
+                this.app.logger.addEventLog(group, "DEBOUNCE_CANCELLATION_RETURN", "")
+                return null;
+            }
+        }
+
        // Start only if the previous request is finiched
        while (this.isRequestInProgress) {
            await Utils.delay(this.app.configuration.DELAY_BEFORE_COMPL_REQUEST);
--- a/src/configuration.ts
+++ b/src/configuration.ts
@ -29,6 +29,7 @@ export class Configuration {
    new_embeddings_model_host = "127.0.0.1"
    new_tools_model_host = "127.0.0.1"
    auto = true;
+    debounce_ms = 0;
    api_key = "";
    api_key_chat = "";
    api_key_tools = "";
@ -196,6 +197,7 @@ export class Configuration {
        this.openai_client_model = String(config.get<string>("openai_client_model"));
        this.openai_prompt_template = String(config.get<string>("openai_prompt_template"));
        this.auto = Boolean(config.get<boolean>("auto"));
+        this.debounce_ms = Number(config.get<number>("debounce_ms"));
        this.api_key = String(config.get<string>("api_key"));
        this.api_key_chat = String(config.get<string>("api_key_chat"));
        this.api_key_tools = String(config.get<string>("api_key_tools"));
--- a/src/constants.ts
+++ b/src/constants.ts
@ -253,6 +253,7 @@ export const PERSISTENCE_KEYS = {
  SELECTED_CHAT: 'selectedChat' as const,
  SELECTED_AGENT: 'selectedAgent' as const,
  SELECTED_ENV: 'selectedEnv' as const,
+  EXTENSION_VERSION: 'extensionVersion' as const,
 } as const;

 export const SETTING_NAME_FOR_LIST = {
--- a/src/extension.ts
+++ b/src/extension.ts
@ -32,7 +32,10 @@ export function activate(context: vscode.ExtensionContext) {
    app.architect.registerWebviewProvider(context)
    app.architect.registerCommandSelectNextSuggestion(context)
    app.architect.registerCommandSelectPreviousSuggestion(context)
+    app.architect.registerLlavaVscodeModelProvider(context)
    app.architect.init()
+
+    
 }

 export async function deactivate() {
--- a/src/lists.ts
+++ b/src/lists.ts
@ -79,7 +79,36 @@ export const PREDEFINED_LISTS = new Map<string, any>([
              "endpoint": "http://127.0.0.1:8010"
            }
          ]],
-[PREDEFINED_LISTS_KEYS.TOOLS,  [
+[PREDEFINED_LISTS_KEYS.TOOLS,
+    [
+            {
+              "name": "Qwen3.5-2B-GGUF:Q8_0 (LOCAL) (CPU)",
+              "localStartCommand": "llama-server -hf unsloth/Qwen3.5-2B-GGUF:Q8_0 --jinja  -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
+              "endpoint": "http://localhost:8009",
+              "aiModel": "",
+              "isKeyRequired": false
+            },
+            {
+              "name": "Qwen3.5-2B-GGUF:Q8_0 (LOCAL) (VRAM>3GB)",
+              "localStartCommand": "llama-server -hf unsloth/Qwen3.5-2B-GGUF:Q8_0 --jinja -ngl 99  -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
+              "endpoint": "http://localhost:8009",
+              "aiModel": "",
+              "isKeyRequired": false
+            },
+            {
+              "name": "Qwen3.5-4B-GGUF:Q8_0 (LOCAL) (VRAM>6GB)",
+              "localStartCommand": "llama-server -hf unsloth/Qwen3.5-4B-GGUF:Q8_0 --jinja -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
+              "endpoint": "http://localhost:8009",
+              "aiModel": "",
+              "isKeyRequired": false
+            },
+            {
+              "name": "Qwen3.5-9B-GGUF:Q8_0 (LOCAL) (VRAM>12GB)",
+              "localStartCommand": "llama-server -hf unsloth/Qwen3.5-9B-GGUF:Q8_0 --jinja -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
+              "endpoint": "http://localhost:8009",
+              "aiModel": "",
+              "isKeyRequired": false
+            },
            {
              "name": "OpenAI gpt-oss 20B (LOCAL) (> 19GB VRAM)",
              "localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
--- a/src/llama-chat-model-provider.ts
+++ b/src/llama-chat-model-provider.ts
@ -0,0 +1,237 @@
+import * as vscode from 'vscode';
+import axios from 'axios';
+import { Application } from './application';
+import { Utils } from './utils';
+
+const VENDOR = 'llama-vscode';
+
+// Default token limits used when the server does not report them
+const DEFAULT_MAX_INPUT_TOKENS = 8192;
+const DEFAULT_MAX_OUTPUT_TOKENS = 4096;
+
+interface OpenAIModel {
+    id: string;
+    object?: string;
+}
+
+interface OpenAIModelsResponse {
+    data: OpenAIModel[];
+}
+
+/**
+ * Language model chat provider (vendor id `llama-vscode`) that offers the models of the
+ * endpoint selected in llama-vscode to VS Code chat.
+ *
+ * The model list is read from the OpenAI-compatible `/models` route. Responses are streamed
+ * from `/chat/completions` as server-sent events and forwarded as text and tool-call parts.
+ */
+export class LlamaChatModelProvider implements vscode.LanguageModelChatProvider {
+    private readonly _onDidChangeLanguageModelChatInformation = new vscode.EventEmitter<void>();
+    readonly onDidChangeLanguageModelChatInformation: vscode.Event<void> =
+        this._onDidChangeLanguageModelChatInformation.event;
+
+    constructor(private readonly app: Application) {}
+
+    /** Called by the configuration change handler to notify VS Code that models may have changed. */
+    notifyModelsChanged(): void {
+        this._onDidChangeLanguageModelChatInformation.fire();
+    }
+
+    /**
+     * Lists the models reported by the resolved endpoint.
+     *
+     * @returns One entry per reported model id, each with the default token limits. An empty
+     * list is returned when no endpoint is configured, the server reports no models, or the
+     * request fails.
+     */
+    async provideLanguageModelChatInformation(
+        _options: vscode.PrepareLanguageModelChatModelOptions,
+        _token: vscode.CancellationToken
+    ): Promise<vscode.LanguageModelChatInformation[]> {
+        const endpoint = this.getChatEndpoint();
+        if (!endpoint) {
+            return [];
+        }
+
+        try {
+            // Use the same request config as provideLanguageModelChatResponse so that listing
+            // and chatting send identical headers to the endpoint.
+            const requestConfig = this.app.configuration.axiosRequestConfigTools;
+            const response = await axios.get<OpenAIModelsResponse>(
+                `${Utils.trimTrailingSlash(endpoint)}/${this.app.configuration.ai_api_version}/models`,
+                requestConfig
+            );
+
+            if (!response.data?.data?.length) {
+                return [];
+            }
+
+            return response.data.data.map((model) => ({
+                id: model.id,
+                name: model.id,
+                family: VENDOR,
+                version: '1',
+                maxInputTokens: DEFAULT_MAX_INPUT_TOKENS,
+                maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
+                capabilities: {
+                    toolCalling: true,
+                    imageInput: false,
+                },
+            }));
+        } catch (e) {
+            // Best effort: a failing server yields no models, but leave a trace for debugging.
+            console.warn('[llama-vscode] Failed to get the list of chat models:', e);
+            return [];
+        }
+    }
+
+    /**
+     * Streams a chat completion for `model` from the resolved endpoint.
+     *
+     * Text deltas are reported as they arrive. Tool calls are accumulated across chunks and
+     * reported once the stream finishes. Cancelling `token` aborts the request and ends the
+     * stream without reporting pending tool calls.
+     *
+     * @throws Error when no endpoint is configured, or when the HTTP request or stream fails.
+     */
+    async provideLanguageModelChatResponse(
+        model: vscode.LanguageModelChatInformation,
+        messages: readonly vscode.LanguageModelChatRequestMessage[],
+        options: vscode.ProvideLanguageModelChatResponseOptions,
+        progress: vscode.Progress<vscode.LanguageModelResponsePart>,
+        token: vscode.CancellationToken
+    ): Promise<void> {
+        const endpoint = this.getChatEndpoint();
+        if (!endpoint) {
+            throw new Error('No chat endpoint configured');
+        }
+
+        const openaiMessages: Record<string, unknown>[] = [];
+        for (const msg of messages) {
+            openaiMessages.push(...this.toOpenAiMessages(msg));
+        }
+
+        const tools = options.tools?.map((t: vscode.LanguageModelToolInformation) => ({
+            type: 'function',
+            function: {
+                name: t.name,
+                description: t.description,
+                parameters: t.inputSchema,
+            },
+        }));
+
+        const requestBody: Record<string, unknown> = {
+            model: model.id,
+            messages: openaiMessages,
+            stream: true,
+            max_tokens: DEFAULT_MAX_OUTPUT_TOKENS,
+            ...(options.modelOptions?.temperature !== undefined && {
+                temperature: options.modelOptions.temperature,
+            }),
+            ...(tools?.length && { tools }),
+        };
+
+        const abortController = new AbortController();
+        // Every cancellation listener is collected here so it can be released in `finally`.
+        const subscriptions: vscode.Disposable[] = [
+            token.onCancellationRequested(() => abortController.abort()),
+        ];
+
+        try {
+            const requestConfig = this.app.configuration.axiosRequestConfigTools;
+            const streamResponse = await axios.post<NodeJS.ReadableStream>(
+                `${Utils.trimTrailingSlash(endpoint)}/${this.app.configuration.ai_api_version}/chat/completions`,
+                requestBody,
+                { ...requestConfig, responseType: 'stream' as const, signal: abortController.signal }
+            );
+
+            await new Promise<void>((resolve, reject) => {
+                const readable = streamResponse.data;
+                // Decode inside the stream so that a multi-byte UTF-8 character split across
+                // two chunks is not corrupted.
+                readable.setEncoding('utf8');
+                let buffer = '';
+                // Set once the promise has been resolved or rejected; later events are ignored.
+                let settled = false;
+                // Accumulated tool call data indexed by call index
+                const toolCalls: { id: string; name: string; arguments: string }[] = [];
+
+                const finalize = () => {
+                    if (settled) {
+                        return;
+                    }
+                    settled = true;
+                    // forEach skips indexes the server never sent (sparse array).
+                    toolCalls.forEach((tc, idx) => {
+                        if (!tc.name) {
+                            return;
+                        }
+                        try {
+                            // Some servers omit the call id; generate one instead of dropping the call.
+                            const callId = tc.id || `call_${Date.now()}_${idx}`;
+                            progress.report(
+                                new vscode.LanguageModelToolCallPart(callId, tc.name, JSON.parse(tc.arguments || '{}'))
+                            );
+                        } catch (e) {
+                            console.warn('[llama-vscode] Failed to parse tool call arguments:', e);
+                        }
+                    });
+                    resolve();
+                };
+
+                subscriptions.push(token.onCancellationRequested(() => {
+                    settled = true;
+                    (readable as any).destroy?.();
+                    resolve();
+                }));
+
+                // Handles one SSE line; returns true when the line is the `[DONE]` terminator.
+                const handleLine = (line: string): boolean => {
+                    const trimmed = line.trim();
+                    if (!trimmed.startsWith('data:')) {
+                        return false;
+                    }
+                    const payload = trimmed.slice(5).trim();
+                    if (payload === '[DONE]') {
+                        return true;
+                    }
+                    try {
+                        const json = JSON.parse(payload);
+                        const choice = json.choices?.[0];
+                        if (!choice) {
+                            return false;
+                        }
+                        const delta = choice.delta ?? {};
+                        if (typeof delta.content === 'string' && delta.content) {
+                            progress.report(new vscode.LanguageModelTextPart(delta.content));
+                        }
+                        if (Array.isArray(delta.tool_calls)) {
+                            for (const tc of delta.tool_calls) {
+                                const idx: number = typeof tc.index === 'number' ? tc.index : 0;
+                                if (!toolCalls[idx]) {
+                                    toolCalls[idx] = { id: '', name: '', arguments: '' };
+                                }
+                                if (tc.id) {
+                                    toolCalls[idx].id = tc.id;
+                                }
+                                if (tc.function?.name) {
+                                    toolCalls[idx].name = tc.function.name;
+                                }
+                                if (tc.function?.arguments) {
+                                    toolCalls[idx].arguments += tc.function.arguments;
+                                }
+                            }
+                        }
+                    } catch {
+                        // Skip malformed SSE chunks
+                    }
+                    return false;
+                };
+
+                readable.on('data', (chunk: Buffer | string) => {
+                    if (settled) {
+                        return;
+                    }
+                    buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
+                    const lines = buffer.split(/\r?\n/);
+                    // The last element is an incomplete line; keep it for the next chunk.
+                    buffer = lines.pop() ?? '';
+
+                    for (const line of lines) {
+                        if (handleLine(line)) {
+                            // Listeners stay attached so a late stream error is still handled.
+                            finalize();
+                            return;
+                        }
+                    }
+                });
+
+                readable.on('end', () => {
+                    // The final event may arrive without a trailing newline.
+                    if (!settled && buffer) {
+                        handleLine(buffer);
+                    }
+                    finalize();
+                });
+
+                readable.on('error', (err: Error) => {
+                    if (settled) {
+                        return;
+                    }
+                    settled = true;
+                    // An error caused by our own abort is a normal end of a cancelled request.
+                    if (token.isCancellationRequested) {
+                        resolve();
+                    } else {
+                        reject(err);
+                    }
+                });
+            });
+        } finally {
+            subscriptions.forEach((subscription) => subscription.dispose());
+        }
+    }
+
+    /**
+     * Estimates the token count of a string or of the text parts of a message.
+     *
+     * @returns The character count divided by four, rounded up.
+     */
+    provideTokenCount(
+        _model: vscode.LanguageModelChatInformation,
+        text: string | vscode.LanguageModelChatRequestMessage,
+        _token: vscode.CancellationToken
+    ): Thenable<number> {
+        const content =
+            typeof text === 'string'
+                ? text
+                : text.content
+                      .map((p: unknown) => (p instanceof vscode.LanguageModelTextPart ? p.value : ''))
+                      .join('');
+        // Rough approximation: 1 token ≈ 4 characters. The llama.cpp server does not expose a
+        // tokenization endpoint via the standard OpenAI API, so we use this heuristic.
+        // Actual token counts may differ depending on the model's tokenizer.
+        return Promise.resolve(Math.ceil(content.length / 4));
+    }
+
+    /**
+     * Converts one VS Code chat message into the OpenAI-compatible message(s) representing it.
+     *
+     * Text parts are concatenated. Tool-call parts become `tool_calls` on an assistant
+     * message, and each tool-result part becomes its own `tool` message, so the model
+     * receives the outcome of the tools it asked for. Other part kinds are ignored.
+     */
+    private toOpenAiMessages(msg: vscode.LanguageModelChatRequestMessage): Record<string, unknown>[] {
+        const role = msg.role === vscode.LanguageModelChatMessageRole.User ? 'user' : 'assistant';
+        const converted: Record<string, unknown>[] = [];
+        const toolCalls: Record<string, unknown>[] = [];
+        let text = '';
+
+        for (const part of msg.content as readonly unknown[]) {
+            if (part instanceof vscode.LanguageModelTextPart) {
+                text += part.value;
+            } else if (part instanceof vscode.LanguageModelToolCallPart) {
+                toolCalls.push({
+                    id: part.callId,
+                    type: 'function',
+                    function: { name: part.name, arguments: JSON.stringify(part.input ?? {}) },
+                });
+            } else if (part instanceof vscode.LanguageModelToolResultPart) {
+                converted.push({
+                    role: 'tool',
+                    tool_call_id: part.callId,
+                    content: part.content
+                        .map((p: unknown) => (p instanceof vscode.LanguageModelTextPart ? p.value : ''))
+                        .join(''),
+                });
+            }
+        }
+
+        if (toolCalls.length) {
+            converted.push({ role: 'assistant', content: text || null, tool_calls: toolCalls });
+        } else if (text || !converted.length) {
+            // Plain messages keep the original shape, including an empty-content message.
+            converted.push({ role, content: text });
+        }
+        return converted;
+    }
+
+    /**
+     * Resolves the endpoint to talk to: the selected tools model's endpoint first, then the
+     * `endpoint_chat` setting, then the `endpoint_tools` setting.
+     *
+     * @returns The endpoint, or an empty string when none is configured.
+     */
+    private getChatEndpoint(): string {
+        const selectedModel = this.app.getToolsModel();
+        if (selectedModel?.endpoint) {
+            return selectedModel.endpoint;
+        }
+        if (this.app.configuration.endpoint_chat) {
+            return this.app.configuration.endpoint_chat;
+        }
+        if (this.app.configuration.endpoint_tools) {
+            return this.app.configuration.endpoint_tools;
+        }
+        return '';
+    }
+}
--- a/src/services/model-service.ts
+++ b/src/services/model-service.ts
@ -195,6 +195,17 @@ export class ModelService {
        await details.killCmd();
        if (model.localStartCommand) await details.shellCmd(this.sanitizeCommand(model.localStartCommand ?? ""));
        await this.app.persistence.setValue(this.getSelectedProp(type), model);
+        if (type == ModelType.Tools && model?.isKeyRequired !== undefined && model.isKeyRequired){
+            const apiKey = this.app.persistence.getApiKey(model.endpoint??"");
+            if (apiKey){
+                this.app.configuration.axiosRequestConfigTools = {
+                    headers: {
+                        Authorization: `Bearer ${apiKey}`,
+                        "Content-Type": "application/json",
+                    },
+                }
+            }
+        }
    }

    public async addModel(type: ModelType, kind: 'local' | 'external' | 'hf' | 'oaiComp'): Promise<void> {
--- a/src/services/openai-comp-model-strategy.ts
+++ b/src/services/openai-comp-model-strategy.ts
@ -44,13 +44,13 @@ export class OpenAiCompModelStrategy implements IAddStrategy {
                    prompt: 'example: http://localhost:8080 or https://openrauter.ai/api'
                })??""
                isKeyRequired = await Utils.confirmAction(`Is API key required for this endpoint (${endpoint})?`, "");
-            } 
+            }
            if (!endpoint){
                vscode.window.showWarningMessage("Endpoint is not provided!")
                return;
            }
            const providerModels: QuickPickItem[] = [];
-            const models = await this.getModels(endpoint);
+            const models = await this.getModels(endpoint, isKeyRequired);
            if (models.length == 0) {
                vscode.window.showInformationMessage("No models are found.")
                return
@ -108,30 +108,50 @@ export class OpenAiCompModelStrategy implements IAddStrategy {
        }
    }

-    private async getModels(endpoint: string): Promise<OpenAiCompModel[]> {
-        let hfEndpoint = Utils.trimTrailingSlash(endpoint) +"/v1/models";
+    private async getModels(endpoint: string, isKeyRequired: boolean): Promise<OpenAiCompModel[]> {
+        const hfEndpoint = Utils.trimTrailingSlash(endpoint) + "/v1/models";
+
+        // Create a request configuration
+        let requestConfig: any = {};
+
+        if (isKeyRequired) {
+            // We get the saved key for this specific endpoint
+            const apiKey = this.app.persistence.getApiKey(endpoint);
+            if (apiKey) {
+                requestConfig = {
+                    headers: {
+                        'Authorization': `Bearer ${apiKey}`,
+                        'Content-Type': 'application/json'
+                    }
+                };
+            }
+        }
+
        try {
-            let result = await axios.default.get(
-                `${Utils.trimTrailingSlash(hfEndpoint)}`
+            const result = await axios.default.get(
+                `${Utils.trimTrailingSlash(hfEndpoint)}`,
+                requestConfig
            );
+
            let models: OpenAiCompModel[] = [];
-        
-            let modelsList: OpenAiCompModel[] = []
-            if (result && result.data && result.data.models) modelsList = result.data.models
-            else if (result && result.data && result.data.data) modelsList = result.data.data
-            if (modelsList.length > 0){
-                for(let mdl of modelsList){
-                    models.push(mdl)
+            let modelsList: OpenAiCompModel[] = [];
+
+            if (result && result.data && result.data.models) modelsList = result.data.models;
+            else if (result && result.data && result.data.data) modelsList = result.data.data;
+
+            if (modelsList.length > 0) {
+                for (let mdl of modelsList) {
+                    models.push(mdl);
                }
            }
-            
+
            return models;
-        } catch (error){
-            vscode.window.showErrorMessage("Error getting provider models): " + error)
+        } catch (error) {
+            vscode.window.showErrorMessage("Error getting provider models: " + error);
            return [];
        }
    }
-    
+
    private sanitizeInput(input: string): string {
        return input ? input.trim() : '';
    }
--- a/src/text-editor.ts
+++ b/src/text-editor.ts
@ -26,6 +26,192 @@ export class TextEditor {
        vscode.commands.executeCommand('setContext', 'textEditSuggestionVisible', visible);
    }

+    /**
+     * Escapes a string so it can be interpolated into the webview HTML, both
+     * inside double-quoted attribute values and as element text content.
+     *
+     * All five HTML-significant characters (& " ' < >) are replaced with
+     * entities. `&` is handled first so that the entities produced by the
+     * later replacements are not escaped a second time.
+     *
+     * @param value Raw text to embed in the HTML.
+     * @returns The text with HTML-significant characters replaced by entities.
+     */
+    private escapeWebviewAttr(value: string): string {
+        return value
+            .replace(/&/g, '&amp;')
+            .replace(/"/g, '&quot;')
+            .replace(/'/g, '&#39;')
+            .replace(/</g, '&lt;')
+            .replace(/>/g, '&gt;');
+    }
+
+    /**
+     * Collects multiline editing instructions from the user in a webview panel.
+     *
+     * The panel opens beside the active editor and shows a textarea with Submit
+     * and Cancel buttons. All visible strings are looked up through
+     * `configuration.getUiText` and fall back to the English literal when the
+     * lookup yields null/undefined; they are escaped before being placed in the
+     * HTML.
+     *
+     * Host/webview protocol (see the inline script and the handlers below):
+     *  - webview -> host `{ command: 'submit', text }`: accepted only when the
+     *    text is not blank; otherwise an information message is shown and the
+     *    panel stays open.
+     *  - webview -> host `{ command: 'cancel' }`: resolves with undefined.
+     *  - host -> webview `{ command: 'focusPrompt' }`: moves focus to the
+     *    textarea and places the caret at the end.
+     *
+     * @returns A promise that resolves with the submitted text exactly as typed
+     *          (not trimmed), or undefined if the prompt is cancelled or the
+     *          panel is closed. The promise is resolved at most once.
+     */
+    private showMultilineEditPrompt(): Promise<string | undefined> {
+        const title =
+            this.app.configuration.getUiText('How would you like to modify the selected text?') ??
+            'How would you like to modify the selected text?';
+        const placeholder =
+            this.app.configuration.getUiText('Enter your instructions for editing the text...') ??
+            'Enter your instructions for editing the text...';
+        const submitLabel = this.app.configuration.getUiText('Submit') ?? 'Submit';
+        const cancelLabel = this.app.configuration.getUiText('Cancel') ?? 'Cancel';
+        const emptyHint =
+            this.app.configuration.getUiText('Please enter editing instructions.') ??
+            'Please enter editing instructions.';
+
+        return new Promise((resolve) => {
+            let settled = false; // true once the promise has been resolved; prevents a second resolve
+            const panel = vscode.window.createWebviewPanel(
+                'editWithAiMultilinePrompt',
+                title,
+                { viewColumn: vscode.ViewColumn.Beside, preserveFocus: false },
+                { enableScripts: true } // the inline script in the HTML below needs script execution
+            );
+
+            const finish = (value: string | undefined) => { // resolve exactly once, then close the panel
+                if (settled) {
+                    return;
+                }
+                settled = true;
+                resolve(value);
+                panel.dispose(); // the onDidDispose handler below then sees settled === true and does nothing
+            };
+
+            const cspSource = panel.webview.cspSource; // source value interpolated into the CSP meta tag of the HTML
+            panel.webview.html = `<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src ${cspSource} 'unsafe-inline'; script-src 'unsafe-inline' ${cspSource};">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <style>
+        body {
+            box-sizing: border-box;
+            margin: 0;
+            padding: 12px;
+            height: 100vh;
+            display: flex;
+            flex-direction: column;
+            font-family: var(--vscode-font-family);
+            font-size: var(--vscode-font-size);
+            color: var(--vscode-foreground);
+            background-color: var(--vscode-editor-background);
+        }
+        label {
+            margin-bottom: 8px;
+        }
+        textarea {
+            flex: 1;
+            min-height: 120px;
+            resize: vertical;
+            padding: 8px;
+            border: 1px solid var(--vscode-input-border);
+            background: var(--vscode-input-background);
+            color: var(--vscode-input-foreground);
+            font-family: var(--vscode-editor-font-family);
+            font-size: var(--vscode-editor-font-size);
+        }
+        textarea:focus {
+            outline: 1px solid var(--vscode-focusBorder);
+        }
+        .actions {
+            margin-top: 12px;
+            display: flex;
+            gap: 8px;
+            justify-content: flex-end;
+        }
+        /* DOM order is Submit then Cancel (Tab: textarea → Submit → Cancel); flex order keeps Cancel left, Submit right. */
+        .actions .secondary {
+            order: 1;
+        }
+        .actions .primary {
+            order: 2;
+        }
+        button {
+            padding: 6px 14px;
+            border: none;
+            cursor: pointer;
+            font-size: var(--vscode-font-size);
+        }
+        .primary {
+            background: var(--vscode-button-background);
+            color: var(--vscode-button-foreground);
+        }
+        .primary:hover {
+            background: var(--vscode-button-hoverBackground);
+        }
+        .secondary {
+            background: var(--vscode-button-secondaryBackground);
+            color: var(--vscode-button-secondaryForeground);
+        }
+        .secondary:hover {
+            background: var(--vscode-button-secondaryHoverBackground);
+        }
+    </style>
+</head>
+<body>
+    <label for="prompt">${this.escapeWebviewAttr(title)}</label>
+    <textarea id="prompt" placeholder="${this.escapeWebviewAttr(placeholder)}" autofocus></textarea>
+    <div class="actions">
+        <button type="button" class="primary" id="submit">${this.escapeWebviewAttr(submitLabel)}</button>
+        <button type="button" class="secondary" id="cancel">${this.escapeWebviewAttr(cancelLabel)}</button>
+    </div>
+    <script>
+        const vscode = acquireVsCodeApi();
+        const ta = document.getElementById('prompt');
+        function focusPrompt() {
+            if (!ta) {
+                return;
+            }
+            ta.focus();
+            const len = ta.value.length;
+            ta.setSelectionRange(len, len);
+        }
+        window.addEventListener('load', focusPrompt);
+        requestAnimationFrame(focusPrompt);
+        setTimeout(focusPrompt, 0);
+        setTimeout(focusPrompt, 100);
+        window.addEventListener('message', (event) => {
+            const data = event.data;
+            if (data && data.command === 'focusPrompt') {
+                focusPrompt();
+            }
+        });
+        document.getElementById('submit').addEventListener('click', () => {
+            vscode.postMessage({ command: 'submit', text: ta.value });
+        });
+        document.getElementById('cancel').addEventListener('click', () => {
+            vscode.postMessage({ command: 'cancel' });
+        });
+    </script>
+</body>
+</html>`;
+
+            const requestPromptFocus = () => { // asks the webview script to focus the textarea
+                void panel.webview.postMessage({ command: 'focusPrompt' });
+            };
+            panel.onDidChangeViewState((e) => { // re-request focus whenever the panel is visible after a state change
+                if (e.webviewPanel.visible) {
+                    requestPromptFocus();
+                }
+            });
+            requestPromptFocus(); // sent immediately and again after 50 ms and 200 ms
+            setTimeout(requestPromptFocus, 50); // NOTE(review): retries presumably cover a webview that is not ready yet - confirm
+            setTimeout(requestPromptFocus, 200);
+
+            panel.webview.onDidReceiveMessage((message) => { // messages posted by the inline script above
+                if (message.command === 'submit') {
+                    const text = typeof message.text === 'string' ? message.text : ''; // non-string payloads count as empty
+                    if (!text.trim()) { // whitespace-only input: notify and keep the panel open
+                        void vscode.window.showInformationMessage(emptyHint);
+                        return;
+                    }
+                    finish(text);
+                } else if (message.command === 'cancel') {
+                    finish(undefined);
+                }
+            });
+
+            panel.onDidDispose(() => { // panel closed without Submit/Cancel: resolve with undefined
+                if (!settled) {
+                    settled = true;
+                    resolve(undefined);
+                }
+            });
+        });
+    }
+
    async showEditPrompt(editor: vscode.TextEditor) {
        let chatUrl = this.app.configuration.endpoint_chat
        if (!chatUrl) chatUrl = this.app.configuration.endpoint_tools; 
@ -64,12 +250,7 @@ export class TextEditor {
        const contextRange = new vscode.Range(startLine, 0, endLine, editor.document.lineAt(endLine).text.length);
        const context = editor.document.getText(contextRange);
        
-        // Create and show input box
-        const prompt = await vscode.window.showInputBox({
-            placeHolder: 'Enter your instructions for editing the text...',
-            prompt: 'How would you like to modify the selected text?',
-            ignoreFocusOut: true
-        });
+        const prompt = await this.showMultilineEditPrompt();

        if (!prompt) {
            return;
--- a/src/vscode-lm-chat-shim.d.ts
+++ b/src/vscode-lm-chat-shim.d.ts
@ -0,0 +1,77 @@
+// Temporary shim for VS Code LM chat-provider typings.
+// Some @types/vscode versions ship parts of the LM API behind proposal typings.
+// This keeps `tsc` happy while still targeting the runtime VS Code API.
+
+import type * as vscode from 'vscode';
+
+declare module 'vscode' { // module augmentation: the declarations below merge into the 'vscode' typings
+	// eslint-disable-next-line @typescript-eslint/no-namespace
+	export namespace lm {
+		function registerLanguageModelChatProvider( // signature only; takes a vendor string and a provider object
+			vendor: string,
+			provider: LanguageModelChatProvider
+		): vscode.Disposable;
+	}
+
+	export interface PrepareLanguageModelChatModelOptions {} // declared with no members in this shim
+
+	export interface LanguageModelChatCapabilities { // optional capability flags, referenced by LanguageModelChatInformation
+		toolCalling?: boolean;
+		imageInput?: boolean;
+	}
+
+	export interface LanguageModelChatInformation { // model descriptor; only id and name are required
+		id: string;
+		name: string;
+		family?: string;
+		version?: string;
+		maxInputTokens?: number;
+		maxOutputTokens?: number;
+		capabilities?: LanguageModelChatCapabilities;
+	}
+
+	export type LanguageModelChatMessagePart = unknown | LanguageModelTextPart; // NOTE(review): a union with unknown collapses to unknown, so any value is accepted
+
+	export interface LanguageModelChatRequestMessage { // one message: a role plus a read-only list of parts
+		role: LanguageModelChatMessageRole;
+		content: readonly LanguageModelChatMessagePart[];
+	}
+
+	export interface ProvideLanguageModelChatResponseOptions { // options argument of provideLanguageModelChatResponse
+		tools?: readonly LanguageModelToolInformation[];
+		modelOptions?: {
+			temperature?: number;
+			[key: string]: unknown; // additional model options are allowed with unknown values
+		};
+		[key: string]: unknown; // additional top-level option keys are allowed as well
+	}
+
+	export type LanguageModelResponsePart = // NOTE(review): also collapses to unknown because of the unknown member
+		| LanguageModelTextPart
+		| LanguageModelToolCallPart
+		| unknown;
+
+	export interface LanguageModelChatProvider { // shape of the provider passed to lm.registerLanguageModelChatProvider
+		onDidChangeLanguageModelChatInformation?: vscode.Event<void>; // optional event member
+
+		provideLanguageModelChatInformation( // yields the list of model descriptors
+			options: PrepareLanguageModelChatModelOptions,
+			token: vscode.CancellationToken
+		): vscode.ProviderResult<LanguageModelChatInformation[]>;
+
+		provideLanguageModelChatResponse( // no return value; response parts are typed through the progress argument
+			model: LanguageModelChatInformation,
+			messages: readonly LanguageModelChatRequestMessage[],
+			options: ProvideLanguageModelChatResponseOptions,
+			progress: vscode.Progress<LanguageModelResponsePart>,
+			token: vscode.CancellationToken
+		): vscode.ProviderResult<void>;
+
+		provideTokenCount( // yields a number for either a plain string or a whole request message
+			model: LanguageModelChatInformation,
+			text: string | LanguageModelChatRequestMessage,
+			token: vscode.CancellationToken
+		): vscode.ProviderResult<number>;
+	}
+}
+
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@ -326,7 +326,6 @@
      "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.23.tgz",
      "integrity": "sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==",
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "@types/prop-types": "*",
        "csstype": "^3.0.2"
@ -666,7 +665,6 @@
      "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "bin": {
        "acorn": "bin/acorn"
      },
@ -693,7 +691,6 @@
      "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "fast-deep-equal": "^3.1.3",
        "fast-uri": "^3.0.1",
@ -920,7 +917,6 @@
        }
      ],
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "caniuse-lite": "^1.0.30001726",
        "electron-to-chromium": "^1.5.173",
@ -4085,7 +4081,6 @@
        }
      ],
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "nanoid": "^3.3.11",
        "picocolors": "^1.1.1",
@ -4288,7 +4283,6 @@
      "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
      "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "loose-envify": "^1.1.0"
      },
@ -5321,7 +5315,6 @@
      "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
      "dev": true,
      "license": "Apache-2.0",
-      "peer": true,
      "bin": {
        "tsc": "bin/tsc",
        "tsserver": "bin/tsserver"
@ -5579,7 +5572,6 @@
      "integrity": "sha512-YJB/ESPUe2Locd0NKXmw72Dx8fZQk1gTzI6rc9TAT4+Sypbnhl8jd8RywB1bDsDF9Dy1RUR7gn3q/ZJTd0OZZg==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "@types/eslint-scope": "^3.7.7",
        "@types/estree": "^1.0.8",
@ -5629,7 +5621,6 @@
      "integrity": "sha512-pIDJHIEI9LR0yxHXQ+Qh95k2EvXpWzZ5l+d+jIo+RdSm9MiHfzazIxwwni/p7+x4eJZuvG1AJwgC4TNQ7NRgsg==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "@discoveryjs/json-ext": "^0.5.0",
        "@webpack-cli/configtest": "^2.1.1",
Author	SHA1	Message	Date
igardev	79b90ef999	v0.0.47	2026-05-04 08:42:41 +03:00
igardev	a544d93511	API key for getting models list, multiline field for Edit with AI, qwen3.5 models added - API key is used (if needed and provided) on getting the list of models for adding OpenAI compatible provider - Multiline field for Edit with AI - Qwen3.5 models (2B, 4B, 9B) added in the predefined list - good for tools and chat	2026-05-04 08:42:01 +03:00
Alexey Mekhanoshin	a73d9498ab	feat: add authorization headers to models fetch request (#180 ) Adds support for the Authorization header when fetching the list of models from an OpenAI-compatible provider.	2026-04-30 07:59:35 +03:00
igardev	29f6c9973b	v0.0.46	2026-04-29 21:06:11 +03:00
Copilot	f98919badf	Add llama.vscode model provider for GitHub Copilot Chat (#171 ) With this change llama.vscode could provide models for VS Code Copilot: 1. Start tools model from llama-vscode (local or external) 2. In VS Code Copilot show the models list -> Other Models -> Manage Models 3. Make the models (all models available by the application serving the tools model are shown) you want to use visible (click on the left of the model name) 4. Select the desired model from Copilot and start using it. Copilot tools that are not needed can be unchecked to reduce context size if a local model is used.	2026-04-29 21:04:33 +03:00
igardev	63c25e6dc9	v0.0.45	2026-03-04 08:23:13 +02:00
Haafiz	0a588177b7	feat: add configurable debounce for inline completion requests (#164 ) Waits for the user to pause typing before sending a request to the server. Set to 0 (default) to disable.	2026-03-04 08:18:42 +02:00
Haafiz	caa0f9363d	fix: show 'extension is updated' notification only on actual version change (#167 ) Add version tracking and update persistence keys	2026-03-04 08:06:32 +02:00
igardev	f8158d9e48	v0.0.44	2026-03-02 23:53:18 +02:00
igardev	bf7d0c2892	Subagents (#169 ) * Read SOUL.md and USER.md files from project root and add them in the prompt if they exist (similar to OpenClaw). * - Subagents implemented - new agent Unit Test Writer - new tool create_agent - new agent "Agent creator" * Update documentation for llama-vscode	2026-03-02 23:52:38 +02:00