Compare commits

..

10 commits

Author SHA1 Message Date
igardev
79b90ef999
v0.0.47 2026-05-04 08:42:41 +03:00
igardev
a544d93511
API key for getting models list, multiline field for Edit with AI, qwen3.5 models added
- API key is used (if needed and provided) on getting the list of models for adding OpenAI compatible provider
- Multiline field for Edit with AI
- Qwen3.5 models (2B, 4B, 9B) added in the predefined list  - good for tools and chat
2026-05-04 08:42:01 +03:00
Alexey Mekhanoshin
a73d9498ab
feat: add authorization headers to models fetch request (#180)
Adds support for the Authorization header when fetching the list of models from an OpenAI-compatible provider.
2026-04-30 07:59:35 +03:00
igardev
29f6c9973b
v0.0.46 2026-04-29 21:06:11 +03:00
Copilot
f98919badf
Add llama.vscode model provider for GitHub Copilot Chat (#171)
With this change llama.vscode could provide models for VS Code Copilot:
1. Start tools model from llama-vscode (local or external)  
2. In VS Code Copilot show the models list -> Other Models -> Manage Models  
3. Make the models (all models available by the application serving the tools model are shown) you want to use visible (click on the left of the model name)  
4. Select the desired model from Copilot and start using it

Tools from Copilot that are not needed can be unchecked to reduce the context size when a local model is used.
2026-04-29 21:04:33 +03:00
igardev
63c25e6dc9
v0.0.45 2026-03-04 08:23:13 +02:00
Haafiz
0a588177b7
feat: add configurable debounce for inline completion requests (#164)
Waits for the user to pause typing before sending a request to the server.
Set to 0 (default) to disable.
2026-03-04 08:18:42 +02:00
Haafiz
caa0f9363d
fix: show 'extension is updated' notification only on actual version change (#167)
Add version tracking and update persistence keys
2026-03-04 08:06:32 +02:00
igardev
f8158d9e48
v0.0.44 2026-03-02 23:53:18 +02:00
igardev
bf7d0c2892
Subagents (#169)
* Read SOUL.md and USER.md files from project root and add them in the prompt if they exist (similar to OpenClaw).

* - Subagents implemented
- new agent Unit Test Writer
- new tool create_agent
- new agent "Agent creator"

* Update documentation for llama-vscode
2026-03-02 23:52:38 +02:00
16 changed files with 678 additions and 55 deletions

16
package-lock.json generated
View file

@ -1,12 +1,12 @@
{
"name": "llama-vscode",
"version": "0.0.39",
"version": "0.0.45",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "llama-vscode",
"version": "0.0.39",
"version": "0.0.45",
"hasInstallScript": true,
"dependencies": {
"axios": "^1.1.2",
@ -24,7 +24,7 @@
"@types/mocha": "^10.0.10",
"@types/node": "^18.0.0",
"@types/picomatch": "^4.0.0",
"@types/vscode": "^1.100.0",
"@types/vscode": "^1.109.0",
"@vscode/test-cli": "^0.0.11",
"@vscode/test-electron": "^2.5.2",
"esbuild": "^0.27.0",
@ -829,9 +829,9 @@
"license": "MIT"
},
"node_modules/@types/vscode": {
"version": "1.103.0",
"resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.103.0.tgz",
"integrity": "sha512-o4hanZAQdNfsKecexq9L3eHICd0AAvdbLk6hA60UzGXbGH/q8b/9xv2RgR7vV3ZcHuyKVq7b37IGd/+gM4Tu+Q==",
"version": "1.109.0",
"resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.109.0.tgz",
"integrity": "sha512-0Pf95rnwEIwDbmXGC08r0B4TQhAbsHQ5UyTIgVgoieDe4cOnf92usuR5dEczb6bTKEp7ziZH4TV1TRGPPCExtw==",
"dev": true,
"license": "MIT"
},
@ -1182,7 +1182,6 @@
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@ -1231,7 +1230,6 @@
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.3",
"fast-uri": "^3.0.1",
@ -1432,7 +1430,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"caniuse-lite": "^1.0.30001669",
"electron-to-chromium": "^1.5.41",
@ -5516,7 +5513,6 @@
"integrity": "sha512-NLhDfH/h4O6UOy+0LSso42xvYypClINuMNBVVzX4vX98TmTaTUxwRbXdhucbFMd2qLaCTcLq/PdYrvi8onw90w==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@discoveryjs/json-ext": "^0.5.0",
"@webpack-cli/configtest": "^1.2.0",

View file

@ -2,11 +2,11 @@
"name": "llama-vscode",
"displayName": "llama-vscode",
"description": "Local LLM-assisted text completion using llama.cpp",
"version": "0.0.43",
"version": "0.0.47",
"publisher": "ggml-org",
"repository": "https://github.com/ggml-org/llama.vscode",
"engines": {
"vscode": "^1.100.0"
"vscode": "^1.109.0"
},
"icon": "llama.png",
"activationEvents": [
@ -17,6 +17,13 @@
],
"main": "./dist/extension.js",
"contributes": {
"languageModelChatProviders": [
{
"vendor": "llama-vscode",
"displayName": "llama.vscode",
"managementCommand": "extension.showMenu"
}
],
"viewsContainers": {
"activitybar": [
{
@ -691,7 +698,6 @@
"create_agent"
]
}
],
"description": "The list of the agents, which could be selected"
},
@ -1748,6 +1754,11 @@
"default": true,
"description": "If code completion should be triggered automatically (true) or only by pressing Ctrl+l."
},
"llama-vscode.debounce_ms": {
"type": "number",
"default": 0,
"description": "Milliseconds to wait after the last keystroke before sending a completion request (0 = disabled). Useful on low-end hardware to avoid triggering inference on every keystroke."
},
"llama-vscode.api_key": {
"type": "string",
"default": "",
@ -2259,7 +2270,7 @@
"@types/mocha": "^10.0.10",
"@types/node": "^18.0.0",
"@types/picomatch": "^4.0.0",
"@types/vscode": "^1.100.0",
"@types/vscode": "^1.109.0",
"@vscode/test-cli": "^0.0.11",
"@vscode/test-electron": "^2.5.2",
"esbuild": "^0.27.0",

View file

@ -51,7 +51,21 @@ https://github.com/user-attachments/assets/97bb1418-dcea-4a49-8332-13b2ab4da661
![Code completion](https://private-user-images.githubusercontent.com/1991296/405712196-b19499d9-f50d-49d4-9dff-ff3e8ba23757.gif?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDY5NDc1NDEsIm5iZiI6MTc0Njk0NzI0MSwicGF0aCI6Ii8xOTkxMjk2LzQwNTcxMjE5Ni1iMTk0OTlkOS1mNTBkLTQ5ZDQtOWRmZi1mZjNlOGJhMjM3NTcuZ2lmP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI1MDUxMSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNTA1MTFUMDcwNzIxWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9NmZiMmI0NGYzNTkyZGZkMTM5Njk3M2NjZDFhMjFiNTFkMjVkMmY4MGQ5ZDQ2ZDQ0MDgzOWI2YjM5NTY0NzM2OSZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QifQ.P150YJh87_y1pin20aWIuKoPzivmDjZF0iAemQlk_ok)## Custom eval tool
![Code completion](https://private-user-images.githubusercontent.com/1991296/405712196-b19499d9-f50d-49d4-9dff-ff3e8ba23757.gif?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDY5NDc1NDEsIm5iZiI6MTc0Njk0NzI0MSwicGF0aCI6Ii8xOTkxMjk2LzQwNTcxMjE5Ni1iMTk0OTlkOS1mNTBkLTQ5ZDQtOWRmZi1mZjNlOGJhMjM3NTcuZ2lmP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI1MDUxMSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNTA1MTFUMDcwNzIxWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9NmZiMmI0NGYzNTkyZGZkMTM5Njk3M2NjZDFhMjFiNTFkMjVkMmY4MGQ5ZDQ2ZDQ0MDgzOWI2YjM5NTY0NzM2OSZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QifQ.P150YJh87_y1pin20aWIuKoPzivmDjZF0iAemQlk_ok)## Copilot Chat Model Provider
### Overview
llama-vscode can be used as a VS Code Copilot chat model provider. In other words, llama-vscode can provide models for Copilot. The provided models can be local models, models from openrouter.com, or models from any other application that serves the tools models for llama-vscode. This way you can automatically download and start models locally with llama.cpp and llama-vscode and use them with Copilot for free.
### How to use it
1. Select/Start tools model from llama-vscode (local or external)
<img width="485" height="875" alt="copilotSelectToolsModel" src="https://github.com/user-attachments/assets/caa33531-22f4-46dd-b429-7498c45c93e9" />
2. In VS Code Copilot show the models list -> Other Models -> Manage Models
<img width="1404" height="754" alt="CopilotManageModels" src="https://github.com/user-attachments/assets/dc861aa1-db86-46ff-83c1-98c7a435ad06" />
3. Make the models (all models available by the application serving the tools model are shown) you want to use visible (click on the left of the model name)
4. Select the desired model from Copilot and start using it
## Custom eval tool
### Overview
llama-vscode gives users the possibility to partially create their own tool. The custom eval tool is a simple one - it has one parameter and uses the JavaScript function provided by the user to calculate the result.
@ -204,14 +218,28 @@ Settings:
<img width="580" height="779" alt="image" src="https://github.com/user-attachments/assets/bb29e0c8-85b4-4e7a-a3d9-f2d9a1679d3d" />
## Version 0.0.40 is released (05.01.2025)
## Version 0.0.45 is released (04.03.2026)
## What is new
Generation of multiple completions in parallel:
- Setting max_parallel_completions determines how many completions to generate in parallel (default 3)
- Shortcuts - Alt+] - next completion, Alt+[ - previous completion
- Requires llama.cpp after December 6, 2025 (commit c42712b) but is backward compatible (generates one completion for older versions)
- [More details](https://github.com/ggml-org/llama.vscode/wiki/Parallel-completions)
- Configurable debounce for inline completion requests - setting debounce_ms.
llama-vscode will wait debounce_ms after a keystroke before sending a request to the LLM for inline code completion. If in the meantime there is another keystroke, the request for the previous keystroke is cancelled. Useful on low end hardware to avoid triggering code completion on every keystroke.
- Notification "Extension is updated" is shown only on version change, not on every setting change (as was before)
## Version 0.0.44 is released (03.03.2026)
## What is new
- Subagents implemented (with tool delegate_task) - now each agent, which has "Available as Subagent" checked could be used as a subagent
- new agent - Unit Test Writer
- new tool create_agent
- new agent "Agent creator"
- Files SOUL.md and USER.md (if available in the project root) will be added to the context
## Setup instructions for llama.cpp server

View file

@ -30,6 +30,7 @@ import { Agent, Chat, Env, LlmModel } from "./types";
import { ModelType, PERSISTENCE_KEYS } from "./constants";
import { ApiKeyService } from "./services/api-key-service";
import { OpenAiCompModelStrategy } from "./services/openai-comp-model-strategy";
import { LlamaChatModelProvider } from "./llama-chat-model-provider";
export class Application {
public static readonly emptyModel = {name: ""};
@ -63,6 +64,7 @@ export class Application {
public agentCommandService: AgentCommandService
public chatService: ChatService
public apiKeyService: ApiKeyService
public llamaChatModelProvider: LlamaChatModelProvider
private selectedComplModel: LlmModel = Application.emptyModel
private selectedChatModel: LlmModel = Application.emptyModel
@ -105,6 +107,7 @@ export class Application {
this.agentCommandService = new AgentCommandService(this)
this.chatService = new ChatService(this)
this.apiKeyService = new ApiKeyService(this)
this.llamaChatModelProvider = new LlamaChatModelProvider(this);
}
public static getInstance(context: vscode.ExtensionContext): Application {

View file

@ -9,6 +9,7 @@ import { Utils } from './utils';
import { Env, LlmModel } from './types';
import { env } from 'process';
import { PERSISTENCE_KEYS, SETTING_NAME_FOR_LIST, UiView } from './constants';
import {LlamaChatModelProvider} from "./llama-chat-model-provider";
export class Architect {
private app: Application
@ -26,6 +27,14 @@ export class Architect {
this.app.menu.showHowToUseLlamaVscode();
this.app.persistence.setGlobalValue("isFirstStart", false)
}
const currentVersion = vscode.extensions.getExtension('ggml-org.llama-vscode')?.packageJSON?.version as string | undefined;
const storedVersion = this.app.persistence.getGlobalValue(PERSISTENCE_KEYS.EXTENSION_VERSION) as string | undefined;
if (currentVersion && storedVersion && currentVersion !== storedVersion) {
vscode.window.showInformationMessage(this.app.configuration.getUiText(`llama-vscode extension is updated.`) ?? "");
}
if (currentVersion) {
this.app.persistence.setGlobalValue(PERSISTENCE_KEYS.EXTENSION_VERSION, currentVersion);
}
await this.installUpgradeLlamaCpp(isFirstStart);
if (this.app.configuration.env_start_last_used){
let lastEnv = this.app.persistence.getValue("selectedEnv")
@ -70,7 +79,6 @@ export class Architect {
if (this.app.configuration.isRagConfigChanged(event)) this.init();
if (this.app.configuration.isToolChanged(event)) this.app.tools.init();
if (this.app.configuration.isEnvViewSettingChanged(event)) this.app.llamaWebviewProvider.updateLlamaView();
vscode.window.showInformationMessage(this.app.configuration.getUiText(`llama-vscode extension is updated.`)??"");
});
context.subscriptions.push(configurationChangeDisp);
}
@ -204,6 +212,22 @@ export class Architect {
);
}
/**
 * Registers the llama.vscode language model chat provider (vendor 'llama-vscode')
 * and a configuration listener that tells the provider its models may have changed.
 * Both disposables are added to the extension context subscriptions.
 */
registerLlavaVscodeModelProvider = (context: vscode.ExtensionContext) => {
    // Expose the provider to VS Code (used by GitHub Copilot Chat)
    const providerRegistration = vscode.lm.registerLanguageModelChatProvider(
        'llama-vscode',
        this.app.llamaChatModelProvider
    );
    context.subscriptions.push(providerRegistration);

    // Settings whose change can alter the list of offered models
    const watchedSettings = [
        'llama-vscode.endpoint_chat',
        'llama-vscode.endpoint_tools',
        'llama-vscode.ai_api_version',
    ];
    const configListener = vscode.workspace.onDidChangeConfiguration((event) => {
        const isRelevant = watchedSettings.some((setting) => event.affectsConfiguration(setting));
        if (isRelevant) {
            this.app.llamaChatModelProvider.notifyModelsChanged();
        }
    });
    context.subscriptions.push(configListener);
}
registerGenarateCommitMsg = (context: vscode.ExtensionContext) => {
const generateCommitCommand = vscode.commands.registerCommand(
'extension.generateGitCommitMessage',

View file

@ -30,6 +30,15 @@ export class Completion {
return null;
}
// Debounce: wait for the user to pause typing before hitting the backend
if (context.triggerKind == vscode.InlineCompletionTriggerKind.Automatic && this.app.configuration.debounce_ms > 0) {
await Utils.delay(this.app.configuration.debounce_ms);
if (token.isCancellationRequested) {
this.app.logger.addEventLog(group, "DEBOUNCE_CANCELLATION_RETURN", "")
return null;
}
}
// Start only if the previous request is finished
while (this.isRequestInProgress) {
await Utils.delay(this.app.configuration.DELAY_BEFORE_COMPL_REQUEST);

View file

@ -29,6 +29,7 @@ export class Configuration {
new_embeddings_model_host = "127.0.0.1"
new_tools_model_host = "127.0.0.1"
auto = true;
debounce_ms = 0;
api_key = "";
api_key_chat = "";
api_key_tools = "";
@ -196,6 +197,7 @@ export class Configuration {
this.openai_client_model = String(config.get<string>("openai_client_model"));
this.openai_prompt_template = String(config.get<string>("openai_prompt_template"));
this.auto = Boolean(config.get<boolean>("auto"));
this.debounce_ms = Number(config.get<number>("debounce_ms"));
this.api_key = String(config.get<string>("api_key"));
this.api_key_chat = String(config.get<string>("api_key_chat"));
this.api_key_tools = String(config.get<string>("api_key_tools"));

View file

@ -253,6 +253,7 @@ export const PERSISTENCE_KEYS = {
SELECTED_CHAT: 'selectedChat' as const,
SELECTED_AGENT: 'selectedAgent' as const,
SELECTED_ENV: 'selectedEnv' as const,
EXTENSION_VERSION: 'extensionVersion' as const,
} as const;
export const SETTING_NAME_FOR_LIST = {

View file

@ -32,7 +32,10 @@ export function activate(context: vscode.ExtensionContext) {
app.architect.registerWebviewProvider(context)
app.architect.registerCommandSelectNextSuggestion(context)
app.architect.registerCommandSelectPreviousSuggestion(context)
app.architect.registerLlavaVscodeModelProvider(context)
app.architect.init()
}
export async function deactivate() {

View file

@ -79,7 +79,36 @@ export const PREDEFINED_LISTS = new Map<string, any>([
"endpoint": "http://127.0.0.1:8010"
}
]],
[PREDEFINED_LISTS_KEYS.TOOLS, [
[PREDEFINED_LISTS_KEYS.TOOLS,
[
{
"name": "Qwen3.5-2B-GGUF:Q8_0 (LOCAL) (CPU)",
"localStartCommand": "llama-server -hf unsloth/Qwen3.5-2B-GGUF:Q8_0 --jinja -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
"endpoint": "http://localhost:8009",
"aiModel": "",
"isKeyRequired": false
},
{
"name": "Qwen3.5-2B-GGUF:Q8_0 (LOCAL) (VRAM>3GB)",
"localStartCommand": "llama-server -hf unsloth/Qwen3.5-2B-GGUF:Q8_0 --jinja -ngl 99 -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
"endpoint": "http://localhost:8009",
"aiModel": "",
"isKeyRequired": false
},
{
"name": "Qwen3.5-4B-GGUF:Q8_0 (LOCAL) (VRAM>6GB)",
"localStartCommand": "llama-server -hf unsloth/Qwen3.5-4B-GGUF:Q8_0 --jinja -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
"endpoint": "http://localhost:8009",
"aiModel": "",
"isKeyRequired": false
},
{
"name": "Qwen3.5-9B-GGUF:Q8_0 (LOCAL) (VRAM>12GB)",
"localStartCommand": "llama-server -hf unsloth/Qwen3.5-9B-GGUF:Q8_0 --jinja -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port 8009 --host 127.0.0.1",
"endpoint": "http://localhost:8009",
"aiModel": "",
"isKeyRequired": false
},
{
"name": "OpenAI gpt-oss 20B (LOCAL) (> 19GB VRAM)",
"localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",

View file

@ -0,0 +1,237 @@
import * as vscode from 'vscode';
import axios from 'axios';
import { Application } from './application';
import { Utils } from './utils';
// Value reported as the `family` of every model returned by this provider
const VENDOR = 'llama-vscode';
// Default token limits used when the server does not report them
// (maxInputTokens / maxOutputTokens of each model; the output limit is also sent as max_tokens)
const DEFAULT_MAX_INPUT_TOKENS = 8192;
const DEFAULT_MAX_OUTPUT_TOKENS = 4096;
/** One entry of the `data` array in the models list response. */
interface OpenAIModel {
// Model identifier; used as both id and display name of the exposed chat model
id: string;
object?: string;
}
/** Response body expected from GET `<endpoint>/<ai_api_version>/models`. */
interface OpenAIModelsResponse {
data: OpenAIModel[];
}
/**
 * Language model chat provider that exposes the models served by the endpoint of the
 * selected llama-vscode tools model (or the configured chat/tools endpoint) to VS Code.
 * The endpoint is accessed through the OpenAI-compatible `/models` and
 * `/chat/completions` routes.
 */
export class LlamaChatModelProvider implements vscode.LanguageModelChatProvider {
    private readonly _onDidChangeLanguageModelChatInformation = new vscode.EventEmitter<void>();
    readonly onDidChangeLanguageModelChatInformation: vscode.Event<void> =
        this._onDidChangeLanguageModelChatInformation.event;

    constructor(private readonly app: Application) {}

    /** Called by the configuration change handler to notify VS Code that models may have changed. */
    notifyModelsChanged(): void {
        this._onDidChangeLanguageModelChatInformation.fire();
    }

    /**
     * Lists the models reported by the endpoint.
     *
     * @returns One entry per model reported by the server, or an empty list when no
     * endpoint is configured, the server reports no models, or the request fails.
     */
    async provideLanguageModelChatInformation(
        _options: vscode.PrepareLanguageModelChatModelOptions,
        _token: vscode.CancellationToken
    ): Promise<vscode.LanguageModelChatInformation[]> {
        const endpoint = this.getChatEndpoint();
        if (!endpoint) {
            return [];
        }

        try {
            // NOTE(review): the chat request below uses axiosRequestConfigTools while this
            // request uses axiosRequestConfigChat - confirm the models list does not need
            // the tools configuration (e.g. its API key) when a tools model is selected.
            const requestConfig = this.app.configuration.axiosRequestConfigChat;
            const response = await axios.get<OpenAIModelsResponse>(
                `${Utils.trimTrailingSlash(endpoint)}/${this.app.configuration.ai_api_version}/models`,
                requestConfig
            );

            const models = response.data?.data;
            if (!Array.isArray(models) || models.length === 0) {
                return [];
            }

            return models.map((model) => ({
                id: model.id,
                name: model.id,
                family: VENDOR,
                version: '1',
                maxInputTokens: DEFAULT_MAX_INPUT_TOKENS,
                maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
                capabilities: {
                    toolCalling: true,
                    imageInput: false,
                },
            }));
        } catch (err) {
            // Best effort: an unreachable server simply yields no models. Only the message is
            // logged because the full error object may carry the request headers.
            console.warn(
                '[llama-vscode] Failed to fetch the models list:',
                err instanceof Error ? err.message : String(err)
            );
            return [];
        }
    }

    /**
     * Sends the conversation to `<endpoint>/<ai_api_version>/chat/completions` with
     * streaming enabled and reports text and tool calls through `progress`.
     *
     * Resolves when the stream ends, `[DONE]` is received, or the token is cancelled
     * while streaming.
     *
     * @throws Error when no endpoint is configured; request and stream errors are propagated.
     */
    async provideLanguageModelChatResponse(
        model: vscode.LanguageModelChatInformation,
        messages: readonly vscode.LanguageModelChatRequestMessage[],
        options: vscode.ProvideLanguageModelChatResponseOptions,
        progress: vscode.Progress<vscode.LanguageModelResponsePart>,
        token: vscode.CancellationToken
    ): Promise<void> {
        const endpoint = this.getChatEndpoint();
        if (!endpoint) {
            throw new Error('No chat endpoint configured');
        }

        const openaiMessages = this.toOpenAiMessages(messages);

        const tools = options.tools?.map((t: vscode.LanguageModelToolInformation) => ({
            type: 'function',
            function: {
                name: t.name,
                description: t.description,
                parameters: t.inputSchema,
            },
        }));

        const requestBody: Record<string, unknown> = {
            model: model.id,
            messages: openaiMessages,
            stream: true,
            max_tokens: DEFAULT_MAX_OUTPUT_TOKENS,
            ...(options.modelOptions?.temperature !== undefined
                ? { temperature: options.modelOptions.temperature }
                : {}),
            ...(tools && tools.length > 0 ? { tools } : {}),
        };

        const abortController = new AbortController();
        // Cancellation listeners are collected so they can be released when the request is over
        const cancellationListeners: vscode.Disposable[] = [];
        cancellationListeners.push(token.onCancellationRequested(() => abortController.abort()));

        try {
            const requestConfig = this.app.configuration.axiosRequestConfigTools;
            const streamResponse = await axios.post<NodeJS.ReadableStream>(
                `${Utils.trimTrailingSlash(endpoint)}/${this.app.configuration.ai_api_version}/chat/completions`,
                requestBody,
                { ...requestConfig, responseType: 'stream' as const, signal: abortController.signal }
            );

            await new Promise<void>((resolve, reject) => {
                const readable = streamResponse.data;
                // Let the stream decode UTF-8 itself so that multi-byte characters split
                // across two network chunks are not corrupted.
                readable.setEncoding('utf8');

                let buffer = '';
                // Set once the promise is resolved/rejected; later stream events are ignored
                let settled = false;
                // Accumulated tool call data indexed by call index (may contain holes)
                const toolCalls: { id: string; name: string; arguments: string }[] = [];

                const finalize = () => {
                    if (settled) {
                        return;
                    }
                    settled = true;
                    // Emit the accumulated tool calls
                    for (const tc of toolCalls) {
                        if (!tc || !tc.id || !tc.name) {
                            continue;
                        }
                        try {
                            progress.report(
                                new vscode.LanguageModelToolCallPart(tc.id, tc.name, JSON.parse(tc.arguments || '{}'))
                            );
                        } catch (e) {
                            console.warn('[llama-vscode] Failed to parse tool call arguments:', e);
                        }
                    }
                    resolve();
                };

                /** Handles one SSE line; returns true when the `[DONE]` marker was received. */
                const processLine = (line: string): boolean => {
                    const trimmed = line.trim();
                    if (!trimmed || !trimmed.startsWith('data:')) {
                        return false;
                    }
                    const payload = trimmed.slice(5).trim();
                    if (payload === '[DONE]') {
                        return true;
                    }
                    try {
                        const json = JSON.parse(payload);
                        const choice = json.choices?.[0];
                        if (!choice) {
                            return false;
                        }
                        const delta = choice.delta ?? {};
                        if (typeof delta.content === 'string' && delta.content) {
                            progress.report(new vscode.LanguageModelTextPart(delta.content));
                        }
                        if (Array.isArray(delta.tool_calls)) {
                            for (const tc of delta.tool_calls) {
                                const idx: number = typeof tc.index === 'number' ? tc.index : 0;
                                if (!toolCalls[idx]) {
                                    toolCalls[idx] = { id: '', name: '', arguments: '' };
                                }
                                if (tc.id) {
                                    toolCalls[idx].id = tc.id;
                                }
                                if (tc.function?.name) {
                                    toolCalls[idx].name = tc.function.name;
                                }
                                if (tc.function?.arguments) {
                                    // Arguments arrive as string fragments that are concatenated
                                    toolCalls[idx].arguments += tc.function.arguments;
                                }
                            }
                        }
                    } catch {
                        // Skip malformed SSE chunks
                    }
                    return false;
                };

                cancellationListeners.push(
                    token.onCancellationRequested(() => {
                        // Cancelled: stop reading and resolve without emitting pending tool calls
                        settled = true;
                        (readable as any).destroy?.();
                        resolve();
                    })
                );

                readable.on('data', (chunk: string | Buffer) => {
                    if (settled) {
                        return;
                    }
                    buffer += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
                    const lines = buffer.split(/\r?\n/);
                    // The last element is an incomplete line; keep it for the next chunk
                    buffer = lines.pop() ?? '';
                    for (const line of lines) {
                        if (processLine(line)) {
                            finalize();
                            return;
                        }
                    }
                });

                readable.on('end', () => {
                    // A last line without a trailing newline is still in the buffer
                    if (!settled && buffer) {
                        processLine(buffer);
                    }
                    finalize();
                });

                // The error listener stays attached for the whole life of the stream so that a
                // late error never becomes an unhandled 'error' event.
                readable.on('error', (err: Error) => {
                    if (settled) {
                        return;
                    }
                    settled = true;
                    reject(err);
                });
            });
        } finally {
            for (const listener of cancellationListeners) {
                listener.dispose();
            }
        }
    }

    /**
     * Estimates the number of tokens of a text or of the text parts of a message.
     */
    provideTokenCount(
        _model: vscode.LanguageModelChatInformation,
        text: string | vscode.LanguageModelChatRequestMessage,
        _token: vscode.CancellationToken
    ): Thenable<number> {
        const content = typeof text === 'string' ? text : LlamaChatModelProvider.textOf(text.content);
        // Rough approximation: 1 token ≈ 4 characters. The llama.cpp server does not expose a
        // tokenization endpoint via the standard OpenAI API, so we use this heuristic.
        // Actual token counts may differ depending on the model's tokenizer.
        return Promise.resolve(Math.ceil(content.length / 4));
    }

    /** Concatenates the values of the text parts; all other parts contribute nothing. */
    private static textOf(parts: readonly unknown[]): string {
        return parts
            .map((part: unknown) => (part instanceof vscode.LanguageModelTextPart ? part.value : ''))
            .join('');
    }

    /**
     * Converts VS Code chat messages to OpenAI chat completion messages.
     *
     * - Text parts become the `content` of a `user`/`assistant` message (every role other
     *   than User is sent as `assistant`).
     * - Tool call parts are attached to the message as `tool_calls`.
     * - Tool result parts become separate `tool` messages placed before the text of the
     *   message that carries them, so they directly follow the assistant message that
     *   requested the calls.
     * NOTE(review): only the text parts of a tool result are forwarded - confirm whether
     * other result part kinds need to be serialized as well.
     */
    private toOpenAiMessages(
        messages: readonly vscode.LanguageModelChatRequestMessage[]
    ): Record<string, unknown>[] {
        const converted: Record<string, unknown>[] = [];
        for (const msg of messages) {
            const role = msg.role === vscode.LanguageModelChatMessageRole.User ? 'user' : 'assistant';
            const requestedCalls: Record<string, unknown>[] = [];
            let hasToolResults = false;

            for (const part of msg.content as readonly unknown[]) {
                if (part instanceof vscode.LanguageModelToolCallPart) {
                    requestedCalls.push({
                        id: part.callId,
                        type: 'function',
                        function: { name: part.name, arguments: JSON.stringify(part.input ?? {}) },
                    });
                } else if (part instanceof vscode.LanguageModelToolResultPart) {
                    hasToolResults = true;
                    converted.push({
                        role: 'tool',
                        tool_call_id: part.callId,
                        content: LlamaChatModelProvider.textOf(part.content),
                    });
                }
            }

            const content = LlamaChatModelProvider.textOf(msg.content);
            if (requestedCalls.length > 0) {
                converted.push({ role, content, tool_calls: requestedCalls });
            } else if (content || !hasToolResults) {
                // A message that only carried tool results adds no extra empty message
                converted.push({ role, content });
            }
        }
        return converted;
    }

    /**
     * Returns the endpoint to use: the selected tools model endpoint, else the configured
     * chat endpoint, else the configured tools endpoint, else an empty string.
     */
    private getChatEndpoint(): string {
        const selectedModel = this.app.getToolsModel();
        if (selectedModel?.endpoint) {
            return selectedModel.endpoint;
        }
        if (this.app.configuration.endpoint_chat) {
            return this.app.configuration.endpoint_chat;
        }
        if (this.app.configuration.endpoint_tools) {
            return this.app.configuration.endpoint_tools;
        }
        return '';
    }
}

View file

@ -195,6 +195,17 @@ export class ModelService {
await details.killCmd();
if (model.localStartCommand) await details.shellCmd(this.sanitizeCommand(model.localStartCommand ?? ""));
await this.app.persistence.setValue(this.getSelectedProp(type), model);
if (type == ModelType.Tools && model?.isKeyRequired !== undefined && model.isKeyRequired){
const apiKey = this.app.persistence.getApiKey(model.endpoint??"");
if (apiKey){
this.app.configuration.axiosRequestConfigTools = {
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
}
}
}
}
public async addModel(type: ModelType, kind: 'local' | 'external' | 'hf' | 'oaiComp'): Promise<void> {

View file

@ -44,13 +44,13 @@ export class OpenAiCompModelStrategy implements IAddStrategy {
prompt: 'example: http://localhost:8080 or https://openrauter.ai/api'
})??""
isKeyRequired = await Utils.confirmAction(`Is API key required for this endpoint (${endpoint})?`, "");
}
}
if (!endpoint){
vscode.window.showWarningMessage("Endpoint is not provided!")
return;
}
const providerModels: QuickPickItem[] = [];
const models = await this.getModels(endpoint);
const models = await this.getModels(endpoint, isKeyRequired);
if (models.length == 0) {
vscode.window.showInformationMessage("No models are found.")
return
@ -108,30 +108,50 @@ export class OpenAiCompModelStrategy implements IAddStrategy {
}
}
private async getModels(endpoint: string): Promise<OpenAiCompModel[]> {
let hfEndpoint = Utils.trimTrailingSlash(endpoint) +"/v1/models";
private async getModels(endpoint: string, isKeyRequired: boolean): Promise<OpenAiCompModel[]> {
const hfEndpoint = Utils.trimTrailingSlash(endpoint) + "/v1/models";
// Create a request configuration
let requestConfig: any = {};
if (isKeyRequired) {
// We get the saved key for this specific endpoint
const apiKey = this.app.persistence.getApiKey(endpoint);
if (apiKey) {
requestConfig = {
headers: {
'Authorization': `Bearer ${apiKey}`,
'Content-Type': 'application/json'
}
};
}
}
try {
let result = await axios.default.get(
`${Utils.trimTrailingSlash(hfEndpoint)}`
const result = await axios.default.get(
`${Utils.trimTrailingSlash(hfEndpoint)}`,
requestConfig
);
let models: OpenAiCompModel[] = [];
let modelsList: OpenAiCompModel[] = []
if (result && result.data && result.data.models) modelsList = result.data.models
else if (result && result.data && result.data.data) modelsList = result.data.data
if (modelsList.length > 0){
for(let mdl of modelsList){
models.push(mdl)
let modelsList: OpenAiCompModel[] = [];
if (result && result.data && result.data.models) modelsList = result.data.models;
else if (result && result.data && result.data.data) modelsList = result.data.data;
if (modelsList.length > 0) {
for (let mdl of modelsList) {
models.push(mdl);
}
}
return models;
} catch (error){
vscode.window.showErrorMessage("Error getting provider models): " + error)
} catch (error) {
vscode.window.showErrorMessage("Error getting provider models: " + error);
return [];
}
}
/**
 * Trims surrounding whitespace from the input; a falsy input yields an empty string.
 */
private sanitizeInput(input: string): string {
    if (!input) {
        return '';
    }
    return input.trim();
}

View file

@ -26,6 +26,192 @@ export class TextEditor {
vscode.commands.executeCommand('setContext', 'textEditSuggestionVisible', visible);
}
/**
 * Escapes a string for safe interpolation into the webview HTML, both inside
 * double-quoted attribute values and as element text.
 *
 * @param value Raw text (e.g. a localized UI label).
 * @returns The text with `&`, `"`, `'`, `<` and `>` replaced by HTML entities.
 */
private escapeWebviewAttr(value: string): string {
    // '&' must be replaced first so the entities produced below are not escaped again
    return value
        .replace(/&/g, '&amp;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}
/**
* Opens a webview panel beside the editor with a multiline textarea in which the
* user types the editing instructions.
*
* @returns A promise that resolves with the submitted text, or with undefined when
* the user presses Cancel or the panel is closed. Submitting empty (whitespace-only)
* text does not resolve the promise: a hint message is shown and the panel stays open.
*/
private showMultilineEditPrompt(): Promise<string | undefined> {
// UI texts; each falls back to the English literal when getUiText yields null/undefined
const title =
this.app.configuration.getUiText('How would you like to modify the selected text?') ??
'How would you like to modify the selected text?';
const placeholder =
this.app.configuration.getUiText('Enter your instructions for editing the text...') ??
'Enter your instructions for editing the text...';
const submitLabel = this.app.configuration.getUiText('Submit') ?? 'Submit';
const cancelLabel = this.app.configuration.getUiText('Cancel') ?? 'Cancel';
const emptyHint =
this.app.configuration.getUiText('Please enter editing instructions.') ??
'Please enter editing instructions.';
return new Promise((resolve) => {
// Guards against resolving the promise more than once
let settled = false;
const panel = vscode.window.createWebviewPanel(
'editWithAiMultilinePrompt',
title,
{ viewColumn: vscode.ViewColumn.Beside, preserveFocus: false },
{ enableScripts: true }
);
// Resolves the promise once with the given value and closes the panel
const finish = (value: string | undefined) => {
if (settled) {
return;
}
settled = true;
resolve(value);
panel.dispose();
};
const cspSource = panel.webview.cspSource;
// Page content: label, textarea and Submit/Cancel buttons. All interpolated UI texts go
// through escapeWebviewAttr. The inline script posts 'submit' (with the text) or 'cancel'
// to the extension and focuses the textarea on load and on a 'focusPrompt' message.
panel.webview.html = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="Content-Security-Policy" content="default-src 'none'; style-src ${cspSource} 'unsafe-inline'; script-src 'unsafe-inline' ${cspSource};">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
box-sizing: border-box;
margin: 0;
padding: 12px;
height: 100vh;
display: flex;
flex-direction: column;
font-family: var(--vscode-font-family);
font-size: var(--vscode-font-size);
color: var(--vscode-foreground);
background-color: var(--vscode-editor-background);
}
label {
margin-bottom: 8px;
}
textarea {
flex: 1;
min-height: 120px;
resize: vertical;
padding: 8px;
border: 1px solid var(--vscode-input-border);
background: var(--vscode-input-background);
color: var(--vscode-input-foreground);
font-family: var(--vscode-editor-font-family);
font-size: var(--vscode-editor-font-size);
}
textarea:focus {
outline: 1px solid var(--vscode-focusBorder);
}
.actions {
margin-top: 12px;
display: flex;
gap: 8px;
justify-content: flex-end;
}
/* DOM order is Submit then Cancel (Tab: textarea → Submit → Cancel); flex order keeps Cancel left, Submit right. */
.actions .secondary {
order: 1;
}
.actions .primary {
order: 2;
}
button {
padding: 6px 14px;
border: none;
cursor: pointer;
font-size: var(--vscode-font-size);
}
.primary {
background: var(--vscode-button-background);
color: var(--vscode-button-foreground);
}
.primary:hover {
background: var(--vscode-button-hoverBackground);
}
.secondary {
background: var(--vscode-button-secondaryBackground);
color: var(--vscode-button-secondaryForeground);
}
.secondary:hover {
background: var(--vscode-button-secondaryHoverBackground);
}
</style>
</head>
<body>
<label for="prompt">${this.escapeWebviewAttr(title)}</label>
<textarea id="prompt" placeholder="${this.escapeWebviewAttr(placeholder)}" autofocus></textarea>
<div class="actions">
<button type="button" class="primary" id="submit">${this.escapeWebviewAttr(submitLabel)}</button>
<button type="button" class="secondary" id="cancel">${this.escapeWebviewAttr(cancelLabel)}</button>
</div>
<script>
const vscode = acquireVsCodeApi();
const ta = document.getElementById('prompt');
function focusPrompt() {
if (!ta) {
return;
}
ta.focus();
const len = ta.value.length;
ta.setSelectionRange(len, len);
}
window.addEventListener('load', focusPrompt);
requestAnimationFrame(focusPrompt);
setTimeout(focusPrompt, 0);
setTimeout(focusPrompt, 100);
window.addEventListener('message', (event) => {
const data = event.data;
if (data && data.command === 'focusPrompt') {
focusPrompt();
}
});
document.getElementById('submit').addEventListener('click', () => {
vscode.postMessage({ command: 'submit', text: ta.value });
});
document.getElementById('cancel').addEventListener('click', () => {
vscode.postMessage({ command: 'cancel' });
});
</script>
</body>
</html>`;
// Asks the webview script to put the keyboard focus into the textarea
const requestPromptFocus = () => {
void panel.webview.postMessage({ command: 'focusPrompt' });
};
// Re-focus the textarea whenever the panel becomes visible
panel.onDidChangeViewState((e) => {
if (e.webviewPanel.visible) {
requestPromptFocus();
}
});
// The focus request is sent immediately and again after 50 ms and 200 ms
// NOTE(review): presumably because the webview may not be ready for the first message - confirm
requestPromptFocus();
setTimeout(requestPromptFocus, 50);
setTimeout(requestPromptFocus, 200);
// Messages posted by the webview script: 'submit' carries the text, 'cancel' aborts
panel.webview.onDidReceiveMessage((message) => {
if (message.command === 'submit') {
const text = typeof message.text === 'string' ? message.text : '';
if (!text.trim()) {
// Empty instructions: show the hint and keep the panel open
void vscode.window.showInformationMessage(emptyHint);
return;
}
finish(text);
} else if (message.command === 'cancel') {
finish(undefined);
}
});
// Panel disposed without a prior submit/cancel: resolve with undefined
panel.onDidDispose(() => {
if (!settled) {
settled = true;
resolve(undefined);
}
});
});
}
async showEditPrompt(editor: vscode.TextEditor) {
let chatUrl = this.app.configuration.endpoint_chat
if (!chatUrl) chatUrl = this.app.configuration.endpoint_tools;
@ -64,12 +250,7 @@ export class TextEditor {
const contextRange = new vscode.Range(startLine, 0, endLine, editor.document.lineAt(endLine).text.length);
const context = editor.document.getText(contextRange);
// Create and show input box
const prompt = await vscode.window.showInputBox({
placeHolder: 'Enter your instructions for editing the text...',
prompt: 'How would you like to modify the selected text?',
ignoreFocusOut: true
});
const prompt = await this.showMultilineEditPrompt();
if (!prompt) {
return;

77
src/vscode-lm-chat-shim.d.ts vendored Normal file
View file

@ -0,0 +1,77 @@
// Temporary shim for VS Code LM chat-provider typings.
// Some @types/vscode versions ship parts of the LM API behind proposal typings.
// This keeps `tsc` happy while still targeting the runtime VS Code API.
import type * as vscode from 'vscode';
// Module augmentation: the declarations below are merged into the typings of the
// 'vscode' module. Everything here is type-only; no runtime code is emitted.
declare module 'vscode' {
// eslint-disable-next-line @typescript-eslint/no-namespace
export namespace lm {
// Declares the registration entry point for a chat model provider.
// `vendor` is a string identifier and `provider` is the implementation object;
// the call yields a Disposable.
// NOTE(review): presumably disposing it unregisters the provider — confirm
// against the VS Code API documentation.
function registerLanguageModelChatProvider(
vendor: string,
provider: LanguageModelChatProvider
): vscode.Disposable;
}
// Options object passed to provideLanguageModelChatInformation.
// This shim declares no members for it.
export interface PrepareLanguageModelChatModelOptions {}
// Optional boolean capability flags attached to a model description.
export interface LanguageModelChatCapabilities {
toolCalling?: boolean;
imageInput?: boolean;
}
// Description of a single chat model. Only `id` and `name` are required;
// all remaining fields are optional.
export interface LanguageModelChatInformation {
id: string;
name: string;
family?: string;
version?: string;
maxInputTokens?: number;
maxOutputTokens?: number;
capabilities?: LanguageModelChatCapabilities;
}
// NOTE(review): a union that includes `unknown` collapses to `unknown` for the
// type checker, so the LanguageModelTextPart member is documentation only and
// consumers must narrow each part themselves.
export type LanguageModelChatMessagePart = unknown | LanguageModelTextPart;
// One message of a chat request: a role plus a read-only list of content parts.
export interface LanguageModelChatRequestMessage {
role: LanguageModelChatMessageRole;
content: readonly LanguageModelChatMessagePart[];
}
// Options passed to provideLanguageModelChatResponse.
// The index signatures allow additional keys (typed as unknown) both at the
// top level and inside `modelOptions`.
export interface ProvideLanguageModelChatResponseOptions {
tools?: readonly LanguageModelToolInformation[];
modelOptions?: {
temperature?: number;
[key: string]: unknown;
};
[key: string]: unknown;
}
// Parts that a provider reports through `progress`.
// NOTE(review): as with LanguageModelChatMessagePart, the `unknown` member makes
// the whole union equivalent to `unknown`; the named members only document intent.
export type LanguageModelResponsePart =
| LanguageModelTextPart
| LanguageModelToolCallPart
| unknown;
// Shape of the provider object accepted by lm.registerLanguageModelChatProvider.
export interface LanguageModelChatProvider {
// Optional event carrying no payload.
// NOTE(review): presumably fired when the list of available models changes — confirm.
onDidChangeLanguageModelChatInformation?: vscode.Event<void>;
// Returns the list of model descriptions (directly or via a promise-like,
// per ProviderResult).
provideLanguageModelChatInformation(
options: PrepareLanguageModelChatModelOptions,
token: vscode.CancellationToken
): vscode.ProviderResult<LanguageModelChatInformation[]>;
// Handles a chat request for `model`. The declared result is void, so output
// is delivered through `progress` rather than through the return value.
provideLanguageModelChatResponse(
model: LanguageModelChatInformation,
messages: readonly LanguageModelChatRequestMessage[],
options: ProvideLanguageModelChatResponseOptions,
progress: vscode.Progress<LanguageModelResponsePart>,
token: vscode.CancellationToken
): vscode.ProviderResult<void>;
// Returns a number for `text`, which may be a plain string or a whole request
// message. Per the method name this is a token count for the given model.
provideTokenCount(
model: LanguageModelChatInformation,
text: string | LanguageModelChatRequestMessage,
token: vscode.CancellationToken
): vscode.ProviderResult<number>;
}
}

9
ui/package-lock.json generated
View file

@ -326,7 +326,6 @@
"resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.23.tgz",
"integrity": "sha512-/LDXMQh55EzZQ0uVAZmKKhfENivEvWz6E+EYzh+/MCjMhNsotd+ZHhBGIjFDTi6+fz0OhQQQLbTgdQIxxCsC0w==",
"license": "MIT",
"peer": true,
"dependencies": {
"@types/prop-types": "*",
"csstype": "^3.0.2"
@ -666,7 +665,6 @@
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"acorn": "bin/acorn"
},
@ -693,7 +691,6 @@
"integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"fast-deep-equal": "^3.1.3",
"fast-uri": "^3.0.1",
@ -920,7 +917,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"caniuse-lite": "^1.0.30001726",
"electron-to-chromium": "^1.5.173",
@ -4085,7 +4081,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"nanoid": "^3.3.11",
"picocolors": "^1.1.1",
@ -4288,7 +4283,6 @@
"resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz",
"integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"loose-envify": "^1.1.0"
},
@ -5321,7 +5315,6 @@
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"dev": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
@ -5579,7 +5572,6 @@
"integrity": "sha512-YJB/ESPUe2Locd0NKXmw72Dx8fZQk1gTzI6rc9TAT4+Sypbnhl8jd8RywB1bDsDF9Dy1RUR7gn3q/ZJTd0OZZg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@types/eslint-scope": "^3.7.7",
"@types/estree": "^1.0.8",
@ -5629,7 +5621,6 @@
"integrity": "sha512-pIDJHIEI9LR0yxHXQ+Qh95k2EvXpWzZ5l+d+jIo+RdSm9MiHfzazIxwwni/p7+x4eJZuvG1AJwgC4TNQ7NRgsg==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@discoveryjs/json-ext": "^0.5.0",
"@webpack-cli/configtest": "^2.1.1",