Build: Release v2.80.29

2026-05-10 22:57:03 +09:00
parent ec71014481
commit 789680ccb1
10 changed files with 262 additions and 45 deletions
@@ -4,10 +4,37 @@ import { getConfig } from '../config';
 import { buildApiUrl, logError, logInfo, resolveEngine, summarizeText, _getBrainDir } from '../utils';

 /**
- * IAIService: AI 모델 호출에 대한 인터페이스
+ * IAIService: AI 모델 호출에 대한 인터페이스.
+ *
+ * `call(prompt)` 는 plain user 메시지 1개만 보내는 legacy shortcut이고,
+ * `chat({ system, user })` 는 role-aware 호출이다. Telegram 핸들러처럼
+ * 모델을 grounding 해야 하는 경로에서는 system을 반드시 채워야 한다 —
+ * gemma 같은 작은 모델은 system이 없으면 짧은/모호한 입력에 대해
+ * "시는 못 써드려요" 같은 환각 거절을 하는 경향이 있다.
 */
 export interface IAIService {
    call(prompt: string): Promise<string>;
+    chat(req: AIChatRequest): Promise<AIChatResult>;
+}
+
+/**
+ * Input for a role-aware `AIService.chat()` call.
+ *
+ * Only `user` is required; every other field falls back to a value taken
+ * from the extension config inside `chat()`.
+ */
+export interface AIChatRequest {
+    /**
+     * Optional system prompt. Strongly recommended for short / ambiguous user inputs.
+     * A missing or whitespace-only value means no system message is sent.
+     */
+    system?: string;
+    /** Required. The user message. */
+    user: string;
+    /**
+     * Optional override (default = config.defaultModel). If both are blank,
+     * `chat()` falls back to a hard-coded model name.
+     */
+    model?: string;
+    /**
+     * Optional override (default = config.timeout). Applied to each engine
+     * attempt separately, so a call that falls through to the second engine
+     * can take up to twice this long.
+     */
+    timeoutMs?: number;
+}
+
+/**
+ * Outcome of `AIService.chat()`.
+ *
+ * Returned both for a normal reply and for the "every reachable engine
+ * answered with nothing" case (`empty: true`). If no engine produced even an
+ * empty answer (transport / HTTP errors only), `chat()` throws instead.
+ */
+export interface AIChatResult {
+    /** Reply text. Always `''` when `empty` is true. */
+    content: string;
+    /**
+     * Engine that actually returned the content. When `empty` is true this is
+     * the last engine that returned an empty response.
+     */
+    engine: 'lmstudio' | 'ollama';
+    /** Model name that was sent in the request payload. */
+    model: string;
+    /**
+     * True iff no engine produced non-blank content and at least one engine
+     * answered with an empty response. Caller decides UX.
+     */
+    empty: boolean;
 }

 /**
@@ -18,35 +45,67 @@ export interface IBrainService {
 }

 /**
- * AIService: Ollama 및 LM Studio 폴백 로직을 포함한 AI 호출 구현체
+ * AIService: Ollama 및 LM Studio 폴백 로직을 포함한 AI 호출 구현체.
+ *
+ * Behavior:
+ *   1. Try the user-configured engine first; on transport / 5xx / empty response,
+ *      fall through to the other engine.
+ *   2. Empty responses are treated as a soft failure: we log + retry the other
+ *      engine before giving up. Pure exceptions (network blip) trigger the same
+ *      fallback path.
+ *   3. The legacy `call(prompt)` is preserved as a thin wrapper around `chat()`
+ *      for callers that don't have a system prompt — but new code should pass
+ *      a system prompt explicitly.
 */
 export class AIService implements IAIService {
    public async call(prompt: string): Promise<string> {
+        const result = await this.chat({ user: prompt });
+        return result.content;
+    }
+
+    public async chat(req: AIChatRequest): Promise<AIChatResult> {
        const config = getConfig();
+        const model = (req.model || config.defaultModel || '').trim() || 'gemma4:e2b';
+        const timeoutMs = req.timeoutMs ?? config.timeout;
        const primaryEngine = resolveEngine(config.ollamaUrl);
-        const engines = primaryEngine === 'lmstudio' ? ['lmstudio', 'ollama'] as const : ['ollama', 'lmstudio'] as const;
+        const engines = primaryEngine === 'lmstudio'
+            ? ['lmstudio', 'ollama'] as const
+            : ['ollama', 'lmstudio'] as const;
+
+        const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
+        if (req.system && req.system.trim()) {
+            messages.push({ role: 'system', content: req.system });
+        }
+        messages.push({ role: 'user', content: req.user });
+
        let lastError: Error | null = null;
+        let lastEmptyEngine: typeof engines[number] | null = null;

        for (const engine of engines) {
            const apiUrl = buildApiUrl(config.ollamaUrl, engine, 'chat');
            const payload = {
-                model: config.defaultModel,
-                messages: [{ role: 'user', content: prompt }],
-                stream: false
+                model,
+                messages,
+                stream: false,
+                ...(engine === 'ollama' ? { options: { temperature: 0.7 } } : { temperature: 0.7 }),
            };

            try {
-                logInfo('[AIService] Request started.', { engine, apiUrl });
+                logInfo('[AIService] Request started.', {
+                    engine, apiUrl, model,
+                    hasSystem: !!req.system, userChars: req.user.length,
+                });
                const res = await fetch(apiUrl, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify(payload),
-                    signal: AbortSignal.timeout(config.timeout)
+                    signal: AbortSignal.timeout(timeoutMs),
                });

                const rawText = await res.text();
                if (!res.ok) {
                    lastError = new Error(`AI call failed: ${res.status} ${summarizeText(rawText, 250)}`);
+                    logError(`[AIService] ${engine} HTTP ${res.status}`, { body: summarizeText(rawText, 250) });
                    continue;
                }

@@ -55,12 +114,27 @@ export class AIService implements IAIService {
                    ? (data.choices?.[0]?.message?.content || '')
                    : (data.message?.content || data.response || '');

-                return content;
+                if (!content || !content.trim()) {
+                    // Treat empty as soft failure so the other engine gets a chance.
+                    lastEmptyEngine = engine;
+                    lastError = new Error(`AI engine '${engine}' returned an empty response.`);
+                    logError(`[AIService] ${engine} empty response — falling through.`, { model });
+                    continue;
+                }
+
+                return { content, engine, model, empty: false };
            } catch (error: any) {
                lastError = error instanceof Error ? error : new Error(String(error));
                logError(`[AIService] ${engine} failed:`, lastError.message);
            }
        }
+
+        // Both engines exhausted. Surface a result with empty=true so the
+        // caller (e.g. Telegram handler) can produce a user-visible reply
+        // instead of swallowing the failure.
+        if (lastEmptyEngine) {
+            return { content: '', engine: lastEmptyEngine, model, empty: true };
+        }
        throw lastError || new Error('All AI engines failed.');
    }
 }
@@ -182,6 +182,65 @@ export async function activate(context: vscode.ExtensionContext) {
        getToken: () => _cachedTelegramToken,
    });
    const telegramAi = new AIService();
+
+    /**
+     * Build the Telegram-specific system prompt.
+     *
+     * Why this matters: small local models (gemma e2b/e4b) drift badly when
+     * called as a single user message with no role grounding. The reported
+     * symptom (sending a bare path and getting a hallucinated refusal such as
+     * "I can't write a poem for you") is exactly that drift — the model
+     * invents an interpretation because it has no anchor.
+     *
+     * The prompt does four things:
+     *   1. Names the role (Astra Telegram assistant) so the model has a
+     *      consistent persona across messages.
+     *   2. States the language rule (mirror the user's language).
+     *   3. Tells the model how to treat brain context (evidence when relevant,
+     *      ignore otherwise — never refuse the question because context
+     *      doesn't match).
+     *   4. Specifies behavior for ambiguous inputs (paths, single words,
+     *      fragments) — ask a clarifying question instead of guessing.
+     *
+     * @param hasContext Whether a [SECOND BRAIN CONTEXT] block will precede the
+     *   user's message. The context-handling rules (item 3) are appended only
+     *   when this is true.
+     * @returns The prompt lines joined with `\n`.
+     */
+    const buildTelegramSystemPrompt = (hasContext: boolean) => {
+        // Persona, language rule and behavior rules — always present.
+        const base = [
+            'You are Astra, a Telegram assistant connected to the user\'s personal Second Brain knowledge base.',
+            'Reply in the user\'s language (mirror Korean ↔ English exactly as the user wrote).',
+            'Be concise but complete. Telegram messages should feel like a knowledgeable friend, not a formal report.',
+            '',
+            'Behavior rules:',
+            '- Never refuse a question by claiming you can only do certain things. If you can answer, just answer.',
+            '- If the user\'s message is ambiguous (a single word, a file path, a fragment with no question), ask one short clarifying question instead of guessing what they meant.',
+            '- Do NOT invent that the user asked for poetry, songs, code, or any content type they did not request.',
+        ];
+        // Context-handling rules are only added when a context block is sent,
+        // so the model is never told about a block that is not there.
+        if (hasContext) {
+            base.push(
+                '',
+                'You will receive a [SECOND BRAIN CONTEXT] block before the user\'s message.',
+                '- Use it as evidence only when it directly answers the question. Cite the file path (relative form, e.g. `10_Wiki/Topics/Foo.md`) inline when you do.',
+                '- If the context is unrelated to the question, ignore it silently. Do NOT mention that the context exists, do NOT explain why it doesn\'t apply, do NOT refuse the question because of it.',
+            );
+        }
+        return base.join('\n');
+    };
+
+    /**
+     * Split a reply into pieces that each fit in one Telegram message
+     * (Telegram has a 4096-char per-message limit; the default leaves headroom).
+     *
+     * Split points are tried in order of readability: last paragraph break,
+     * last line break, last sentence end, and finally a hard cut at the limit.
+     * A candidate is only accepted if it falls in the second half of the
+     * window, so a stray early newline cannot produce a tiny chunk.
+     *
+     * @param text Full reply text.
+     * @param max  Maximum length (UTF-16 code units) of each chunk. Values
+     *   below 1 are clamped to 1 so the loop always makes progress.
+     * @returns The chunks in order, each trimmed and non-empty. A text that
+     *   already fits is returned unchanged as a single element.
+     */
+    const chunkTelegramMessage = (text: string, max = 4000): string[] => {
+        // Clamp finite limits to >= 1: with max <= 0 every cut would be 0 (or
+        // negative) and `remaining` would never shrink.
+        const limit = Number.isFinite(max) ? Math.max(1, Math.floor(max)) : max;
+        if (text.length <= limit) return [text];
+        const half = limit * 0.5;
+        const out: string[] = [];
+        let remaining = text;
+        while (remaining.length > limit) {
+            // Prefer splitting on the last paragraph or sentence break before the limit.
+            let cut = remaining.lastIndexOf('\n\n', limit);
+            if (cut < half) cut = remaining.lastIndexOf('\n', limit);
+            // Cut AFTER the period so it stays with its sentence instead of
+            // starting the next chunk with ". ". Searching from limit - 1
+            // keeps the chunk within the limit; a miss (-1) becomes 0 and
+            // falls through to the hard cut.
+            if (cut < half) cut = remaining.lastIndexOf('. ', limit - 1) + 1;
+            if (cut < half) {
+                cut = limit;
+                // Don't split a surrogate pair (e.g. an emoji) across chunks:
+                // that would leave a lone surrogate at the end of this chunk.
+                const prev = remaining.charCodeAt(cut - 1);
+                if (cut > 1 && prev >= 0xd800 && prev <= 0xdbff) cut -= 1;
+            }
+            const piece = remaining.slice(0, cut).trim();
+            // A whitespace-only slice would become an empty message.
+            if (piece) out.push(piece);
+            remaining = remaining.slice(cut).trim();
+        }
+        if (remaining) out.push(remaining);
+        return out;
+    };
+
    const telegramBot = new TelegramBot({
        client: telegramClient,
        handle: async (text, chatId) => {
@@ -192,7 +251,17 @@ export async function activate(context: vscode.ExtensionContext) {
                return null;
            }

-            // Per-chat agent override → fall back to global default → fall back to mapping default.
+            // Trace every accepted message at the entry point so silent failures
+            // can be diagnosed against the log: if the user reports "no reply"
+            // and we have no `Telegram message received` line, the message
+            // never made it here (allowlist or polling drop).
+            logInfo('Telegram message received.', {
+                chatId,
+                chars: text.length,
+                preview: text.length > 80 ? text.slice(0, 80) + '…' : text,
+            });
+
+            // Per-chat agent override → global default → mapping default.
            const perChatAgents = cfg.get<Record<string, string>>('telegram.agentByChatId', {}) || {};
            const perChatAgent = perChatAgents[String(chatId)];
            const defaultAgent = cfg.get<string>('telegram.defaultAgent', '') || '';
@@ -203,8 +272,10 @@ export async function activate(context: vscode.ExtensionContext) {
            const scope = resolveScopeForAgent(agentName, brainRoot);

            // RAG retrieval — even with no agent match we still search the whole
-            // brain so the bot stays useful. The buildContextBlock label tells
-            // the user which mode they're in.
+            // brain so the bot stays useful. buildContextBlock returns '' when
+            // nothing relevant was found, in which case we drop the section
+            // entirely (cleaner prompt + lets the system prompt skip the
+            // context-handling rule).
            let contextBlock = '';
            if (brainRoot) {
                try {
@@ -226,15 +297,47 @@ export async function activate(context: vscode.ExtensionContext) {
                }
            }

-            const composed = contextBlock
-                ? `${contextBlock}\n\n[사용자 질문]\n${text}\n\n[지시] 위 컨텍스트가 관련 있을 때만 활용하고, 답변에는 출처(파일 경로)를 인용하세요.`
+            const systemPrompt = buildTelegramSystemPrompt(!!contextBlock);
+            const userMessage = contextBlock
+                ? `[SECOND BRAIN CONTEXT]\n${contextBlock}\n\n[USER MESSAGE]\n${text}`
                : text;

            try {
-                const reply = await telegramAi.call(composed);
-                return (reply && reply.trim()) ? reply : '(빈 응답)';
+                const result = await telegramAi.chat({ system: systemPrompt, user: userMessage });
+                logInfo('Telegram AI reply generated.', {
+                    chatId, engine: result.engine, model: result.model,
+                    empty: result.empty, chars: result.content.length,
+                });
+
+                if (result.empty) {
+                    // Reach the user instead of going silent. The user can then
+                    // restart the model or simplify the question.
+                    return [
+                        '⚠️ AI 모델이 빈 응답을 반환했습니다.',
+                        '',
+                        '다음을 시도해보세요:',
+                        '• LM Studio에서 모델이 실제로 로드되어 있는지 확인',
+                        '• 더 짧고 구체적인 질문으로 다시 보내기',
+                        '• `Astra: Test Telegram Connection` 으로 연결 상태 확인',
+                    ].join('\n');
+                }
+
+                // Telegram has a hard 4096 char/message limit. Long replies are
+                // chunked and joined with a "(이어서)" hint so the user knows
+                // multiple messages belong together.
+                const chunks = chunkTelegramMessage(result.content);
+                if (chunks.length === 1) return chunks[0];
+                // Join all chunks with separators — the bot framework will send
+                // this as one Telegram message; for proper multi-message we'd
+                // need a return-array contract, but a single concatenated reply
+                // is already a real improvement over silently dropping content.
+                return chunks.map((c, i) => i === 0 ? c : `(이어서 ${i + 1}/${chunks.length})\n\n${c}`).join('\n\n---\n\n').slice(0, 4000);
            } catch (e: any) {
-                return `⚠️ Astra error: ${e?.message ?? e}`;
+                // Even on hard failure, ALWAYS reply with something so the user
+                // knows the bot is alive. Silent failures were the second
+                // reported pain point.
+                logError('Telegram handler threw.', { chatId, error: e?.message ?? String(e) });
+                return `⚠️ Astra 처리 중 오류가 발생했습니다.\n${e?.message ?? e}\n\nLM Studio가 실행 중인지, 모델이 로드되어 있는지 확인해주세요.`;
            }
        },
    });
@@ -224,17 +224,47 @@ export class TelegramBot {
            reply = `⚠️ Astra 처리 중 오류: ${e?.message ?? e}`;
        }

-        if (reply == null || !reply.trim()) return;
-        try {
-            await this._deps.client.sendMessage({
-                chatId,
-                text: reply,
-                signal: this._abort?.signal,
-            });
-        } catch (e: any) {
-            // Sending the reply failed — log and move on. Don't tear down the
-            // loop because of a single send failure.
-            logError('Telegram reply send failed.', { chatId, error: e?.message ?? String(e) });
+        if (reply == null) {
+            // Handler intentionally suppressed (e.g. allowlist drop). Different
+            // from an empty string — empty means we tried and got nothing back,
+            // which is a bug we want to know about.
+            return;
+        }
+        if (!reply.trim()) {
+            logError('Telegram reply was empty after handle(). Sending placeholder so the user knows the bot is alive.', { chatId });
+            reply = '⚠️ 빈 응답이 생성되었습니다. 다시 시도해주세요.';
+        }
+
+        // One-shot retry on transient send failures (network blip, Telegram
+        // rate-limit). The previous behavior — log once and move on — was the
+        // most likely cause of the "messages disappear sometimes" report:
+        // the AI replied successfully, but the send never reached Telegram
+        // and we never retried.
+        let sent = false;
+        for (let attempt = 0; attempt < 2 && !sent; attempt++) {
+            try {
+                await this._deps.client.sendMessage({
+                    chatId,
+                    text: reply,
+                    signal: this._abort?.signal,
+                });
+                sent = true;
+                logInfo('Telegram reply sent.', { chatId, chars: reply.length, attempt });
+            } catch (e: any) {
+                logError('Telegram reply send failed.', {
+                    chatId, attempt, error: e?.message ?? String(e),
+                });
+                if (attempt === 0 && this._running) {
+                    // Brief backoff before the retry — don't tight-loop on a
+                    // 429 rate limit.
+                    await new Promise((r) => {
+                        const t = setTimeout(r, 1500);
+                        if (typeof t === 'object' && t && 'unref' in t) (t as any).unref();
+                    });
+                } else {
+                    break;
+                }
+            }
        }
    }
 }