feat: US-014 - Migrate production chat from Gemini to OpenRouter
This commit is contained in:
@@ -273,11 +273,7 @@
|
|||||||
"Verify in browser: chat opens, sends a message, streams a response correctly"
|
"Verify in browser: chat opens, sends a message, streams a response correctly"
|
||||||
],
|
],
|
||||||
"priority": 14,
|
"priority": 14,
|
||||||
<<<<<<< Updated upstream
|
|
||||||
"passes": true,
|
"passes": true,
|
||||||
"notes": "The current API base is 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash'. Change the model segment to 'gemini-3-flash-preview'. The API path structure (v1beta/models/{model}:streamGenerateContent) should be the same. Verify that gemini-3-flash-preview is the correct model ID — check Google AI Studio or the API docs. For the display name, use a human-friendly string like 'Gemini 3 Flash' (not the full model ID). The constant should be defined at the top of gemini.ts and exported for use in ChatWidget."
|
|
||||||
=======
|
|
||||||
"passes": false,
|
|
||||||
"notes": "OpenRouter uses the OpenAI-compatible format. Key differences from Gemini: (1) Auth via Bearer token header, not URL param. (2) System prompt is a message with role:'system', not a separate system_instruction field. (3) Streaming SSE data lines contain {choices:[{delta:{content:'...'}}]}, not candidates[0].content.parts[0].text. (4) The [DONE] sentinel is the same. (5) Add headers: 'HTTP-Referer': window.location.origin, 'X-Title': 'Andy Charlwood Portfolio'. The buildSystemPrompt() function and its content stay the same — only the API transport changes. The buildRequestBody() function needs the most changes."
|
"notes": "OpenRouter uses the OpenAI-compatible format. Key differences from Gemini: (1) Auth via Bearer token header, not URL param. (2) System prompt is a message with role:'system', not a separate system_instruction field. (3) Streaming SSE data lines contain {choices:[{delta:{content:'...'}}]}, not candidates[0].content.parts[0].text. (4) The [DONE] sentinel is the same. (5) Add headers: 'HTTP-Referer': window.location.origin, 'X-Title': 'Andy Charlwood Portfolio'. The buildSystemPrompt() function and its content stay the same — only the API transport changes. The buildRequestBody() function needs the most changes."
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -375,7 +371,6 @@
|
|||||||
"priority": 19,
|
"priority": 19,
|
||||||
"passes": false,
|
"passes": false,
|
||||||
"notes": "This is the iterative loop. In a single Ralph iteration, run the benchmark, review results, and if needed make targeted improvements to the system prompt in llm.ts. Focus on structural fixes: if Q7 (clinical specialties) fails, ensure the system prompt lists specialties under the relevant role — this helps ALL specialty questions, not just Q7. If the benchmark takes too many iterations, focus on getting the most impactful improvements in and document remaining gaps. The anti-benchmaxing rules apply: no hardcoded answers, no question-specific prompt clauses."
|
"notes": "This is the iterative loop. In a single Ralph iteration, run the benchmark, review results, and if needed make targeted improvements to the system prompt in llm.ts. Focus on structural fixes: if Q7 (clinical specialties) fails, ensure the system prompt lists specialties under the relevant role — this helps ALL specialty questions, not just Q7. If the benchmark takes too many iterations, focus on getting the most impactful improvements in and document remaining gaps. The anti-benchmaxing rules apply: no hardcoded answers, no question-specific prompt clauses."
|
||||||
>>>>>>> Stashed changes
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
+37
-5
@@ -18,12 +18,14 @@
|
|||||||
- `loadEmbeddings()` and `paletteMap` (Map<id, PaletteItem>) are precomputed via `useMemo` — no re-computation on each search
|
- `loadEmbeddings()` and `paletteMap` (Map<id, PaletteItem>) are precomputed via `useMemo` — no re-computation on each search
|
||||||
- ChatWidget is mounted in DashboardLayout alongside CommandPalette and DetailPanel — z-index 90 (below command palette z-1000)
|
- ChatWidget is mounted in DashboardLayout alongside CommandPalette and DetailPanel — z-index 90 (below command palette z-1000)
|
||||||
- `prefersReducedMotion` pattern: read `window.matchMedia` at module level, use in framer-motion variants to skip animation
|
- `prefersReducedMotion` pattern: read `window.matchMedia` at module level, use in framer-motion variants to skip animation
|
||||||
- ChatWidget stores messages as `Array<{ role: 'user' | 'assistant', content: string }>` — same shape as LLM message format, ready for Gemini integration
|
- ChatWidget stores messages as `Array<{ role: 'user' | 'assistant', content: string }>` — same shape as LLM message format
|
||||||
- ChatWidget `isOpen` state controls both panel visibility and button icon (MessageCircle ↔ X) — panel rendering handled by AnimatePresence
|
- ChatWidget `isOpen` state controls both panel visibility and button icon (MessageCircle ↔ X) — panel rendering handled by AnimatePresence
|
||||||
- `src/lib/gemini.ts` exports `sendChatMessage(messages)` (async generator), `isGeminiAvailable()`, `parseItemIds(text)`, `stripItemsSuffix(text)` — ChatMessage type is `{ role: 'user' | 'assistant', content: string }`
|
- `src/lib/llm.ts` exports `sendChatMessage(messages)` (async generator), `isLLMAvailable()`, `buildSystemPrompt()`, `parseItemIds(text)`, `stripItemsSuffix(text)`, `LLM_MODEL`, `LLM_DISPLAY_NAME` — ChatMessage type is `{ role: 'user' | 'assistant', content: string }`
|
||||||
- Gemini API uses SSE streaming: POST to `:streamGenerateContent?alt=sse&key=KEY`, parse `data:` lines as JSON, extract `candidates[0].content.parts[0].text`
|
- LLM API uses OpenRouter (OpenAI-compatible): POST to `https://openrouter.ai/api/v1/chat/completions` with `stream: true`, auth via `Authorization: Bearer` header, parse SSE `data:` lines as JSON, extract `choices[0].delta.content`
|
||||||
- System prompt built from `buildEmbeddingTexts()` — instructs model to end responses with `[ITEMS: id1, id2, id3]` for portfolio item linking
|
- System prompt sent as `role: 'system'` message (first in messages array), built from `buildEmbeddingTexts()` — instructs model to end responses with `[ITEMS: id1, id2, id3]` for portfolio item linking
|
||||||
- `isGeminiAvailable()` checks `import.meta.env.VITE_GEMINI_API_KEY` — when missing, chat panel shows "unavailable" message but button remains visible
|
- `isLLMAvailable()` checks `import.meta.env.VITE_OPEN_ROUTER_API_KEY` — when missing, chat panel shows "unavailable" message but button remains visible
|
||||||
|
- OpenRouter accepts optional `HTTP-Referer` and `X-Title` attribution headers (they identify the app on openrouter.ai; requests succeed without them) — set to `window.location.origin` and `'Andy Charlwood Portfolio'` respectively
|
||||||
|
- Model is `z-ai/glm-5` (set in `LLM_MODEL` constant in `llm.ts`)
|
||||||
- Assistant messages store item IDs as `<!--ITEMS:id1,id2-->` HTML comment suffix for US-010 to parse — `getDisplayText()` strips this before rendering
|
- Assistant messages store item IDs as `<!--ITEMS:id1,id2-->` HTML comment suffix for US-010 to parse — `getDisplayText()` strips this before rendering
|
||||||
- Conversation history capped at 10 messages (`MAX_HISTORY`), metadata stripped before sending to API
|
- Conversation history capped at 10 messages (`MAX_HISTORY`), metadata stripped before sending to API
|
||||||
- Icon/color mappings (`iconByType`, `iconColorStyles`) live in `src/lib/palette-icons.ts` — shared between CommandPalette and ChatWidget
|
- Icon/color mappings (`iconByType`, `iconColorStyles`) live in `src/lib/palette-icons.ts` — shared between CommandPalette and ChatWidget
|
||||||
@@ -312,3 +314,33 @@
|
|||||||
- Concrete examples in format instructions (e.g., `[ITEMS: exp-nhs-nwicb, skill-python]`) are more reliable than generic placeholders (`[ITEMS: id1, id2]`)
|
- Concrete examples in format instructions (e.g., `[ITEMS: exp-nhs-nwicb, skill-python]`) are more reliable than generic placeholders (`[ITEMS: id1, id2]`)
|
||||||
- The `GEMINI_MODEL` and `GEMINI_DISPLAY_NAME` constants in `gemini.ts` are already exported and used by `ChatWidget.tsx` — single source of truth for model identity
|
- The `GEMINI_MODEL` and `GEMINI_DISPLAY_NAME` constants in `gemini.ts` are already exported and used by `ChatWidget.tsx` — single source of truth for model identity
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## 2026-02-16 - US-014
|
||||||
|
- Renamed `src/lib/gemini.ts` → `src/lib/llm.ts` via `git mv`
|
||||||
|
- Rewrote `llm.ts` for OpenRouter API (OpenAI-compatible format):
|
||||||
|
- API endpoint: `https://openrouter.ai/api/v1/chat/completions`
|
||||||
|
- Model: `z-ai/glm-5` (exported as `LLM_MODEL`)
|
||||||
|
- Display name: `GLM-5` (exported as `LLM_DISPLAY_NAME`)
|
||||||
|
- Auth: `Authorization: Bearer` header using `VITE_OPEN_ROUTER_API_KEY` env var
|
||||||
|
- Added `HTTP-Referer` and `X-Title` headers per OpenRouter docs
|
||||||
|
- System prompt sent as `role: 'system'` message (first in messages array) instead of Gemini's `system_instruction` field
|
||||||
|
- SSE streaming parses `choices[0].delta.content` instead of Gemini's `candidates[0].content.parts[0].text`
|
||||||
|
- No `'model'` role mapping needed — OpenRouter uses `'assistant'` directly
|
||||||
|
- Request body uses `max_tokens` (OpenAI format) instead of `maxOutputTokens` (Gemini format)
|
||||||
|
- Renamed `isGeminiAvailable()` → `isLLMAvailable()`, updated all call sites in `ChatWidget.tsx`
|
||||||
|
- Updated all imports: `ChatWidget.tsx` now imports from `@/lib/llm` instead of `@/lib/gemini`
|
||||||
|
- Renamed `GEMINI_DISPLAY_NAME` → `LLM_DISPLAY_NAME` and updated ChatWidget header display
|
||||||
|
- `buildSystemPrompt()` now exported (was private) for use by benchmark script in US-015
|
||||||
|
- Fixed merge conflict in `Ralph/prd.json` (resolved to keep OpenRouter migration stories US-014–US-019)
|
||||||
|
- `parseItemIds()` and `stripItemsSuffix()` unchanged — response format spec is the same
|
||||||
|
- Typecheck (0 errors), lint (0 new errors), production build all pass
|
||||||
|
- Files changed: `src/lib/gemini.ts` → `src/lib/llm.ts` (renamed + rewritten), `src/components/ChatWidget.tsx`, `Ralph/prd.json`
|
||||||
|
- **Learnings for future iterations:**
|
||||||
|
- OpenRouter uses OpenAI-compatible format: `messages` array with `role: 'system'|'user'|'assistant'`, `choices[0].delta.content` for streaming
|
||||||
|
- Gemini's `system_instruction` field → OpenRouter's first message with `role: 'system'`
|
||||||
|
- Gemini's `'model'` role → OpenRouter's `'assistant'` role (no mapping needed since ChatMessage already uses 'assistant')
|
||||||
|
- OpenRouter's `HTTP-Referer` and `X-Title` headers are optional app-attribution headers, not an auth requirement — we send them anyway, using `window.location.origin` for the referer
|
||||||
|
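- `VITE_OPEN_ROUTER_API_KEY` replaces `VITE_GEMINI_API_KEY` — update `.env` file accordingly. Note: Vite inlines every `VITE_`-prefixed variable into the client bundle, so this key is readable by anyone who loads the site; use a key with a spending limit or route requests through a server-side proxy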
|
||||||
|
- `buildSystemPrompt()` is now exported from `llm.ts` — benchmark script (US-015) can import it directly instead of duplicating the logic
|
||||||
|
- The benchmark script (`scripts/benchmark.ts`) still uses the old Gemini API — needs separate migration in US-015
|
||||||
|
---
|
||||||
|
|||||||
@@ -3,12 +3,12 @@ import { motion, AnimatePresence } from 'framer-motion'
|
|||||||
import { MessageCircle, X, Send, Loader2 } from 'lucide-react'
|
import { MessageCircle, X, Send, Loader2 } from 'lucide-react'
|
||||||
import {
|
import {
|
||||||
sendChatMessage,
|
sendChatMessage,
|
||||||
isGeminiAvailable,
|
isLLMAvailable,
|
||||||
parseItemIds,
|
parseItemIds,
|
||||||
stripItemsSuffix,
|
stripItemsSuffix,
|
||||||
GEMINI_DISPLAY_NAME,
|
LLM_DISPLAY_NAME,
|
||||||
type ChatMessage,
|
type ChatMessage,
|
||||||
} from '@/lib/gemini'
|
} from '@/lib/llm'
|
||||||
import { buildPaletteData } from '@/lib/search'
|
import { buildPaletteData } from '@/lib/search'
|
||||||
import type { PaletteItem, PaletteAction } from '@/lib/search'
|
import type { PaletteItem, PaletteAction } from '@/lib/search'
|
||||||
import { iconByType, iconColorStyles } from '@/lib/palette-icons'
|
import { iconByType, iconColorStyles } from '@/lib/palette-icons'
|
||||||
@@ -64,7 +64,7 @@ export function ChatWidget({ onAction }: ChatWidgetProps) {
|
|||||||
const messagesEndRef = useRef<HTMLDivElement>(null)
|
const messagesEndRef = useRef<HTMLDivElement>(null)
|
||||||
const inputRef = useRef<HTMLTextAreaElement>(null)
|
const inputRef = useRef<HTMLTextAreaElement>(null)
|
||||||
|
|
||||||
const geminiAvailable = isGeminiAvailable()
|
const llmAvailable = isLLMAvailable()
|
||||||
|
|
||||||
// Build palette map for looking up items by ID
|
// Build palette map for looking up items by ID
|
||||||
const paletteMap = useMemo(() => {
|
const paletteMap = useMemo(() => {
|
||||||
@@ -264,7 +264,7 @@ export function ChatWidget({ onAction }: ChatWidgetProps) {
|
|||||||
color: 'var(--text-tertiary)',
|
color: 'var(--text-tertiary)',
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{GEMINI_DISPLAY_NAME}
|
{LLM_DISPLAY_NAME}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<button
|
<button
|
||||||
@@ -306,7 +306,7 @@ export function ChatWidget({ onAction }: ChatWidgetProps) {
|
|||||||
}}
|
}}
|
||||||
className="pmr-scrollbar"
|
className="pmr-scrollbar"
|
||||||
>
|
>
|
||||||
{!geminiAvailable && (
|
{!llmAvailable && (
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
textAlign: 'center',
|
textAlign: 'center',
|
||||||
@@ -320,7 +320,7 @@ export function ChatWidget({ onAction }: ChatWidgetProps) {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{geminiAvailable && messages.length === 0 && (
|
{llmAvailable && messages.length === 0 && (
|
||||||
<div style={{ display: 'flex', flexDirection: 'column', gap: '12px' }}>
|
<div style={{ display: 'flex', flexDirection: 'column', gap: '12px' }}>
|
||||||
{/* Welcome bubble — styled as assistant message */}
|
{/* Welcome bubble — styled as assistant message */}
|
||||||
<div style={{ display: 'flex', justifyContent: 'flex-start' }}>
|
<div style={{ display: 'flex', justifyContent: 'flex-start' }}>
|
||||||
@@ -537,7 +537,7 @@ export function ChatWidget({ onAction }: ChatWidgetProps) {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Input area */}
|
{/* Input area */}
|
||||||
{geminiAvailable && (
|
{llmAvailable && (
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
padding: '12px 16px',
|
padding: '12px 16px',
|
||||||
|
|||||||
@@ -5,20 +5,20 @@ export interface ChatMessage {
|
|||||||
content: string
|
content: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export const GEMINI_MODEL = 'gemini-3-flash-preview'
|
export const LLM_MODEL = 'z-ai/glm-5'
|
||||||
export const GEMINI_DISPLAY_NAME = 'Gemini 3 Flash'
|
export const LLM_DISPLAY_NAME = 'GLM-5'
|
||||||
|
|
||||||
const GEMINI_API_BASE = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}`
|
const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'
|
||||||
|
|
||||||
function getApiKey(): string | undefined {
|
function getApiKey(): string | undefined {
|
||||||
return import.meta.env.VITE_GEMINI_API_KEY as string | undefined
|
return import.meta.env.VITE_OPEN_ROUTER_API_KEY as string | undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isGeminiAvailable(): boolean {
|
export function isLLMAvailable(): boolean {
|
||||||
return !!getApiKey()
|
return !!getApiKey()
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildSystemPrompt(): string {
|
export function buildSystemPrompt(): string {
|
||||||
const texts = buildEmbeddingTexts()
|
const texts = buildEmbeddingTexts()
|
||||||
const cvContent = texts.map((t) => `[${t.id}] ${t.text}`).join('\n')
|
const cvContent = texts.map((t) => `[${t.id}] ${t.text}`).join('\n')
|
||||||
|
|
||||||
@@ -45,20 +45,18 @@ function buildRequestBody(
|
|||||||
messages: ChatMessage[],
|
messages: ChatMessage[],
|
||||||
systemPrompt: string,
|
systemPrompt: string,
|
||||||
): object {
|
): object {
|
||||||
const contents = messages.map((msg) => ({
|
|
||||||
role: msg.role === 'assistant' ? 'model' : 'user',
|
|
||||||
parts: [{ text: msg.content }],
|
|
||||||
}))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
system_instruction: {
|
model: LLM_MODEL,
|
||||||
parts: [{ text: systemPrompt }],
|
stream: true,
|
||||||
},
|
|
||||||
contents,
|
|
||||||
generationConfig: {
|
|
||||||
temperature: 0.7,
|
temperature: 0.7,
|
||||||
maxOutputTokens: 512,
|
max_tokens: 512,
|
||||||
},
|
messages: [
|
||||||
|
{ role: 'system', content: systemPrompt },
|
||||||
|
...messages.map((msg) => ({
|
||||||
|
role: msg.role,
|
||||||
|
content: msg.content,
|
||||||
|
})),
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,23 +65,25 @@ export async function* sendChatMessage(
|
|||||||
): AsyncGenerator<string> {
|
): AsyncGenerator<string> {
|
||||||
const apiKey = getApiKey()
|
const apiKey = getApiKey()
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
throw new Error('Gemini API key not configured')
|
throw new Error('LLM API key not configured')
|
||||||
}
|
}
|
||||||
|
|
||||||
const systemPrompt = buildSystemPrompt()
|
const systemPrompt = buildSystemPrompt()
|
||||||
const body = buildRequestBody(messages, systemPrompt)
|
const body = buildRequestBody(messages, systemPrompt)
|
||||||
|
|
||||||
const response = await fetch(
|
const response = await fetch(OPENROUTER_API_URL, {
|
||||||
`${GEMINI_API_BASE}:streamGenerateContent?alt=sse&key=${apiKey}`,
|
|
||||||
{
|
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: {
|
||||||
body: JSON.stringify(body),
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${apiKey}`,
|
||||||
|
'HTTP-Referer': window.location.origin,
|
||||||
|
'X-Title': 'Andy Charlwood Portfolio',
|
||||||
},
|
},
|
||||||
)
|
body: JSON.stringify(body),
|
||||||
|
})
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error(`Gemini API error: ${response.status}`)
|
throw new Error(`LLM API error: ${response.status}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
const reader = response.body?.getReader()
|
const reader = response.body?.getReader()
|
||||||
@@ -102,7 +102,6 @@ export async function* sendChatMessage(
|
|||||||
buffer += decoder.decode(value, { stream: true })
|
buffer += decoder.decode(value, { stream: true })
|
||||||
|
|
||||||
const lines = buffer.split('\n')
|
const lines = buffer.split('\n')
|
||||||
// Keep the last potentially incomplete line in the buffer
|
|
||||||
buffer = lines.pop() ?? ''
|
buffer = lines.pop() ?? ''
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
@@ -114,7 +113,7 @@ export async function* sendChatMessage(
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const parsed = JSON.parse(jsonStr)
|
const parsed = JSON.parse(jsonStr)
|
||||||
const text = parsed?.candidates?.[0]?.content?.parts?.[0]?.text
|
const text = parsed?.choices?.[0]?.delta?.content
|
||||||
if (text) {
|
if (text) {
|
||||||
yield text
|
yield text
|
||||||
}
|
}
|
||||||
@@ -130,7 +129,7 @@ export async function* sendChatMessage(
|
|||||||
if (jsonStr && jsonStr !== '[DONE]') {
|
if (jsonStr && jsonStr !== '[DONE]') {
|
||||||
try {
|
try {
|
||||||
const parsed = JSON.parse(jsonStr)
|
const parsed = JSON.parse(jsonStr)
|
||||||
const text = parsed?.candidates?.[0]?.content?.parts?.[0]?.text
|
const text = parsed?.choices?.[0]?.delta?.content
|
||||||
if (text) {
|
if (text) {
|
||||||
yield text
|
yield text
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user