diff --git a/src/modules/aix/server/api/aix.wiretypes.ts b/src/modules/aix/server/api/aix.wiretypes.ts index 890e51756..be700027d 100644 --- a/src/modules/aix/server/api/aix.wiretypes.ts +++ b/src/modules/aix/server/api/aix.wiretypes.ts @@ -502,6 +502,8 @@ export namespace AixWire_API { 'aifn-image-caption', // generating image captions - attachmentId 'beam-gather', // fusing beam rays - fusionId 'beam-scatter', // scattering beam rays - rayId + 'beam-council-ranking', // council peer ranking - rayId + 'beam-council-chairman', // council chairman synthesis - 'chairman' 'call', // having a phone conversation - messageId of the first message 'conversation', // chatting with a persona - conversationId 'persona-extract', // extracting a persona from texts - chainId diff --git a/src/modules/beam/gather/council/BeamCouncilView.tsx b/src/modules/beam/gather/council/BeamCouncilView.tsx new file mode 100644 index 000000000..15dd2a6b1 --- /dev/null +++ b/src/modules/beam/gather/council/BeamCouncilView.tsx @@ -0,0 +1,260 @@ +/** + * BeamCouncilView - Main council voting view + * Displays all visualizations and orchestrates the council voting process + */ + +import * as React from 'react'; +import { Box, Button, CircularProgress, Sheet, Typography, Accordion, AccordionSummary, AccordionDetails, AccordionGroup } from '@mui/joy'; +import CloseIcon from '@mui/icons-material/Close'; +import CheckCircleIcon from '@mui/icons-material/CheckCircle'; +import HowToVoteIcon from '@mui/icons-material/HowToVote'; + +import { ChatMessage } from '../../../../apps/chat/components/message/ChatMessage'; +import { getIsMobile } from '~/common/components/useMatchMedia'; +import { messageFragmentsReduceText } from '~/common/stores/chat/chat.message'; + +import type { BeamStoreApi } from '../../store-beam.hooks'; +import { beamCardMessageScrollingSx, beamCardMessageSx } from '../../BeamCard'; +import { getBeamCardScrolling } from '../../store-module-beam'; + +import type { CouncilResults, 
CouncilProgress } from './beam.gather.council.types'; +import { CouncilLeaderboard } from './CouncilLeaderboard'; +import { CouncilHeatmap } from './CouncilHeatmap'; +import { CouncilEvaluations } from './CouncilEvaluations'; + +interface BeamCouncilViewProps { + beamStore: BeamStoreApi; + onClose: () => void; + onAccept?: (synthesisText: string) => void; +} + +export function BeamCouncilView(props: BeamCouncilViewProps) { + const { beamStore, onClose, onAccept } = props; + + const [progress, setProgress] = React.useState({ + state: 'idle', + currentStep: 0, + totalSteps: 0, + message: '', + }); + + const [results, setResults] = React.useState(null); + const [error, setError] = React.useState(null); + + const isMobile = getIsMobile(); + + // Extract rays and model names + const rays = props.beamStore.getState().rays; + const rayIds = rays.map(r => r.rayId); + const rayModelNames = React.useMemo(() => { + const map = new Map(); + for (const ray of rays) { + // Try to extract model name from ray metadata or use a fallback + const modelName = ray.rayLlmId || `Model ${rays.indexOf(ray) + 1}`; + map.set(ray.rayId, modelName); + } + return map; + }, [rays]); + + // Run council voting when component mounts + React.useEffect(() => { + const runCouncilVoting = async () => { + try { + setProgress({ + state: 'ranking', + currentStep: 0, + totalSteps: rays.length + 1, + message: 'Initializing council voting...', + }); + + const { executeCouncilVoting } = await import('./beam.gather.council.execution'); + + const chatHistory = props.beamStore.getState().inputHistory || []; + const chairmanLlmId = props.beamStore.getState().currentGatherLlmId || rays[0]?.rayLlmId; + + if (!chairmanLlmId) { + throw new Error('No chairman model selected'); + } + + const rayData = rays.map(ray => ({ + rayId: ray.rayId, + llmId: ray.rayLlmId!, + modelName: rayModelNames.get(ray.rayId) || 'Unknown', + message: ray.message!, + })); + + const abortController = new AbortController(); + + const 
councilResults = await executeCouncilVoting( + chatHistory, + rayData, + chairmanLlmId, + abortController.signal, + setProgress, + ); + + setResults(councilResults); + setProgress({ + state: 'complete', + currentStep: rays.length + 1, + totalSteps: rays.length + 1, + message: 'Council voting complete!', + }); + + } catch (err) { + const errorMessage = err instanceof Error ? err.message : 'Unknown error'; + setError(errorMessage); + setProgress({ + state: 'error', + currentStep: 0, + totalSteps: 0, + message: 'Council voting failed', + error: errorMessage, + }); + } + }; + + runCouncilVoting(); + }, []); // Run once on mount + + const handleAccept = () => { + if (results?.chairmanSynthesis && onAccept) { + const synthesisText = messageFragmentsReduceText(results.chairmanSynthesis.fragments); + onAccept(synthesisText); + } + }; + + return ( + + + {/* Header */} + + + + + Council Voting + + + + + + {/* Progress */} + {progress.state !== 'complete' && progress.state !== 'error' && ( + + + + {progress.message} ({progress.currentStep}/{progress.totalSteps}) + + + )} + + {/* Error */} + {error && ( + + + Error: {error} + + + )} + + {/* Results */} + {results && ( + + {/* Leaderboard */} + + + {/* Expandable sections */} + + {/* Heatmap Matrix */} + + + 📊 Ranking Matrix (Heatmap) + + + + + + + {/* Evaluations */} + + + 📝 Peer Evaluations (Detailed) + + + + + + + + {/* Chairman Synthesis */} + {results.chairmanSynthesis && ( + + + 🎯 Chairman Synthesis + + + + )} + + {/* Action Buttons */} + + {onAccept && ( + + )} + + + )} + + + ); +} diff --git a/src/modules/beam/gather/council/CouncilEvaluations.tsx b/src/modules/beam/gather/council/CouncilEvaluations.tsx new file mode 100644 index 000000000..d73d00552 --- /dev/null +++ b/src/modules/beam/gather/council/CouncilEvaluations.tsx @@ -0,0 +1,91 @@ +/** + * Council Evaluations - tabbed view of all ranking evaluations + */ + +import * as React from 'react'; +import { Box, Sheet, Typography, Tabs, TabList, Tab, TabPanel } from 
'@mui/joy'; + +import { RenderMarkdown } from '~/modules/blocks/markdown/RenderMarkdown'; + +import type { CouncilRanking } from './beam.gather.council.types'; + +interface CouncilEvaluationsProps { + rankings: CouncilRanking[]; +} + +export function CouncilEvaluations(props: CouncilEvaluationsProps) { + const { rankings } = props; + const [selectedTab, setSelectedTab] = React.useState(0); + + return ( + + + 📝 Peer Evaluations + + + setSelectedTab(value as number)} + > + + {rankings.map((ranking, idx) => ( + + {ranking.rankerModelName} + + ))} + + + {rankings.map((ranking, idx) => ( + + + {/* Full evaluation text */} + + + + + {/* Extracted ranking (highlighted) */} + + + Extracted Ranking: + + + {ranking.extractedRanking} + + + + + ))} + + + ); +} diff --git a/src/modules/beam/gather/council/CouncilHeatmap.tsx b/src/modules/beam/gather/council/CouncilHeatmap.tsx new file mode 100644 index 000000000..f10924fd0 --- /dev/null +++ b/src/modules/beam/gather/council/CouncilHeatmap.tsx @@ -0,0 +1,222 @@ +/** + * Council Heatmap Matrix - shows who ranked whom + */ + +import * as React from 'react'; +import { Box, Sheet, Typography, Tooltip } from '@mui/joy'; + +import type { CouncilRanking } from './beam.gather.council.types'; + +interface CouncilHeatmapProps { + rankings: CouncilRanking[]; + rayIds: string[]; + rayModelNames: Map; +} + +export function CouncilHeatmap(props: CouncilHeatmapProps) { + const { rankings, rayIds, rayModelNames } = props; + + // Build matrix: ranker -> ranked -> position + const matrix = new Map>(); + for (const ranking of rankings) { + const rankerMap = new Map(); + for (const { rayId, position } of ranking.rankings) { + rankerMap.set(rayId, position); + } + matrix.set(ranking.rankerRayId, rankerMap); + } + + // Calculate average rank for each ray (column totals) + const avgRanks = rayIds.map(rayId => { + const positions: number[] = []; + for (const ranking of rankings) { + const pos = ranking.rankings.find(r => r.rayId === rayId)?.position; + 
/**
 * Maps a rank position onto the heatmap color scale: rank 1 is green (hue 120),
 * the last rank is red (hue 0), passing through yellow and orange in between.
 *
 * @param position  1-based rank given by a ranker, or `undefined` when the ranker
 *                  did not rank this response.
 * @param totalRays Number of ranked responses (N); used to normalize the position.
 * @returns A CSS color string: `'#888'` for a missing rank, otherwise `hsl(<hue>, 70%, 50%)`.
 */
const getColor = (position: number | undefined, totalRays: number): string => {
  if (position === undefined || Number.isNaN(position)) return '#888'; // Gray for missing

  // Normalize: 1 -> 0.0, N -> 1.0.
  // With a single response the span is 0; treat it as "best" rather than dividing 0 by 0.
  const span = totalRays - 1;
  const raw = span > 0 ? (position - 1) / span : 0;

  // Positions come from parsed model output and may fall outside 1..N; clamp so the
  // hue always stays inside the green..red range.
  const normalized = Math.min(1, Math.max(0, raw));

  // Green -> Yellow -> Orange -> Red
  let hue: number;
  if (normalized < 0.33) {
    // Green to Yellow
    hue = 120 - 60 * (normalized / 0.33);
  } else if (normalized < 0.67) {
    // Yellow to Orange
    hue = 60 - 30 * ((normalized - 0.33) / 0.34);
  } else {
    // Orange to Red
    hue = 30 - 30 * ((normalized - 0.67) / 0.33);
  }

  // Round so floating-point residue (e.g. 3.5e-15) never leaks into the CSS string.
  return `hsl(${Math.round(hue)}, 70%, 50%)`;
};
'#ddd' : getColor(position, rayIds.length); + + return ( + + + {cellContent} + + + ); + })} + + ); + })} + + {/* Average row */} + + Avg Rank + + {avgRanks.map((avg, idx) => ( + + {avg.toFixed(1)} + + ))} + + + {/* Legend */} + + + Color scale: + + + + (1st → Last) + + + + ); +} diff --git a/src/modules/beam/gather/council/CouncilLeaderboard.tsx b/src/modules/beam/gather/council/CouncilLeaderboard.tsx new file mode 100644 index 000000000..acff7c327 --- /dev/null +++ b/src/modules/beam/gather/council/CouncilLeaderboard.tsx @@ -0,0 +1,125 @@ +/** + * Council Leaderboard - displays aggregate rankings (llm-council style) + */ + +import * as React from 'react'; +import { Box, Sheet, Typography, Chip, Tooltip } from '@mui/joy'; + +import type { CouncilAggregation } from './beam.gather.council.types'; + +interface CouncilLeaderboardProps { + aggregations: CouncilAggregation[]; + showControversy?: boolean; +} + +export function CouncilLeaderboard(props: CouncilLeaderboardProps) { + const { aggregations, showControversy = true } = props; + + // Determine medal emoji + const getMedal = (index: number) => { + if (index === 0) return '🥇'; + if (index === 1) return '🥈'; + if (index === 2) return '🥉'; + return `#${index + 1}`; + }; + + // Determine if controversial (high std dev) + const isControversial = (stdDev: number) => stdDev > 1.0; + + return ( + + + 🏆 Council Rankings + + + + {aggregations.map((agg, index) => { + const controversial = showControversy && isControversial(agg.standardDeviation); + + return ( + + {/* Rank */} + + {getMedal(index)} + + + {/* Model Name */} + + + {agg.modelName} + + + Avg: {agg.averageRank.toFixed(2)} ({agg.voteCount} votes) + + + + {/* Controversy Indicator */} + {controversial && ( + + + ⚡ Controversial + + + )} + + {/* Consensus Indicator */} + {showControversy && !controversial && agg.standardDeviation < 0.5 && ( + + + ✓ Consensus + + + )} + + ); + })} + + + {/* Legend */} + {showControversy && ( + + Lower average rank is better. 
import type { CouncilRanking, CouncilAggregation } from './beam.gather.council.types';

/** Average rank given to a response nobody ranked, so that it sorts last. */
const NO_VOTES_AVERAGE_RANK = 999;

/**
 * Header that opens the ranking list. Markdown emphasis around the header
 * (e.g. `**FINAL RANKING:**`) and any whitespace after it are consumed, so the
 * list may start on the next line or on the header line itself.
 */
const FINAL_RANKING_HEADER = /FINAL RANKING:[*_\s]*/i;

/** One list entry: "1. Response A", "1) Response A" or "1 - Response A". */
const RANKING_ENTRY = /^(\d+)[.)\-\s]+(.+)$/;

/**
 * Returns the text that follows the first "FINAL RANKING:" header, up to the
 * first blank line (or the end of the text), or `null` when there is no header.
 */
function locateRankingSection(evaluationText: string): string | null {
  const header = FINAL_RANKING_HEADER.exec(evaluationText);
  if (!header) return null;

  const afterHeader = evaluationText.slice(header.index + header[0].length);
  const blankLineAt = afterHeader.indexOf('\n\n');
  return blankLineAt === -1 ? afterHeader : afterHeader.slice(0, blankLineAt);
}

/**
 * Finds the response label that appears first in `text` (case-insensitive).
 * A label only counts when it is not immediately followed by a letter or digit,
 * so "Response A" is not detected inside "response accuracy".
 */
function findFirstLabel(text: string, labels: readonly string[]): string | undefined {
  const haystack = text.toLowerCase();
  let bestLabel: string | undefined;
  let bestIndex = Number.POSITIVE_INFINITY;

  for (const label of labels) {
    const needle = label.toLowerCase();
    if (!needle) continue;

    for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + 1)) {
      const following = haystack.charAt(at + needle.length);
      if (/[a-z0-9]/.test(following)) continue; // part of a longer word, keep looking
      if (at < bestIndex) {
        bestIndex = at;
        bestLabel = label;
      }
      break;
    }
  }

  return bestLabel;
}

/**
 * Parse the "FINAL RANKING:" section of an evaluation into (label, position) pairs.
 *
 * @param evaluationText Full evaluation text produced by a ranker.
 * @param responseLabels Known response labels (e.g. "Response A"); entries that
 *                       mention none of them are ignored.
 * @returns One entry per recognized label, in the order listed. The position is the
 *          number written in the list. If a label is listed more than once only its
 *          first entry is kept, so one ranker cannot vote twice for the same response.
 *          Returns an empty array (and logs a warning) when the header is missing.
 */
export function parseCouncilRanking(evaluationText: string, responseLabels: string[]): Array<{ label: string; position: number }> {
  const rankings: Array<{ label: string; position: number }> = [];

  const rankingSection = locateRankingSection(evaluationText);
  if (rankingSection === null) {
    console.warn('Could not find "FINAL RANKING:" section in evaluation');
    return rankings;
  }

  const seenLabels = new Set<string>();
  for (const rawLine of rankingSection.split('\n')) {
    const entry = RANKING_ENTRY.exec(rawLine.trim());
    if (!entry) continue;

    const matchedLabel = findFirstLabel(entry[2], responseLabels);
    if (matchedLabel === undefined || seenLabels.has(matchedLabel)) continue;

    seenLabels.add(matchedLabel);
    rankings.push({ label: matchedLabel, position: parseInt(entry[1], 10) });
  }

  return rankings;
}

/**
 * Population standard deviation of the given values (divides by the number of
 * values); returns 0 for an empty list.
 */
function calculateStandardDeviation(values: number[]): number {
  if (values.length === 0) return 0;

  const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
  const variance = values.reduce((sum, val) => sum + (val - mean) ** 2, 0) / values.length;

  return Math.sqrt(variance);
}

/**
 * Aggregate rankings across all rankers.
 *
 * @param rankings            Parsed rankings, one per ranker.
 * @param rayIds              Responses to aggregate; each yields one result entry.
 * @param rayModelNames       rayId -> display name ('Unknown' when absent).
 * @param rayResponsePreviews rayId -> short preview text ('' when absent).
 * @returns One aggregation per ray, sorted by ascending average rank (lower is
 *          better). Responses without any vote get an average rank of 999.
 */
export function aggregateCouncilRankings(
  rankings: CouncilRanking[],
  rayIds: string[],
  rayModelNames: Map<string, string>,
  rayResponsePreviews: Map<string, string>,
): CouncilAggregation[] {
  const aggregations: CouncilAggregation[] = rayIds.map(rayId => {
    // Collect every position this ray received; each ranker contributes at most its first entry for the ray
    const positions: number[] = [];
    for (const ranking of rankings) {
      const rankEntry = ranking.rankings.find(r => r.rayId === rayId);
      if (rankEntry) positions.push(rankEntry.position);
    }

    const averageRank = positions.length > 0
      ? positions.reduce((sum, pos) => sum + pos, 0) / positions.length
      : NO_VOTES_AVERAGE_RANK; // No votes = worst possible

    return {
      rayId,
      modelName: rayModelNames.get(rayId) || 'Unknown',
      averageRank,
      voteCount: positions.length,
      standardDeviation: calculateStandardDeviation(positions), // controversy metric
      positions,
      responsePreview: rayResponsePreviews.get(rayId) || '',
    };
  });

  // Sort by average rank (ascending - lower is better)
  aggregations.sort((a, b) => a.averageRank - b.averageRank);

  return aggregations;
}

/**
 * Build ranking matrix: ranker -> ranked -> position.
 * When a ranker lists the same ray more than once, the last position wins.
 */
export function buildRankingMatrix(rankings: CouncilRanking[]): Map<string, Map<string, number>> {
  const matrix = new Map<string, Map<string, number>>();

  for (const ranking of rankings) {
    const rankerMap = new Map<string, number>();
    for (const { rayId, position } of ranking.rankings)
      rankerMap.set(rayId, position);
    matrix.set(ranking.rankerRayId, rankerMap);
  }

  return matrix;
}

/**
 * Extract the "FINAL RANKING:" section as formatted text.
 *
 * @returns "FINAL RANKING:\n" followed by the section text, or 'No ranking found'
 *          when the evaluation contains no header.
 */
export function extractRankingSection(evaluationText: string): string {
  const rankingSection = locateRankingSection(evaluationText);
  return rankingSection === null ? 'No ranking found' : `FINAL RANKING:\n${rankingSection}`;
}
`FINAL RANKING:\n${match[1]}` : 'No ranking found'; +} diff --git a/src/modules/beam/gather/council/beam.gather.council.execution.ts b/src/modules/beam/gather/council/beam.gather.council.execution.ts new file mode 100644 index 000000000..69d0683c6 --- /dev/null +++ b/src/modules/beam/gather/council/beam.gather.council.execution.ts @@ -0,0 +1,221 @@ +/** + * Council voting execution logic + * Orchestrates ranking, aggregation, and chairman synthesis + */ + +import { createDMessageTextContent, DMessage, messageFragmentsReduceText } from '~/common/stores/chat/chat.message'; +import { aixChatGenerateContent_DMessage_FromConversation } from '~/modules/aix/client/aix.client'; +import { getUXLabsHighPerformance } from '~/common/stores/store-ux-labs'; + +import type { CouncilRanking, CouncilResults, CouncilProgress } from './beam.gather.council.types'; +import { createCouncilRankingPrompt, createCouncilChairmanPrompt, extractUserQuery } from './beam.gather.council.prompts'; +import { parseCouncilRanking, aggregateCouncilRankings, buildRankingMatrix, extractRankingSection } from './beam.gather.council.aggregation'; + +interface RayData { + rayId: string; + llmId: string; + modelName: string; + message: DMessage; +} + +/** + * Execute the full council voting process: + * 1. Each model ranks all responses + * 2. Aggregate rankings + * 3. 
/**
 * Execute the full council voting process:
 * 1. Each model ranks all responses
 * 2. Aggregate rankings
 * 3. Chairman synthesizes final answer
 *
 * @param chatHistory   Conversation so far; the last user message is used as the question.
 * @param rays          Responses to rank; each ray's own model also acts as a ranker.
 * @param chairmanLlmId Model used for the final synthesis.
 * @param abortSignal   Forwarded to every generation call.
 * @param onProgress    Called on every state change. `currentStep` counts the N ranking
 *                      calls plus the synthesis, so it never exceeds `totalSteps` (N + 1).
 * @returns The per-ranker rankings, the aggregated leaderboard, the ranking matrix and
 *          the chairman's synthesized message.
 * @throws Error when any ranking or the synthesis is aborted or fails. An 'error'
 *         progress update is emitted before the error is re-thrown.
 */
export async function executeCouncilVoting(
  chatHistory: readonly DMessage[],
  rays: RayData[],
  chairmanLlmId: string,
  abortSignal: AbortSignal,
  onProgress: (progress: CouncilProgress) => void,
): Promise<CouncilResults> {
  const totalSteps = rays.length + 1; // N rankings + 1 synthesis
  let currentStep = 0;

  try {
    // Step 1: Extract user query
    const userQuery = extractUserQuery(
      chatHistory.map(m => ({ role: m.role, text: messageFragmentsReduceText(m.fragments) })),
    );

    // Step 2: Prepare response labels and content for ranking
    const responseLabels = rays.map((_, idx) => `Response ${String.fromCharCode(65 + idx)}`); // A, B, C, ...
    const responsesForRanking = rays.map((ray, idx) => ({
      label: responseLabels[idx],
      content: messageFragmentsReduceText(ray.message.fragments),
    }));

    // Every ranker receives the same anonymized prompt, so build it once
    const rankingPrompt = createCouncilRankingPrompt(userQuery, responsesForRanking);

    // Step 3: Each model ranks all responses
    onProgress({
      state: 'ranking',
      currentStep: 0,
      totalSteps,
      message: 'Starting peer rankings...',
    });

    const rankings: CouncilRanking[] = [];

    for (const ray of rays) {
      currentStep++;

      onProgress({
        state: 'ranking',
        currentStep,
        totalSteps,
        message: `${ray.modelName} evaluating responses...`,
      });

      // Create conversation for ranking
      const rankerSystemMessage = createDMessageTextContent('system', 'You are an expert evaluator analyzing AI responses.');
      const rankerUserMessage = createDMessageTextContent('user', rankingPrompt);

      // Execute ranking via AIX
      const result = await aixChatGenerateContent_DMessage_FromConversation(
        ray.llmId, // Use the ray's own model to rank
        rankerSystemMessage,
        [rankerUserMessage],
        'beam-council-ranking',
        ray.rayId,
        { abortSignal, throttleParallelThreads: getUXLabsHighPerformance() ? 0 : 1 },
        () => {
          // Streaming updates are not surfaced; the final text is read from the result below
        },
      );

      if (result.outcome === 'aborted') {
        throw new Error('Ranking aborted');
      }
      if (result.outcome === 'errored') {
        throw new Error(`Ranking failed: ${result.errorMessage || 'Unknown error'}`);
      }

      const evaluationText = messageFragmentsReduceText(result.lastDMessage.fragments);

      // Parse rankings from the evaluation and map response labels back to ray IDs.
      // Labels returned by the parser are always taken from responseLabels, so the index is valid.
      const rankingsWithIds = parseCouncilRanking(evaluationText, responseLabels).map(({ label, position }) => ({
        rayId: rays[responseLabels.indexOf(label)].rayId,
        position,
      }));

      rankings.push({
        rankerRayId: ray.rayId,
        rankerModelName: ray.modelName,
        rankings: rankingsWithIds,
        evaluationText,
        extractedRanking: extractRankingSection(evaluationText),
      });
    }

    // Step 4: Aggregate rankings.
    // Aggregation is local and not one of the N + 1 counted steps, so currentStep is not
    // advanced here; advancing it made the synthesis report step N + 2 of N + 1.
    onProgress({
      state: 'aggregating',
      currentStep,
      totalSteps,
      message: 'Calculating aggregate rankings...',
    });

    const rayModelNames = new Map<string, string>(
      rays.map((r): [string, string] => [r.rayId, r.modelName]),
    );
    const rayResponsePreviews = new Map<string, string>(
      rays.map((r): [string, string] => [r.rayId, messageFragmentsReduceText(r.message.fragments).slice(0, 100)]),
    );

    const aggregations = aggregateCouncilRankings(
      rankings,
      rays.map(r => r.rayId),
      rayModelNames,
      rayResponsePreviews,
    );

    const rankingMatrix = buildRankingMatrix(rankings);

    // Step 5: Chairman synthesis (the final counted step)
    currentStep++;
    onProgress({
      state: 'synthesizing',
      currentStep,
      totalSteps,
      message: 'Chairman synthesizing final answer...',
    });

    const responsesForChairman = rays.map(ray => ({
      rayId: ray.rayId,
      modelName: ray.modelName,
      content: messageFragmentsReduceText(ray.message.fragments),
    }));

    const rankingsForChairman = rankings.map(r => ({
      rankerName: r.rankerModelName,
      evaluationText: r.evaluationText,
      extractedRanking: r.extractedRanking,
    }));

    const chairmanPrompt = createCouncilChairmanPrompt(userQuery, responsesForChairman, rankingsForChairman);

    const systemMessage = createDMessageTextContent('system', 'You are the Chairman of an LLM Council, tasked with synthesizing peer-ranked responses.');
    const userMessage = createDMessageTextContent('user', chairmanPrompt);

    const chairmanResult = await aixChatGenerateContent_DMessage_FromConversation(
      chairmanLlmId,
      systemMessage,
      [userMessage],
      'beam-council-chairman',
      'chairman',
      { abortSignal, throttleParallelThreads: getUXLabsHighPerformance() ? 0 : 1 },
      () => {
        // Progress updates handled via onProgress callback
      },
    );

    if (chairmanResult.outcome === 'aborted') {
      throw new Error('Chairman synthesis aborted');
    }
    if (chairmanResult.outcome === 'errored') {
      throw new Error(`Chairman synthesis failed: ${chairmanResult.errorMessage || 'Unknown error'}`);
    }

    // Step 6: Complete
    onProgress({
      state: 'complete',
      currentStep: totalSteps,
      totalSteps,
      message: 'Council voting complete',
    });

    return {
      rankings,
      aggregations,
      chairmanSynthesis: chairmanResult.lastDMessage,
      rankingMatrix,
    };

  } catch (error) {
    onProgress({
      state: 'error',
      currentStep,
      totalSteps,
      message: 'Council voting failed',
      error: error instanceof Error ? error.message : 'Unknown error',
    });
    throw error;
  }
}
/**
 * Builds the peer-review prompt sent to every ranker.
 *
 * @param userQuery The question the responses are answering.
 * @param responses Anonymized responses; each is rendered as "<label>:" followed by
 *                  its content, with a blank line between responses.
 * @returns The prompt text, which ends by asking for a "FINAL RANKING:" numbered list.
 */
export function createCouncilRankingPrompt(userQuery: string, responses: Array<{ label: string; content: string }>): string {
  const renderedResponses: string[] = [];
  for (const entry of responses)
    renderedResponses.push(`${entry.label}:\n${entry.content}`);
  const responsesText = renderedResponses.join('\n\n');

  // One array element per prompt line; '' is a blank line
  const promptLines = [
    'You are evaluating different responses to the following question:',
    '',
    `Question: ${userQuery}`,
    '',
    'Here are the responses from different models (anonymized):',
    '',
    responsesText,
    '',
    'Your task:',
    '1. First, evaluate each response individually. For each response, explain what it does well and what it does poorly.',
    '2. Then, at the very end of your response, provide a final ranking.',
    '',
    'IMPORTANT: Your final ranking MUST be formatted EXACTLY as follows:',
    '- Start with the line "FINAL RANKING:" (all caps, with colon)',
    '- Then list the responses from best to worst as a numbered list',
    '- Each line should be: number, period, space, then ONLY the response label (e.g., "1. Response A")',
    '- Do not add any other text or explanations in the ranking section',
    '',
    'Example format:',
    'FINAL RANKING:',
    '1. Response B',
    '2. Response A',
    '3. Response D',
    '4. Response C',
    '',
    'Now provide your evaluation and ranking:',
  ];

  return promptLines.join('\n');
}
/**
 * Builds the synthesis prompt for the chairman model.
 *
 * @param userQuery The user's original question.
 * @param responses Stage 1 material: each response is shown under its model name
 *                  (the `rayId` field is not used in the prompt).
 * @param rankings  Stage 2 material: each ranker's full evaluation followed by its
 *                  extracted ranking section.
 * @returns The prompt text asking for a single final answer.
 */
export function createCouncilChairmanPrompt(
  userQuery: string,
  responses: Array<{ rayId: string; modelName: string; content: string }>,
  rankings: Array<{ rankerName: string; evaluationText: string; extractedRanking: string }>,
): string {
  const divider = '\n\n---\n\n';

  // Stage 1: Individual responses with model names
  const stage1Parts: string[] = [];
  for (const response of responses)
    stage1Parts.push(`**${response.modelName}:**\n${response.content}`);

  // Stage 2: Peer rankings with full evaluations
  const stage2Parts: string[] = [];
  for (const ranking of rankings)
    stage2Parts.push(`**${ranking.rankerName}'s Evaluation:**\n\n${ranking.evaluationText}\n\n${ranking.extractedRanking}`);

  // One array element per prompt line; '' is a blank line
  const promptLines = [
    "You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.",
    '',
    `Original Question: ${userQuery}`,
    '',
    'STAGE 1 - Individual Responses:',
    '',
    stage1Parts.join(divider),
    '',
    'STAGE 2 - Peer Rankings:',
    '',
    stage2Parts.join(divider),
    '',
    "Your task as Chairman is to synthesize all of this information into a single, comprehensive, accurate answer to the user's original question. Consider:",
    '- The individual responses and their insights',
    '- The peer rankings and what they reveal about response quality',
    '- Any patterns of agreement or disagreement',
    '',
    "Provide a clear, well-reasoned final answer that represents the council's collective wisdom:",
  ];

  return promptLines.join('\n');
}
/**
 * Returns the text of the most recent message whose role is 'user'.
 *
 * @param chatMessages Messages in chronological order.
 * @returns The last user message's text, or 'No user query found' when the
 *          history contains no user message.
 */
export function extractUserQuery(chatMessages: readonly { role: string; text: string }[]): string {
  // Remember the index of the latest user message while scanning forward
  let lastUserIndex = -1;
  chatMessages.forEach((message, index) => {
    if (message.role === 'user') lastUserIndex = index;
  });

  if (lastUserIndex === -1)
    return 'No user query found';

  return chatMessages[lastUserIndex].text;
}
CouncilAggregation[]; + chairmanSynthesis?: Partial & { fragments: DMessage['fragments'] }; + rankingMatrix: Map>; // ranker -> ranked -> position +} + +/** + * Council voting state + */ +export type CouncilState = 'idle' | 'ranking' | 'aggregating' | 'synthesizing' | 'complete' | 'error'; + +/** + * Council voting progress + */ +export interface CouncilProgress { + state: CouncilState; + currentStep: number; + totalSteps: number; + message: string; + error?: string; +} diff --git a/src/modules/beam/scatter/BeamRayGrid.tsx b/src/modules/beam/scatter/BeamRayGrid.tsx index db828e2c3..796e8c7a3 100644 --- a/src/modules/beam/scatter/BeamRayGrid.tsx +++ b/src/modules/beam/scatter/BeamRayGrid.tsx @@ -5,12 +5,14 @@ import { Box, Button } from '@mui/joy'; import AddCircleOutlineRoundedIcon from '@mui/icons-material/AddCircleOutlineRounded'; import ContentCopyIcon from '@mui/icons-material/ContentCopy'; import TelegramIcon from '@mui/icons-material/Telegram'; +import HowToVoteIcon from '@mui/icons-material/HowToVote'; import type { BeamStoreApi } from '../store-beam.hooks'; import { BeamCard } from '../BeamCard'; import { SCATTER_RAY_MAX, SCATTER_RAY_MIN } from '../beam.config'; import { BeamRay } from './BeamRay'; +import { BeamCouncilView } from '../gather/council/BeamCouncilView'; const rayGridDesktopSx: SxProps = { @@ -40,6 +42,24 @@ export function BeamRayGrid(props: { const raysCount = props.rayIds.length; + // Council voting state + const [isCouncilActive, setIsCouncilActive] = React.useState(false); + + // Check if council voting is available (need at least 2 completed rays) + const rays = props.beamStore.getState().rays; + const completedRays = rays.filter(r => r.status === 'success'); + const canRunCouncil = completedRays.length >= 2; + + const handleCouncilStart = () => { + setIsCouncilActive(true); + props.beamStore.getState().setCouncilActive(true); + }; + + const handleCouncilClose = () => { + setIsCouncilActive(false); + 
props.beamStore.getState().setCouncilActive(false); + }; + return ( @@ -90,6 +110,33 @@ export function BeamRayGrid(props: { )} + {/* Council Voting Button */} + {canRunCouncil && !isCouncilActive && ( + + + + )} + + {/* Council View */} + {isCouncilActive && ( + + )} + {/*/!* Takes a full row *!/*/} {/* { @@ -176,6 +179,7 @@ export const reInitScatterStateSlice = (prevRays: BRay[]): ScatterStateSlice => isScattering: false, raysReady: 0, + isCouncilActive: false, }; }; @@ -192,6 +196,9 @@ export interface ScatterStoreSlice extends ScatterStateSlice { raySetLlmId: (rayId: BRayId, llmId: DLLMId | null) => void; _rayUpdate: (rayId: BRayId, update: Partial | ((ray: BRay) => Partial)) => void; + // council actions + setCouncilActive: (active: boolean) => void; + _storeLastScatterConfig: () => void; _syncRaysStateToScatter: () => void; @@ -353,6 +360,11 @@ export const createScatterSlice: StateCreator + _set({ + isCouncilActive: active, + }), + _storeLastScatterConfig: () => { updateBeamLastConfig({ rayLlmIds: _get().rays.map(ray => ray.rayLlmId).filter(Boolean) as DLLMId[],