Compare commits

...

1 Commits

Author SHA1 Message Date
claude[bot] 7de7a6bd46 feat: Implement council voting with all graphical options for Beam
- Add complete council voting implementation matching llm-council exactly
- Core logic: ranking, aggregation, chairman synthesis
- UI components: leaderboard, heatmap matrix, tabbed evaluations
- Controversy indicators with standard deviation highlighting
- Integration: 'Run Council Vote' button in RayGrid
- Wire up state management in BeamStore
- Add AIX context names for council operations

Implements 1:1 llm-council prompts and algorithm:
- Stage 1: Scatter (N models generate responses)
- Stage 2: Peer ranking (everyone ranks everyone)
- Stage 3: Chairman synthesis (weighted by rankings)

Visualizations:
- Leaderboard with consensus/controversy indicators
- Interactive heatmap matrix (color-coded rankings)
- Tabbed peer evaluations with extracted rankings
- Expandable accordions for detailed views

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Enrico Ros <enricoros@users.noreply.github.com>
2025-11-23 21:06:40 +00:00
11 changed files with 1269 additions and 0 deletions
@@ -502,6 +502,8 @@ export namespace AixWire_API {
'aifn-image-caption', // generating image captions - attachmentId
'beam-gather', // fusing beam rays - fusionId
'beam-scatter', // scattering beam rays - rayId
'beam-council-ranking', // council peer ranking - rayId
'beam-council-chairman', // council chairman synthesis - 'chairman'
'call', // having a phone conversation - messageId of the first message
'conversation', // chatting with a persona - conversationId
'persona-extract', // extracting a persona from texts - chainId
@@ -0,0 +1,260 @@
/**
* BeamCouncilView - Main council voting view
* Displays all visualizations and orchestrates the council voting process
*/
import * as React from 'react';
import { Box, Button, CircularProgress, Sheet, Typography, Accordion, AccordionSummary, AccordionDetails, AccordionGroup } from '@mui/joy';
import CloseIcon from '@mui/icons-material/Close';
import CheckCircleIcon from '@mui/icons-material/CheckCircle';
import HowToVoteIcon from '@mui/icons-material/HowToVote';
import { ChatMessage } from '../../../../apps/chat/components/message/ChatMessage';
import { getIsMobile } from '~/common/components/useMatchMedia';
import { messageFragmentsReduceText } from '~/common/stores/chat/chat.message';
import type { BeamStoreApi } from '../../store-beam.hooks';
import { beamCardMessageScrollingSx, beamCardMessageSx } from '../../BeamCard';
import { getBeamCardScrolling } from '../../store-module-beam';
import type { CouncilResults, CouncilProgress } from './beam.gather.council.types';
import { CouncilLeaderboard } from './CouncilLeaderboard';
import { CouncilHeatmap } from './CouncilHeatmap';
import { CouncilEvaluations } from './CouncilEvaluations';
/** Props accepted by BeamCouncilView. */
interface BeamCouncilViewProps {
// Beam store whose rays, input history and gather LLM id are read to run the vote
beamStore: BeamStoreApi;
// Called when the user presses the "Close" button in the header
onClose: () => void;
// Optional: called with the chairman synthesis reduced to plain text when "Accept Synthesis" is pressed
onAccept?: (synthesisText: string) => void;
}
/**
 * Runs the council vote once on mount and renders its progress, any error and,
 * once finished, the leaderboard, heatmap, peer evaluations and chairman synthesis.
 *
 * The in-flight vote is aborted when the component unmounts (e.g. the user presses
 * "Close"), and no state is written after that point.
 */
export function BeamCouncilView(props: BeamCouncilViewProps) {
  const { beamStore, onClose, onAccept } = props;

  const [progress, setProgress] = React.useState<CouncilProgress>({
    state: 'idle',
    currentStep: 0,
    totalSteps: 0,
    message: '',
  });
  const [results, setResults] = React.useState<CouncilResults | null>(null);
  const [error, setError] = React.useState<string | null>(null);
  const isMobile = getIsMobile();

  // Snapshot of the rays at render time, plus a display name for each one
  const rays = beamStore.getState().rays;
  const rayIds = rays.map(r => r.rayId);
  const rayModelNames = React.useMemo(() => {
    const map = new Map<string, string>();
    rays.forEach((ray, index) => {
      // Use the LLM id as the name, falling back to a positional label
      map.set(ray.rayId, ray.rayLlmId || `Model ${index + 1}`);
    });
    return map;
  }, [rays]);

  // Run council voting when the component mounts
  React.useEffect(() => {
    // Owned by this effect run: aborted in the cleanup so that closing/unmounting the
    // view (or a StrictMode double-invocation) cancels the pending LLM requests.
    const abortController = new AbortController();
    const isCancelled = () => abortController.signal.aborted;

    const runCouncilVoting = async () => {
      try {
        setProgress({
          state: 'ranking',
          currentStep: 0,
          totalSteps: rays.length + 1,
          message: 'Initializing council voting...',
        });

        const { executeCouncilVoting } = await import('./beam.gather.council.execution');
        if (isCancelled()) return;

        const chatHistory = beamStore.getState().inputHistory || [];
        const chairmanLlmId = beamStore.getState().currentGatherLlmId || rays[0]?.rayLlmId;
        if (!chairmanLlmId) {
          throw new Error('No chairman model selected');
        }

        const rayData = rays.map(ray => ({
          rayId: ray.rayId,
          llmId: ray.rayLlmId!,
          modelName: rayModelNames.get(ray.rayId) || 'Unknown',
          message: ray.message!,
        }));

        const councilResults = await executeCouncilVoting(
          chatHistory,
          rayData,
          chairmanLlmId,
          abortController.signal,
          // Drop progress updates that arrive after cancellation
          (update: CouncilProgress) => {
            if (!isCancelled()) setProgress(update);
          },
        );
        if (isCancelled()) return;

        setResults(councilResults);
        setProgress({
          state: 'complete',
          currentStep: rays.length + 1,
          totalSteps: rays.length + 1,
          message: 'Council voting complete!',
        });
      } catch (err) {
        // A cancelled run is not an error worth reporting, and the component may be gone
        if (isCancelled()) return;
        const errorMessage = err instanceof Error ? err.message : 'Unknown error';
        setError(errorMessage);
        setProgress({
          state: 'error',
          currentStep: 0,
          totalSteps: 0,
          message: 'Council voting failed',
          error: errorMessage,
        });
      }
    };

    void runCouncilVoting();

    return () => abortController.abort();
  }, []); // Run once on mount

  const handleAccept = () => {
    if (results?.chairmanSynthesis && onAccept) {
      const synthesisText = messageFragmentsReduceText(results.chairmanSynthesis.fragments);
      onAccept(synthesisText);
    }
  };

  return (
    <Box
      sx={{
        gridColumn: '1 / -1',
        mt: 2,
        mb: 2,
      }}
    >
      <Sheet
        variant='outlined'
        sx={{
          borderRadius: 'lg',
          p: 3,
          backgroundColor: 'background.surface',
        }}
      >
        {/* Header */}
        <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', mb: 3 }}>
          <Box sx={{ display: 'flex', alignItems: 'center', gap: 2 }}>
            <HowToVoteIcon sx={{ fontSize: '2rem', color: 'primary.solidBg' }} />
            <Typography level='h3'>
              Council Voting
            </Typography>
          </Box>
          <Button
            size='sm'
            variant='plain'
            color='neutral'
            onClick={onClose}
            startDecorator={<CloseIcon />}
          >
            Close
          </Button>
        </Box>

        {/* Progress */}
        {progress.state !== 'complete' && progress.state !== 'error' && (
          <Box sx={{ display: 'flex', alignItems: 'center', gap: 2, mb: 3 }}>
            <CircularProgress size='sm' />
            <Typography level='body-sm'>
              {progress.message} ({progress.currentStep}/{progress.totalSteps})
            </Typography>
          </Box>
        )}

        {/* Error */}
        {error && (
          <Box
            sx={{
              p: 2,
              backgroundColor: 'danger.softBg',
              borderRadius: 'md',
              mb: 3,
            }}
          >
            <Typography level='body-sm' sx={{ color: 'danger.solidColor' }}>
              Error: {error}
            </Typography>
          </Box>
        )}

        {/* Results */}
        {results && (
          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 3 }}>
            {/* Leaderboard */}
            <CouncilLeaderboard
              aggregations={results.aggregations}
              showControversy={true}
            />

            {/* Expandable sections */}
            <AccordionGroup>
              {/* Heatmap Matrix */}
              <Accordion>
                <AccordionSummary>
                  <Typography level='title-sm'>📊 Ranking Matrix (Heatmap)</Typography>
                </AccordionSummary>
                <AccordionDetails>
                  <CouncilHeatmap
                    rankings={results.rankings}
                    rayIds={rayIds}
                    rayModelNames={rayModelNames}
                  />
                </AccordionDetails>
              </Accordion>

              {/* Evaluations */}
              <Accordion>
                <AccordionSummary>
                  <Typography level='title-sm'>📝 Peer Evaluations (Detailed)</Typography>
                </AccordionSummary>
                <AccordionDetails>
                  <CouncilEvaluations rankings={results.rankings} />
                </AccordionDetails>
              </Accordion>
            </AccordionGroup>

            {/* Chairman Synthesis */}
            {results.chairmanSynthesis && (
              <Box>
                <Typography level='title-md' sx={{ mb: 2 }}>
                  🎯 Chairman Synthesis
                </Typography>
                <ChatMessage
                  message={results.chairmanSynthesis as any}
                  fitScreen={isMobile}
                  isMobile={isMobile}
                  hideAvatar
                  adjustContentScaling={-1}
                  sx={!getBeamCardScrolling() ? beamCardMessageSx : beamCardMessageScrollingSx}
                />
              </Box>
            )}

            {/* Action Buttons */}
            <Box sx={{ display: 'flex', gap: 2, justifyContent: 'flex-end', mt: 2 }}>
              {onAccept && (
                <Button
                  size='lg'
                  variant='solid'
                  color='success'
                  onClick={handleAccept}
                  startDecorator={<CheckCircleIcon />}
                >
                  Accept Synthesis
                </Button>
              )}
            </Box>
          </Box>
        )}
      </Sheet>
    </Box>
  );
}
@@ -0,0 +1,91 @@
/**
* Council Evaluations - tabbed view of all ranking evaluations
*/
import * as React from 'react';
import { Box, Sheet, Typography, Tabs, TabList, Tab, TabPanel } from '@mui/joy';
import { RenderMarkdown } from '~/modules/blocks/markdown/RenderMarkdown';
import type { CouncilRanking } from './beam.gather.council.types';
/** Props accepted by CouncilEvaluations. */
interface CouncilEvaluationsProps {
// One entry per ranker; each entry is rendered as its own tab
rankings: CouncilRanking[];
}
/**
 * Tabbed viewer for the peer evaluations: one tab per ranker, each showing the
 * full evaluation rendered as markdown followed by the extracted ranking text.
 */
export function CouncilEvaluations(props: CouncilEvaluationsProps) {
  const [selectedTab, setSelectedTab] = React.useState(0);

  // Tab headers, labelled with the ranker's model name
  const tabHeaders = props.rankings.map((entry, tabIndex) => (
    <Tab key={entry.rankerRayId} value={tabIndex}>
      {entry.rankerModelName}
    </Tab>
  ));

  // Tab bodies, scrollable when the evaluation is long
  const tabBodies = props.rankings.map((entry, tabIndex) => (
    <TabPanel key={entry.rankerRayId} value={tabIndex} sx={{ p: 2 }}>
      <Box sx={{ maxHeight: '400px', overflowY: 'auto', pr: 1 }}>
        {/* Full evaluation text */}
        <Box sx={{ mb: 3 }}>
          <RenderMarkdown content={entry.evaluationText} sx={{ fontSize: 'sm' }} />
        </Box>

        {/* Extracted ranking (highlighted) */}
        <Box
          sx={{
            mt: 2,
            p: 2,
            backgroundColor: 'primary.softBg',
            borderLeft: '3px solid',
            borderColor: 'primary.solidBg',
            borderRadius: 'sm',
          }}
        >
          <Typography level='body-xs' sx={{ fontWeight: 'bold', mb: 1 }}>
            Extracted Ranking:
          </Typography>
          <Typography level='body-sm' sx={{ fontFamily: 'monospace', whiteSpace: 'pre-wrap' }}>
            {entry.extractedRanking}
          </Typography>
        </Box>
      </Box>
    </TabPanel>
  ));

  return (
    <Sheet variant='soft' sx={{ borderRadius: 'md', p: 2 }}>
      <Typography level='title-md' sx={{ mb: 2 }}>
        📝 Peer Evaluations
      </Typography>
      <Tabs
        value={selectedTab}
        onChange={(_, value) => setSelectedTab(value as number)}
      >
        <TabList>
          {tabHeaders}
        </TabList>
        {tabBodies}
      </Tabs>
    </Sheet>
  );
}
@@ -0,0 +1,222 @@
/**
* Council Heatmap Matrix - shows who ranked whom
*/
import * as React from 'react';
import { Box, Sheet, Typography, Tooltip } from '@mui/joy';
import type { CouncilRanking } from './beam.gather.council.types';
/** Props accepted by CouncilHeatmap. */
interface CouncilHeatmapProps {
// Per-ranker rankings; each becomes one row of the matrix
rankings: CouncilRanking[];
// Ray ids, used in this order for both the rows (rankers) and the columns (ranked)
rayIds: string[];
// Display name per ray id; the ray id itself is shown for headers when a name is missing
rayModelNames: Map<string, string>;
}
/**
 * Heatmap matrix of the council vote: rows are rankers, columns are ranked responses,
 * and each cell shows the position the row's ranker gave to the column's response.
 * The diagonal (a ray versus itself) is rendered as '-', and a final row shows the
 * average position per column.
 *
 * The average row is computed from every entry found in `rankings` for that ray;
 * columns without any vote show a dash instead of a number.
 */
export function CouncilHeatmap(props: CouncilHeatmapProps) {
  const { rankings, rayIds, rayModelNames } = props;

  // Build matrix: ranker -> ranked -> position
  const matrix = new Map<string, Map<string, number>>();
  for (const ranking of rankings) {
    const rankerMap = new Map<string, number>();
    for (const { rayId, position } of ranking.rankings) {
      rankerMap.set(rayId, position);
    }
    matrix.set(ranking.rankerRayId, rankerMap);
  }

  // Average rank for each ray (column), or null when nobody ranked it
  const avgRanks: Array<number | null> = rayIds.map(rayId => {
    const positions: number[] = [];
    for (const ranking of rankings) {
      const pos = ranking.rankings.find(r => r.rayId === rayId)?.position;
      if (pos !== undefined) positions.push(pos);
    }
    return positions.length > 0
      ? positions.reduce((sum, p) => sum + p, 0) / positions.length
      : null;
  });

  // Color gradient: 1 (green) -> N (red)
  const getColor = (position: number | undefined, totalRays: number) => {
    if (position === undefined) return '#888'; // Gray for missing

    // Normalize: 1 -> 0.0, N -> 1.0. A single ray would divide by zero, and a model may
    // report a position outside 1..N, so guard the divisor and clamp to [0, 1].
    const span = totalRays - 1;
    const normalized = span > 0
      ? Math.min(1, Math.max(0, (position - 1) / span))
      : 0;

    // Green -> Yellow -> Orange -> Red
    if (normalized < 0.33) {
      // Green to Yellow
      const t = normalized / 0.33;
      return `hsl(${120 - 60 * t}, 70%, 50%)`;
    } else if (normalized < 0.67) {
      // Yellow to Orange
      const t = (normalized - 0.33) / 0.34;
      return `hsl(${60 - 30 * t}, 70%, 50%)`;
    } else {
      // Orange to Red
      const t = (normalized - 0.67) / 0.33;
      return `hsl(${30 - 30 * t}, 70%, 50%)`;
    }
  };

  // Display name with the ray id as fallback, so labels never read "undefined"
  const nameOf = (rayId: string) => rayModelNames.get(rayId) || rayId;

  return (
    <Sheet
      variant='soft'
      sx={{
        borderRadius: 'md',
        p: 2,
        overflowX: 'auto',
      }}
    >
      <Typography level='title-md' sx={{ mb: 2 }}>
        📊 Ranking Matrix
      </Typography>
      <Box
        sx={{
          display: 'grid',
          gridTemplateColumns: `120px repeat(${rayIds.length}, 60px)`,
          gap: 0.5,
          fontSize: 'xs',
        }}
      >
        {/* Header row */}
        <Box />
        {rayIds.map(rayId => (
          <Tooltip key={rayId} title={nameOf(rayId)}>
            <Box
              sx={{
                textAlign: 'center',
                fontWeight: 'bold',
                fontSize: '0.75rem',
                transform: 'rotate(-45deg)',
                transformOrigin: 'center',
                whiteSpace: 'nowrap',
                overflow: 'hidden',
                textOverflow: 'ellipsis',
                height: '60px',
                display: 'flex',
                alignItems: 'flex-end',
                justifyContent: 'center',
              }}
            >
              {nameOf(rayId).slice(0, 10)}
            </Box>
          </Tooltip>
        ))}

        {/* Matrix rows */}
        {rayIds.map(rankerRayId => {
          const rankerName = nameOf(rankerRayId);
          const rankerMap = matrix.get(rankerRayId);
          return (
            <React.Fragment key={rankerRayId}>
              {/* Row header */}
              <Tooltip title={rankerName}>
                <Box
                  sx={{
                    fontWeight: 'bold',
                    fontSize: '0.75rem',
                    overflow: 'hidden',
                    textOverflow: 'ellipsis',
                    whiteSpace: 'nowrap',
                    display: 'flex',
                    alignItems: 'center',
                  }}
                >
                  {rankerName.slice(0, 15)}
                </Box>
              </Tooltip>

              {/* Row cells */}
              {rayIds.map(rankedRayId => {
                const position = rankerMap?.get(rankedRayId);
                const isSelf = rankerRayId === rankedRayId;
                const cellContent = isSelf ? '-' : (position !== undefined ? position.toString() : '?');
                const bgColor = isSelf ? '#ddd' : getColor(position, rayIds.length);
                return (
                  <Tooltip
                    key={rankedRayId}
                    title={
                      isSelf
                        ? 'Self (not ranked)'
                        : `${rankerName} ranked ${nameOf(rankedRayId)} as #${position ?? '?'}`
                    }
                  >
                    <Box
                      sx={{
                        backgroundColor: bgColor,
                        color: isSelf ? '#888' : '#fff',
                        textAlign: 'center',
                        fontWeight: 'bold',
                        fontSize: '0.875rem',
                        display: 'flex',
                        alignItems: 'center',
                        justifyContent: 'center',
                        borderRadius: 'sm',
                        height: '40px',
                        cursor: 'pointer',
                        '&:hover': {
                          transform: 'scale(1.1)',
                          boxShadow: 'md',
                          zIndex: 10,
                        },
                        transition: 'transform 0.2s, box-shadow 0.2s',
                      }}
                    >
                      {cellContent}
                    </Box>
                  </Tooltip>
                );
              })}
            </React.Fragment>
          );
        })}

        {/* Average row */}
        <Box sx={{ fontWeight: 'bold', fontSize: '0.75rem', borderTop: '2px solid', pt: 1, mt: 1 }}>
          Avg Rank
        </Box>
        {avgRanks.map((avg, idx) => (
          <Box
            key={rayIds[idx]}
            sx={{
              textAlign: 'center',
              fontWeight: 'bold',
              fontSize: '0.875rem',
              borderTop: '2px solid',
              pt: 1,
              mt: 1,
            }}
          >
            {/* A dash, not 0.0, when no ranker voted for this column */}
            {avg !== null ? avg.toFixed(1) : '–'}
          </Box>
        ))}
      </Box>

      {/* Legend */}
      <Box sx={{ mt: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
        <Typography level='body-xs' sx={{ color: 'text.tertiary' }}>
          Color scale:
        </Typography>
        <Box
          sx={{
            width: '100px',
            height: '12px',
            background: 'linear-gradient(to right, hsl(120, 70%, 50%), hsl(60, 70%, 50%), hsl(30, 70%, 50%), hsl(0, 70%, 50%))',
            borderRadius: 'sm',
          }}
        />
        <Typography level='body-xs' sx={{ color: 'text.tertiary' }}>
          (1st → Last)
        </Typography>
      </Box>
    </Sheet>
  );
}
@@ -0,0 +1,125 @@
/**
* Council Leaderboard - displays aggregate rankings (llm-council style)
*/
import * as React from 'react';
import { Box, Sheet, Typography, Chip, Tooltip } from '@mui/joy';
import type { CouncilAggregation } from './beam.gather.council.types';
/** Props accepted by CouncilLeaderboard. */
interface CouncilLeaderboardProps {
// Aggregated results, rendered in the order given (index 0 is shown as the winner)
aggregations: CouncilAggregation[];
// Whether to show the controversy/consensus chips and the legend; defaults to true
showControversy?: boolean;
}
/**
 * Leaderboard of the aggregated council rankings (llm-council style).
 *
 * Entries are rendered in the order received: the first three get medals and the
 * first one is highlighted. When `showControversy` is on, entries with a standard
 * deviation above 1.0 get a "Controversial" chip, and entries with at least two
 * votes and a standard deviation below 0.5 get a "Consensus" chip.
 *
 * Entries without any vote show "n/a" instead of an average, and never count as
 * consensus: a deviation of 0 from zero or one vote says nothing about agreement.
 */
export function CouncilLeaderboard(props: CouncilLeaderboardProps) {
  const { aggregations, showControversy = true } = props;

  // Determine medal emoji
  const getMedal = (index: number) => {
    if (index === 0) return '🥇';
    if (index === 1) return '🥈';
    if (index === 2) return '🥉';
    return `#${index + 1}`;
  };

  // Determine if controversial (high std dev)
  const isControversial = (stdDev: number) => stdDev > 1.0;

  return (
    <Sheet
      variant='soft'
      sx={{
        borderRadius: 'md',
        p: 2,
      }}
    >
      <Typography level='title-md' sx={{ mb: 2 }}>
        🏆 Council Rankings
      </Typography>
      <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1.5 }}>
        {aggregations.map((agg, index) => {
          const hasVotes = agg.voteCount > 0;
          const controversial = showControversy && isControversial(agg.standardDeviation);
          // Agreement needs at least two voters to agree with each other
          const consensual = showControversy && !controversial && agg.voteCount >= 2 && agg.standardDeviation < 0.5;
          return (
            <Box
              key={agg.rayId}
              sx={{
                display: 'flex',
                alignItems: 'center',
                gap: 2,
                p: 1.5,
                borderRadius: 'sm',
                backgroundColor: index === 0 ? 'success.softBg' : 'background.level1',
                border: '1px solid',
                borderColor: controversial ? 'warning.outlinedBorder' : 'divider',
              }}
            >
              {/* Rank */}
              <Typography
                level='h4'
                sx={{
                  minWidth: '3rem',
                  textAlign: 'center',
                  fontSize: '1.5rem',
                }}
              >
                {getMedal(index)}
              </Typography>

              {/* Model Name */}
              <Box sx={{ flex: 1 }}>
                <Typography level='title-sm' sx={{ fontWeight: 'bold' }}>
                  {agg.modelName}
                </Typography>
                <Typography level='body-xs' sx={{ color: 'text.secondary', mt: 0.5 }}>
                  Avg: {hasVotes ? agg.averageRank.toFixed(2) : 'n/a'} ({agg.voteCount} votes)
                </Typography>
              </Box>

              {/* Controversy Indicator */}
              {controversial && (
                <Tooltip
                  title={`Controversial (σ=${agg.standardDeviation.toFixed(2)}). Rankings varied: ${agg.positions.join(', ')}`}
                  placement='left'
                >
                  <Chip
                    size='sm'
                    color='warning'
                    variant='soft'
                  >
                    Controversial
                  </Chip>
                </Tooltip>
              )}

              {/* Consensus Indicator */}
              {consensual && (
                <Tooltip
                  title={`Strong consensus (σ=${agg.standardDeviation.toFixed(2)}). Rankings: ${agg.positions.join(', ')}`}
                  placement='left'
                >
                  <Chip
                    size='sm'
                    color='success'
                    variant='soft'
                  >
                    Consensus
                  </Chip>
                </Tooltip>
              )}
            </Box>
          );
        })}
      </Box>

      {/* Legend */}
      {showControversy && (
        <Typography level='body-xs' sx={{ mt: 2, color: 'text.tertiary', fontStyle: 'italic' }}>
          Lower average rank is better. Controversy indicates disagreement among rankers.
        </Typography>
      )}
    </Sheet>
  );
}
@@ -0,0 +1,136 @@
/**
* Council ranking parsing and aggregation logic
* Implements llm-council's ranking extraction and score calculation
*/
import type { CouncilRanking, CouncilAggregation } from './beam.gather.council.types';
/**
 * Parse the "FINAL RANKING:" section of an evaluation into (label, position) pairs.
 *
 * The section starts after the first "FINAL RANKING:" header (case-insensitive, must be
 * followed by a newline) and is scanned line by line. Blank lines are skipped rather
 * than treated as the end of the section, so loosely spaced lists
 * ("1. Response A\n\n2. Response B") are parsed in full.
 *
 * A line counts as an entry when it looks like "1. Response A", "1) Response A" or
 * "1 - Response A" and mentions one of `responseLabels`. When a line mentions several
 * labels, the one mentioned first in the line wins. Each label is reported at most
 * once (its first occurrence), and scanning stops once every label has been ranked.
 *
 * @param evaluationText full evaluation text produced by a ranker
 * @param responseLabels the labels that may appear in the ranking (e.g. "Response A")
 * @returns the entries in the order they were found; empty when there is no header
 */
export function parseCouncilRanking(evaluationText: string, responseLabels: string[]): Array<{ label: string; position: number }> {
  const rankings: Array<{ label: string; position: number }> = [];

  // Find the "FINAL RANKING:" header
  const headerMatch = /FINAL RANKING:\s*\n/i.exec(evaluationText);
  if (!headerMatch) {
    console.warn('Could not find "FINAL RANKING:" section in evaluation');
    return rankings;
  }
  const rankingSection = evaluationText.slice(headerMatch.index + headerMatch[0].length);

  const lowerLabels = responseLabels.map(label => label.toLowerCase());

  // Index of the label mentioned earliest in `text` (already lowercased), or -1.
  // A mention must not be followed by a letter/digit, so "Response A" does not match "Response AB".
  const findLabelIndex = (text: string): number => {
    let bestLabel = -1;
    let bestOffset = Number.POSITIVE_INFINITY;
    lowerLabels.forEach((needle, labelIdx) => {
      if (!needle) return;
      let from = 0;
      while (from <= text.length) {
        const at = text.indexOf(needle, from);
        if (at === -1) break;
        if (!/[a-z0-9]/.test(text.charAt(at + needle.length))) {
          if (at < bestOffset) {
            bestOffset = at;
            bestLabel = labelIdx;
          }
          break;
        }
        from = at + 1;
      }
    });
    return bestLabel;
  };

  // Match pattern: "1. Response A" or "1) Response A" or "1 - Response A"
  const entryPattern = /^(\d+)[.)\-\s]+(.+)$/;
  const seenLabels = new Set<string>();

  for (const rawLine of rankingSection.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    const entry = entryPattern.exec(line);
    if (!entry) continue;
    const [, positionText = '', labelText = ''] = entry;

    const labelIdx = findLabelIndex(labelText.trim().toLowerCase());
    const matchedLabel = labelIdx >= 0 ? responseLabels[labelIdx] : undefined;
    if (matchedLabel === undefined || seenLabels.has(matchedLabel)) continue;

    seenLabels.add(matchedLabel);
    rankings.push({ label: matchedLabel, position: parseInt(positionText, 10) });

    // Every label is ranked: ignore any trailing text after the list
    if (seenLabels.size === responseLabels.length) break;
  }

  return rankings;
}
/**
 * Population standard deviation of `values` (variance is divided by the number of
 * values). Returns 0 for an empty list.
 */
function calculateStandardDeviation(values: number[]): number {
  const count = values.length;
  if (count === 0) return 0;

  let total = 0;
  for (const value of values) total += value;
  const mean = total / count;

  let squaredDiffTotal = 0;
  for (const value of values) squaredDiffTotal += Math.pow(value - mean, 2);

  return Math.sqrt(squaredDiffTotal / count);
}
/**
 * Aggregate the per-ranker rankings into one summary per ray.
 *
 * For every id in `rayIds` this collects the position each ranker gave it (the first
 * matching entry per ranker), then derives the average rank, the vote count and the
 * standard deviation of the positions. A ray that received no votes gets an average
 * rank of 999 so that it sorts last.
 *
 * @returns one aggregation per ray id, sorted by ascending average rank (lower is better)
 */
export function aggregateCouncilRankings(
  rankings: CouncilRanking[],
  rayIds: string[],
  rayModelNames: Map<string, string>,
  rayResponsePreviews: Map<string, string>,
): CouncilAggregation[] {
  const summaries = rayIds.map((rayId): CouncilAggregation => {
    // Positions this ray received, in ranker order
    const positions = rankings.reduce<number[]>((collected, ranking) => {
      const entry = ranking.rankings.find(r => r.rayId === rayId);
      if (entry) collected.push(entry.position);
      return collected;
    }, []);

    let averageRank = 999; // No votes = worst possible
    if (positions.length > 0) {
      averageRank = positions.reduce((sum, pos) => sum + pos, 0) / positions.length;
    }

    return {
      rayId,
      modelName: rayModelNames.get(rayId) || 'Unknown',
      averageRank,
      voteCount: positions.length,
      standardDeviation: calculateStandardDeviation(positions), // controversy metric
      positions,
      responsePreview: rayResponsePreviews.get(rayId) || '',
    };
  });

  // Ascending: the best (lowest) average rank comes first
  summaries.sort((left, right) => left.averageRank - right.averageRank);
  return summaries;
}
/**
 * Build the ranking matrix as nested maps: ranker ray id -> ranked ray id -> position.
 * When the same key appears more than once, the last value wins.
 */
export function buildRankingMatrix(rankings: CouncilRanking[]): Map<string, Map<string, number>> {
  return new Map<string, Map<string, number>>(
    rankings.map((ranking): [string, Map<string, number>] => [
      ranking.rankerRayId,
      new Map<string, number>(
        ranking.rankings.map((entry): [string, number] => [entry.rayId, entry.position]),
      ),
    ]),
  );
}
/**
 * Return the "FINAL RANKING:" section of an evaluation as display text: the header
 * followed by the text up to the first blank line (or the end of the input).
 * Returns 'No ranking found' when the header is missing.
 */
export function extractRankingSection(evaluationText: string): string {
  const sectionMatch = /FINAL RANKING:\s*\n([\s\S]*?)(?:\n\n|$)/i.exec(evaluationText);
  if (sectionMatch === null) {
    return 'No ranking found';
  }
  return `FINAL RANKING:\n${sectionMatch[1]}`;
}
@@ -0,0 +1,221 @@
/**
* Council voting execution logic
* Orchestrates ranking, aggregation, and chairman synthesis
*/
import { createDMessageTextContent, DMessage, messageFragmentsReduceText } from '~/common/stores/chat/chat.message';
import { aixChatGenerateContent_DMessage_FromConversation } from '~/modules/aix/client/aix.client';
import { getUXLabsHighPerformance } from '~/common/stores/store-ux-labs';
import type { CouncilRanking, CouncilResults, CouncilProgress } from './beam.gather.council.types';
import { createCouncilRankingPrompt, createCouncilChairmanPrompt, extractUserQuery } from './beam.gather.council.prompts';
import { parseCouncilRanking, aggregateCouncilRankings, buildRankingMatrix, extractRankingSection } from './beam.gather.council.aggregation';
/** One council participant: a ray, the model that produced it, and its response. */
interface RayData {
// Ray identifier; used as the AIX context ref for the ranking call and as the key in the results
rayId: string;
// Model used when this ray acts as a ranker
llmId: string;
// Display name used in progress messages, results and the chairman prompt
modelName: string;
// The ray's response; its fragments are reduced to text for the prompts
message: DMessage;
}
/**
 * Execute the full council voting process:
 * 1. Each model ranks all responses (sequentially, each using its own model)
 * 2. Aggregate rankings
 * 3. Chairman synthesizes final answer
 *
 * Progress is reported through `onProgress` with `totalSteps = rays.length + 1`
 * (one step per ranking plus one for the synthesis); aggregation is reported without
 * consuming a step, so `currentStep` never exceeds `totalSteps`.
 *
 * @param chatHistory conversation from which the last user message is taken as the question
 * @param rays the participants; every ray is both a ranked response and a ranker
 * @param chairmanLlmId model used for the final synthesis
 * @param abortSignal forwarded to every generation call
 * @param onProgress called on every state change, including the final 'complete' or 'error'
 * @returns the per-ranker rankings, the aggregations, the ranking matrix and the chairman message
 * @throws Error when a ranking or the synthesis is aborted or fails; an 'error' progress
 *         update is emitted before the error is rethrown
 */
export async function executeCouncilVoting(
  chatHistory: readonly DMessage[],
  rays: RayData[],
  chairmanLlmId: string,
  abortSignal: AbortSignal,
  onProgress: (progress: CouncilProgress) => void,
): Promise<CouncilResults> {
  const totalSteps = rays.length + 1; // N rankings + 1 synthesis
  let currentStep = 0;

  try {
    // Step 1: Extract user query
    const userQuery = extractUserQuery(
      chatHistory.map(m => ({ role: m.role, text: messageFragmentsReduceText(m.fragments) })),
    );

    // Step 2: Prepare response labels and content for ranking
    const responseLabels = rays.map((_, idx) => `Response ${String.fromCharCode(65 + idx)}`); // A, B, C, ...
    const responsesForRanking = rays.map((ray, idx) => ({
      label: responseLabels[idx],
      content: messageFragmentsReduceText(ray.message.fragments),
    }));

    // The prompt is the same for every ranker, so build it once
    const rankingPrompt = createCouncilRankingPrompt(userQuery, responsesForRanking);

    // Step 3: Each model ranks all responses
    onProgress({
      state: 'ranking',
      currentStep: 0,
      totalSteps,
      message: 'Starting peer rankings...',
    });

    const rankings: CouncilRanking[] = [];
    for (const ray of rays) {
      currentStep++;
      onProgress({
        state: 'ranking',
        currentStep,
        totalSteps,
        message: `${ray.modelName} evaluating responses...`,
      });

      // Create conversation for ranking
      const rankerSystemMessage = createDMessageTextContent('system', 'You are an expert evaluator analyzing AI responses.');
      const rankerUserMessage = createDMessageTextContent('user', rankingPrompt);

      // Execute ranking via AIX
      const result = await aixChatGenerateContent_DMessage_FromConversation(
        ray.llmId, // Use the ray's own model to rank
        rankerSystemMessage,
        [rankerUserMessage],
        'beam-council-ranking',
        ray.rayId,
        { abortSignal, throttleParallelThreads: getUXLabsHighPerformance() ? 0 : 1 },
        () => {
          // Streaming updates are not needed: only the final message is parsed below
        },
      );

      if (result.outcome === 'aborted') {
        throw new Error('Ranking aborted');
      }
      if (result.outcome === 'errored') {
        throw new Error(`Ranking failed: ${result.errorMessage || 'Unknown error'}`);
      }

      const evaluationText = messageFragmentsReduceText(result.lastDMessage.fragments);

      // Parse rankings from the evaluation, then map response labels back to ray IDs
      const rankingsWithIds = parseCouncilRanking(evaluationText, responseLabels).map(({ label, position }) => ({
        rayId: rays[responseLabels.indexOf(label)].rayId,
        position,
      }));

      rankings.push({
        rankerRayId: ray.rayId,
        rankerModelName: ray.modelName,
        rankings: rankingsWithIds,
        evaluationText,
        extractedRanking: extractRankingSection(evaluationText),
      });
    }

    // Step 4: Aggregate rankings (local computation: reported, but it does not consume a step)
    onProgress({
      state: 'aggregating',
      currentStep,
      totalSteps,
      message: 'Calculating aggregate rankings...',
    });

    const rayModelNames = new Map(rays.map(r => [r.rayId, r.modelName]));
    const rayResponsePreviews = new Map(
      rays.map(r => [r.rayId, messageFragmentsReduceText(r.message.fragments).slice(0, 100)]),
    );
    const aggregations = aggregateCouncilRankings(
      rankings,
      rays.map(r => r.rayId),
      rayModelNames,
      rayResponsePreviews,
    );
    const rankingMatrix = buildRankingMatrix(rankings);

    // Step 5: Chairman synthesis (the last step: currentStep becomes totalSteps)
    currentStep++;
    onProgress({
      state: 'synthesizing',
      currentStep,
      totalSteps,
      message: 'Chairman synthesizing final answer...',
    });

    const responsesForChairman = rays.map(ray => ({
      rayId: ray.rayId,
      modelName: ray.modelName,
      content: messageFragmentsReduceText(ray.message.fragments),
    }));
    const rankingsForChairman = rankings.map(r => ({
      rankerName: r.rankerModelName,
      evaluationText: r.evaluationText,
      extractedRanking: r.extractedRanking,
    }));
    const chairmanPrompt = createCouncilChairmanPrompt(userQuery, responsesForChairman, rankingsForChairman);

    const systemMessage = createDMessageTextContent('system', 'You are the Chairman of an LLM Council, tasked with synthesizing peer-ranked responses.');
    const userMessage = createDMessageTextContent('user', chairmanPrompt);

    const chairmanResult = await aixChatGenerateContent_DMessage_FromConversation(
      chairmanLlmId,
      systemMessage,
      [userMessage],
      'beam-council-chairman',
      'chairman',
      { abortSignal, throttleParallelThreads: getUXLabsHighPerformance() ? 0 : 1 },
      () => {
        // Progress updates handled via onProgress callback
      },
    );

    if (chairmanResult.outcome === 'aborted') {
      throw new Error('Chairman synthesis aborted');
    }
    if (chairmanResult.outcome === 'errored') {
      throw new Error(`Chairman synthesis failed: ${chairmanResult.errorMessage || 'Unknown error'}`);
    }

    const finalChairmanMessage = chairmanResult.lastDMessage;

    // Step 6: Complete
    onProgress({
      state: 'complete',
      currentStep: totalSteps,
      totalSteps,
      message: 'Council voting complete',
    });

    return {
      rankings,
      aggregations,
      chairmanSynthesis: finalChairmanMessage,
      rankingMatrix,
    };
  } catch (error) {
    onProgress({
      state: 'error',
      currentStep,
      totalSteps,
      message: 'Council voting failed',
      error: error instanceof Error ? error.message : 'Unknown error',
    });
    throw error;
  }
}
@@ -0,0 +1,94 @@
/**
* Council voting prompts - 1:1 match with llm-council
* Source: https://github.com/karpathy/llm-council
*/
/**
 * Ranking prompt - given to each model so it can evaluate and rank all responses.
 * The responses are listed under their labels, and the model is told to finish with
 * a "FINAL RANKING:" numbered list of labels.
 */
export function createCouncilRankingPrompt(userQuery: string, responses: Array<{ label: string; content: string }>): string {
  // One "<label>:\n<content>" block per response, separated by blank lines
  const responseBlocks: string[] = [];
  for (const response of responses) {
    responseBlocks.push(`${response.label}:\n${response.content}`);
  }
  const responsesText = responseBlocks.join('\n\n');

  return `You are evaluating different responses to the following question:
Question: ${userQuery}
Here are the responses from different models (anonymized):
${responsesText}
Your task:
1. First, evaluate each response individually. For each response, explain what it does well and what it does poorly.
2. Then, at the very end of your response, provide a final ranking.
IMPORTANT: Your final ranking MUST be formatted EXACTLY as follows:
- Start with the line "FINAL RANKING:" (all caps, with colon)
- Then list the responses from best to worst as a numbered list
- Each line should be: number, period, space, then ONLY the response label (e.g., "1. Response A")
- Do not add any other text or explanations in the ranking section
Example format:
FINAL RANKING:
1. Response B
2. Response A
3. Response D
4. Response C
Now provide your evaluation and ranking:`;
}
/**
 * Chairman synthesis prompt - presents the question, every model's response (stage 1)
 * and every ranker's evaluation plus extracted ranking (stage 2), then asks for a
 * single synthesized answer.
 */
export function createCouncilChairmanPrompt(
  userQuery: string,
  responses: Array<{ rayId: string; modelName: string; content: string }>,
  rankings: Array<{ rankerName: string; evaluationText: string; extractedRanking: string }>,
): string {
  const sectionSeparator = '\n\n---\n\n';

  // Stage 1: Individual responses with model names
  const responseSections = responses.map(response => `**${response.modelName}:**\n${response.content}`);
  const stage1Text = responseSections.join(sectionSeparator);

  // Stage 2: Peer rankings with full evaluations
  const evaluationSections = rankings.map(ranking =>
    `**${ranking.rankerName}'s Evaluation:**\n\n${ranking.evaluationText}\n\n${ranking.extractedRanking}`,
  );
  const stage2Text = evaluationSections.join(sectionSeparator);

  return `You are the Chairman of an LLM Council. Multiple AI models have provided responses to a user's question, and then ranked each other's responses.
Original Question: ${userQuery}
STAGE 1 - Individual Responses:
${stage1Text}
STAGE 2 - Peer Rankings:
${stage2Text}
Your task as Chairman is to synthesize all of this information into a single, comprehensive, accurate answer to the user's original question. Consider:
- The individual responses and their insights
- The peer rankings and what they reveal about response quality
- Any patterns of agreement or disagreement
Provide a clear, well-reasoned final answer that represents the council's collective wisdom:`;
}
/**
 * Return the text of the most recent message whose role is 'user',
 * or 'No user query found' when the history contains none.
 */
export function extractUserQuery(chatMessages: readonly { role: string; text: string }[]): string {
  // Search a reversed copy so the latest user message is found first
  const lastUserMessage = [...chatMessages].reverse().find(message => message.role === 'user');
  return lastUserMessage !== undefined ? lastUserMessage.text : 'No user query found';
}
@@ -0,0 +1,59 @@
/**
* Council voting types for Beam Gather
* Implements llm-council's peer ranking mechanism
*/
import type { DMessage } from '~/common/stores/chat/chat.message';
/**
* Individual ranking from one model (the ranker) evaluating all responses.
* `rankings` holds only the entries that could be parsed from the evaluation,
* so it may be shorter than the number of responses, or empty.
*/
export interface CouncilRanking {
// Ray whose model produced this evaluation
rankerRayId: string;
// Display name of the ranker
rankerModelName: string;
// Parsed ranking entries, one per recognized response
rankings: Array<{
rayId: string;
position: number; // 1 = best, N = worst
}>;
evaluationText: string; // Full evaluation with reasoning
extractedRanking: string; // "FINAL RANKING:" section as text, or 'No ranking found'
}
/**
* Aggregated ranking results for one response, computed across all rankers
*/
export interface CouncilAggregation {
rayId: string;
modelName: string; // Display name, 'Unknown' when the ray has no name
averageRank: number; // Lower is better (1.0 = best possible); 999 when the response received no votes
voteCount: number; // Number of rankers that ranked this response
standardDeviation: number; // Population std dev of positions; higher = more controversial, 0 when there are no votes
positions: number[]; // All rank positions received
responsePreview: string; // First ~100 chars of response
}
/**
* Complete council voting results, as returned by the council execution
*/
export interface CouncilResults {
rankings: CouncilRanking[]; // One entry per ranker
aggregations: CouncilAggregation[]; // One entry per response, sorted by ascending average rank
chairmanSynthesis?: Partial<DMessage> & { fragments: DMessage['fragments'] }; // Final message generated by the chairman model
rankingMatrix: Map<string, Map<string, number>>; // ranker -> ranked -> position
}
/**
* Council voting state.
* A successful run moves through 'ranking' -> 'aggregating' -> 'synthesizing' -> 'complete';
* 'idle' is the state before a run starts and 'error' is reported when a run fails.
*/
export type CouncilState = 'idle' | 'ranking' | 'aggregating' | 'synthesizing' | 'complete' | 'error';
/**
* Council voting progress, emitted on every state change of a run
*/
export interface CouncilProgress {
state: CouncilState;
currentStep: number; // Steps started so far (0 before the first ranking)
totalSteps: number; // Number of rankers + 1 for the chairman synthesis
message: string; // Human-readable description of the current activity
error?: string; // Error message, provided with the 'error' state
}
+47
View File
@@ -5,12 +5,14 @@ import { Box, Button } from '@mui/joy';
import AddCircleOutlineRoundedIcon from '@mui/icons-material/AddCircleOutlineRounded';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';
import TelegramIcon from '@mui/icons-material/Telegram';
import HowToVoteIcon from '@mui/icons-material/HowToVote';
import type { BeamStoreApi } from '../store-beam.hooks';
import { BeamCard } from '../BeamCard';
import { SCATTER_RAY_MAX, SCATTER_RAY_MIN } from '../beam.config';
import { BeamRay } from './BeamRay';
import { BeamCouncilView } from '../gather/council/BeamCouncilView';
const rayGridDesktopSx: SxProps = {
@@ -40,6 +42,24 @@ export function BeamRayGrid(props: {
const raysCount = props.rayIds.length;
// Council voting state
const [isCouncilActive, setIsCouncilActive] = React.useState(false);
// Check if council voting is available (need at least 2 completed rays)
const rays = props.beamStore.getState().rays;
const completedRays = rays.filter(r => r.status === 'success');
const canRunCouncil = completedRays.length >= 2;
const handleCouncilStart = () => {
setIsCouncilActive(true);
props.beamStore.getState().setCouncilActive(true);
};
const handleCouncilClose = () => {
setIsCouncilActive(false);
props.beamStore.getState().setCouncilActive(false);
};
return (
<Box sx={props.isMobile ? rayGridMobileSx : rayGridDesktopSx}>
@@ -90,6 +110,33 @@ export function BeamRayGrid(props: {
</Box>
)}
{/* Council Voting Button */}
{canRunCouncil && !isCouncilActive && (
<Box sx={{ gridColumn: '1 / -1', display: 'flex', justifyContent: 'center', mt: 2 }}>
<Button
fullWidth
variant='outlined'
color='primary'
onClick={handleCouncilStart}
startDecorator={<HowToVoteIcon />}
sx={{
backgroundColor: 'background.surface',
'&:hover': { backgroundColor: 'background.popup' },
}}
>
🗳 Run Council Vote
</Button>
</Box>
)}
{/* Council View */}
{isCouncilActive && (
<BeamCouncilView
beamStore={props.beamStore}
onClose={handleCouncilClose}
/>
)}
{/*/!* Takes a full row *!/*/}
{/*<Divider sx={{*/}
{/* gridColumn: '1 / -1',*/}
+12
View File
@@ -163,6 +163,9 @@ interface ScatterStateSlice {
isScattering: boolean; // true if any ray is scattering at the moment
raysReady: number; // 0, or number of the rays that are ready
// council voting
isCouncilActive: boolean;
}
export const reInitScatterStateSlice = (prevRays: BRay[]): ScatterStateSlice => {
@@ -176,6 +179,7 @@ export const reInitScatterStateSlice = (prevRays: BRay[]): ScatterStateSlice =>
isScattering: false,
raysReady: 0,
isCouncilActive: false,
};
};
@@ -192,6 +196,9 @@ export interface ScatterStoreSlice extends ScatterStateSlice {
raySetLlmId: (rayId: BRayId, llmId: DLLMId | null) => void;
_rayUpdate: (rayId: BRayId, update: Partial<BRay> | ((ray: BRay) => Partial<BRay>)) => void;
// council actions
setCouncilActive: (active: boolean) => void;
_storeLastScatterConfig: () => void;
_syncRaysStateToScatter: () => void;
@@ -353,6 +360,11 @@ export const createScatterSlice: StateCreator<RootStoreSlice & ScatterStoreSlice
),
})),
setCouncilActive: (active: boolean) =>
_set({
isCouncilActive: active,
}),
_storeLastScatterConfig: () => {
updateBeamLastConfig({
rayLlmIds: _get().rays.map(ray => ray.rayLlmId).filter(Boolean) as DLLMId[],