Compare commits

...

42 Commits

Author SHA1 Message Date
Enrico Ros b35901d94c 2.0.1 Roll AIX 2025-11-24 15:06:39 -08:00
Enrico Ros c0df1a23f4 2.0.1 Update news 2025-11-24 15:05:40 -08:00
Enrico Ros 495619af2c LLMs: Interfaces fix 2025-11-24 15:00:09 -08:00
Enrico Ros 72dfadf106 AIX: Anthropic: auto-header for programmatic tool calling (calling from code, not just llm) 2025-11-24 14:58:34 -08:00
Enrico Ros 5825909e45 AIX: Anthropic: programmatic tool calling support 2025-11-24 14:42:20 -08:00
Enrico Ros d3f6d87ee0 AIX: remove legacy unconstrained 'json mode' 2025-11-24 14:29:36 -08:00
Enrico Ros c4f4c5ddad AIX: cross-vendor json output | strict tool invocation 2025-11-24 14:23:25 -08:00
Enrico Ros 2921d7ca27 Anthropic: Structured Outputs | Strict Tools 2025-11-24 13:59:20 -08:00
Enrico Ros 2021cbc988 Anthropic: MessageDeltaUsage 2025-11-24 13:58:57 -08:00
Enrico Ros e9e29861b2 Anthropic: cleanup models 2025-11-24 13:21:25 -08:00
Enrico Ros 8e6da36059 LLM interface types - relax for bw comp 2025-11-24 13:21:13 -08:00
Enrico Ros 5e1469e12e Anthropic: Tool Search Tool 2025-11-24 13:20:58 -08:00
Enrico Ros bd7465f8b1 Roll packages 2025-11-24 12:34:52 -08:00
Enrico Ros 570397a616 Anthropic: add Effort parameter 2025-11-24 12:34:39 -08:00
Enrico Ros b3b5f1daef Anthropic: raise default thinking to 16384 2025-11-24 12:13:50 -08:00
Enrico Ros 25ec3ae47c Anthropic: add Opus 4.5 2025-11-24 12:09:41 -08:00
Enrico Ros 5ba5e3da58 2.0.1 Roll AIX, news 2025-11-24 04:11:39 -08:00
Enrico Ros 9296c14ca0 2.0.1 News 2025-11-24 04:11:39 -08:00
Enrico Ros 310b5d3422 2.0.1 Package 2025-11-24 03:57:17 -08:00
Enrico Ros 1c5967112e Rolled posthog as there's still no local min 2025-11-24 03:19:08 -08:00
Enrico Ros 49a3d8ee71 Roll deep 2025-11-24 03:14:58 -08:00
Enrico Ros cf8b61e8d9 Packages: locked dexie 2025-11-24 03:11:26 -08:00
Enrico Ros 967ae5723e Roll posthog - next.config.ts fix 2025-11-24 02:39:28 -08:00
Enrico Ros 03421acf2f Roll posthog - security fix (details below)
https://helixguard.ai/blog/malicious-sha1hulud-2025-11-24
2025-11-24 02:39:15 -08:00
Enrico Ros d43896cc5a Package: cmd to fix sharp on win32/x64 2025-11-24 02:33:50 -08:00
Enrico Ros b283124a2f Roll packages 2025-11-24 02:05:37 -08:00
Enrico Ros 8c39be01f8 Roll packages 2025-11-24 02:04:23 -08:00
Enrico Ros fb2bd4ccd8 Error Hints: nit 2025-11-23 23:34:36 -08:00
Enrico Ros 5b826ffc45 Error Hints: control AI Service advanced setup 2025-11-23 23:26:56 -08:00
Enrico Ros 0b2ab365d3 Error Hints: Render Reconnect 2025-11-23 23:26:56 -08:00
Enrico Ros 93fc54992c Error Hints: AIX Client and Reassembler -> Fragment 2025-11-23 23:26:56 -08:00
Enrico Ros 60b7326deb Error Hints: Fragments 2025-11-23 23:26:56 -08:00
Enrico Ros d6e6139244 AIX: Gemini: change log 2025-11-23 23:26:56 -08:00
Enrico Ros 0892911ddc Next config: conditional strict mode 2025-11-23 23:26:56 -08:00
Enrico Ros 30267ac50c LLMs: Nano Banana message names 2025-11-23 23:16:43 -08:00
Enrico Ros ffef0ef31d PWA detect. Fixes #887 2025-11-23 23:15:56 -08:00
Enrico Ros fc047087ce CSF: direct connect actions hook 2025-11-23 23:15:03 -08:00
Enrico Ros 81d4966535 CSF: improve vendors 2025-11-23 20:02:06 -08:00
Enrico Ros 004d63fda1 FormRadioControl: gap 1 2025-11-23 16:23:35 -08:00
Enrico Ros 23e2dbb354 tRPC fetchers: increase error message to 240 2025-11-23 16:19:25 -08:00
Enrico Ros 28e9899b97 Settings: looks 2025-11-23 16:19:22 -08:00
Enrico Ros 7441d41550 FormRadioControl: descriptions 2025-11-23 16:19:11 -08:00
51 changed files with 2016 additions and 664 deletions
+2 -2
View File
@@ -30,7 +30,7 @@ buildType && console.log(` 🧠 big-AGI: building for ${buildType}...\n`);
/** @type {import('next').NextConfig} */
let nextConfig: NextConfig = {
reactStrictMode: true,
reactStrictMode: !process.env.NO_STRICT_MODE, // default: enabled
// [exports] https://nextjs.org/docs/advanced-features/static-html-export
...(buildType && {
@@ -141,7 +141,7 @@ if (process.env.POSTHOG_API_KEY && process.env.POSTHOG_ENV_ID) {
personalApiKey: process.env.POSTHOG_API_KEY,
envId: process.env.POSTHOG_ENV_ID,
host: 'https://us.i.posthog.com', // backtrace upload host
verbose: false,
logLevel: 'info',
sourcemaps: {
enabled: process.env.NODE_ENV === 'production',
project: 'big-agi',
+1189 -483
View File
File diff suppressed because it is too large Load Diff
+13 -12
View File
@@ -1,6 +1,6 @@
{
"name": "big-agi",
"version": "2.0.0",
"version": "2.0.1",
"private": true,
"author": "Enrico Ros <enrico.ros@gmail.com>",
"repository": "https://github.com/enricoros/big-agi",
@@ -14,7 +14,8 @@
"postinstall": "prisma generate --no-hints",
"db:push": "prisma db push",
"db:studio": "prisma studio",
"vercel:env:pull": "npx vercel env pull .env.development.local"
"vercel:env:pull": "npx vercel env pull .env.development.local",
"sharp:win32_x64": "npm install --os=win32 --cpu=x64 sharp"
},
"prisma": {
"schema": "src/server/prisma/schema.prisma"
@@ -32,7 +33,7 @@
"@mui/joy": "^5.0.0-beta.52",
"@next/bundle-analyzer": "~15.1.8",
"@prisma/client": "~5.22.0",
"@tanstack/react-query": "5.90.3",
"@tanstack/react-query": "5.90.10",
"@tanstack/react-virtual": "^3.13.12",
"@trpc/client": "11.5.1",
"@trpc/next": "11.5.1",
@@ -43,8 +44,8 @@
"browser-fs-access": "^0.38.0",
"cheerio": "^1.1.2",
"csv-stringify": "^6.6.0",
"dexie": "^4.0.11",
"dexie-react-hooks": "^1.1.7",
"dexie": "~4.0.11",
"dexie-react-hooks": "~1.1.7",
"diff": "^8.0.2",
"eventemitter3": "^5.0.1",
"idb-keyval": "^6.2.2",
@@ -53,10 +54,10 @@
"next": "~15.1.8",
"nprogress": "^0.2.0",
"pdfjs-dist": "5.4.54",
"posthog-js": "^1.297.0",
"posthog-node": "^5.13.0",
"posthog-js": "^1.298.0",
"posthog-node": "^5.14.0",
"prismjs": "^1.30.0",
"puppeteer-core": "^24.30.0",
"puppeteer-core": "^24.31.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-hook-form": "^7.66.1",
@@ -68,20 +69,20 @@
"remark-gfm": "^4.0.1",
"remark-mark-highlight": "^0.1.1",
"remark-math": "^6.0.0",
"sharp": "^0.33.5",
"sharp": "^0.34.5",
"superjson": "^2.2.5",
"tesseract.js": "^6.0.1",
"tiktoken": "^1.0.22",
"turndown": "^7.2.2",
"zod": "^4.1.12",
"zod": "^4.1.13",
"zustand": "5.0.7"
},
"devDependencies": {
"@posthog/nextjs-config": "1.3.2",
"@posthog/nextjs-config": "^1.6.0",
"@types/node": "^24.10.1",
"@types/nprogress": "^0.2.3",
"@types/prismjs": "^1.26.5",
"@types/react": "^19.2.6",
"@types/react": "^19.2.7",
"@types/react-csv": "^1.1.10",
"@types/react-dom": "^19.2.3",
"@types/turndown": "^5.0.6",
@@ -17,7 +17,6 @@ import { createDMessageFromFragments, createDMessageTextContent, DMessage, DMess
import { createTextContentFragment, DMessageFragment, DMessageFragmentId } from '~/common/stores/chat/chat.fragments';
import { openFileForAttaching } from '~/common/components/ButtonAttachFiles';
import { optimaOpenPreferences } from '~/common/layout/optima/useOptima';
import { useBrowserTranslationWarning } from '~/common/components/useIsBrowserTranslating';
import { useCapabilityElevenLabs } from '~/common/components/useCapabilities';
import { useChatOverlayStore } from '~/common/chat-overlay/store-perchat_vanilla';
import { useChatStore } from '~/common/stores/chat/store-chats';
@@ -65,7 +64,6 @@ export function ChatMessageList(props: {
const { notifyBooting } = useScrollToBottom();
const danger_experimentalHtmlWebUi = useChatAutoSuggestHTMLUI();
const [showSystemMessages] = useChatShowSystemMessages();
const optionalTranslationWarning = useBrowserTranslationWarning();
const { conversationMessages, historyTokenCount } = useChatStore(useShallow(({ conversations }) => {
const conversation = conversations.find(conversation => conversation.id === props.conversationId);
return {
@@ -326,8 +324,6 @@ export function ChatMessageList(props: {
return (
<List role='chat-messages-list' sx={listSx}>
{optionalTranslationWarning}
{props.isMessageSelectionMode && (
<MessagesSelectionHeader
hasSelected={selectedMessages.size > 0}
@@ -797,6 +797,7 @@ export function ChatMessage(props: {
fitScreen={props.fitScreen}
isMobile={props.isMobile}
messageRole={messageRole}
messageGeneratorLlmId={messageGenerator?.mgt === 'aix' ? messageGenerator.aix?.mId : undefined}
messagePendingIncomplete={messagePendingIncomplete}
optiAllowSubBlocksMemo={!!messagePendingIncomplete}
disableMarkdownText={disableMarkdown || fromUser /* User messages are edited as text. Try to have them in plain text. NOTE: This may bite. */}
@@ -3,15 +3,30 @@ import * as React from 'react';
import { ScaledTextBlockRenderer } from '~/modules/blocks/ScaledTextBlockRenderer';
import type { ContentScaling } from '~/common/app.theme';
import type { DMessageErrorPart } from '~/common/stores/chat/chat.fragments';
import type { DMessageRole } from '~/common/stores/chat/chat.message';
import { BlockPartError_RequestExceeded } from './BlockPartError_RequestExceeded';
export function BlockPartError(props: {
errorText: string,
errorHint?: DMessageErrorPart['hint'],
messageRole: DMessageRole,
messageGeneratorLlmId?: string | null,
contentScaling: ContentScaling,
}) {
// special error presentation, based on hints
switch (props.errorHint) {
case 'aix-request-exceeded':
return <BlockPartError_RequestExceeded messageGeneratorLlmId={props.messageGeneratorLlmId} contentScaling={props.contentScaling} />;
default:
// continue rendering generic error
break;
}
// Check if the errorText starts with '**' and has a closing '**' following Markdown rules
let textToRender = props.errorText;
let renderAsMarkdown = false;
@@ -0,0 +1,108 @@
import * as React from 'react';
import { Alert, Box, FormHelperText, Switch, Typography } from '@mui/joy';
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
import type { ContentScaling } from '~/common/app.theme';
import { useLLM } from '~/common/stores/llms/llms.hooks';
import { useModelServiceClientSideFetch } from '~/common/stores/llms/hooks/useModelServiceClientSideFetch';
/**
 * Error recovery component for "Request too large" errors.
 *
 * Rendered for error fragments hinted as 'aix-request-exceeded' (the request
 * body was rejected upstream — the copy below references the Vercel edge
 * network payload limit).
 *
 * When the model's vendor supports client-side fetch (CSF), offers a toggle to
 * connect directly from the browser to the AI provider (bypassing the server
 * limit); otherwise lists manual suggestions to shrink the request.
 *
 * @param messageGeneratorLlmId - id of the LLM that generated the message; used to resolve the model/service for the CSF toggle
 * @param contentScaling - content scale; 'xs' renders the smaller ('sm') Alert
 * @param onRegenerate - currently unused: the regenerate button is commented out below
 */
export function BlockPartError_RequestExceeded(props: {
  messageGeneratorLlmId?: string | null;
  contentScaling: ContentScaling;
  onRegenerate?: () => void;
}) {

  // external state - resolve the generating model (may be null if the id is stale/absent)
  const model = useLLM(props.messageGeneratorLlmId) ?? null;
  // CSF availability/active state and actions for the model's owning service
  const { csfAvailable, csfActive, csfToggle } = useModelServiceClientSideFetch(true, model);

  return (
    <Alert
      size={props.contentScaling === 'xs' ? 'sm' : 'md'}
      color='danger'
      sx={{ display: 'flex', alignItems: 'flex-start', gap: 1 }}
    >
      <WarningRoundedIcon sx={{ flexShrink: 0, mt: 0.25 }} />
      <Box sx={{ flex: 1, display: 'flex', flexDirection: 'column', gap: 0.5 }}>
        <Box fontSize='larger'>
          Request Too Large
        </Box>
        <div>
          Your message or attachments exceed the limit of the Vercel edge network.
        </div>

        {/* Recovery options: CSF toggle when the vendor supports it, manual suggestions otherwise */}
        {csfAvailable ? (
          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 2 }}>

            {/* Explanation */}
            <Box color='text.secondary' fontSize='sm'>
              <strong>Experimental:</strong> enable direct connection to the AI services, and try again.
            </Box>

            {/* Toggle */}
            <Box
              sx={{
                display: 'flex',
                alignItems: 'center',
                gap: 2,
                p: 2,
                borderRadius: 'sm',
                bgcolor: 'background.popup',
                boxShadow: 'md',
              }}
            >
              <Box sx={{ flex: 1 }}>
                {/* title highlights (primary color) once the toggle is on, nudging a retry */}
                <Box color={!csfActive ? undefined : 'primary.solidBg'} fontWeight='lg' mb={0.5}>
                  Direct Connection {csfActive && '- Now Try Again'}
                </Box>
                <FormHelperText>
                  Bypassing servers and connect directly from this client -&gt; AI provider
                </FormHelperText>
              </Box>
              {/* persists the CSF flag into the service settings via csfToggle */}
              <Switch
                checked={csfActive}
                onChange={(e) => csfToggle(e.target.checked)}
              />
            </Box>

            {/* Regenerate button */}
            {/*{props.onRegenerate && (*/}
            {/*  <Button*/}
            {/*    size='sm'*/}
            {/*    variant={csfActive ? 'solid' : 'outlined'}*/}
            {/*    color={csfActive ? 'success' : 'neutral'}*/}
            {/*    startDecorator={<RefreshIcon />}*/}
            {/*    onClick={props.onRegenerate}*/}
            {/*    sx={{ alignSelf: 'flex-start' }}*/}
            {/*  >*/}
            {/*    {csfActive ? 'Regenerate with Direct Connection' : 'Regenerate'}*/}
            {/*  </Button>*/}
            {/*)}*/}
          </Box>
        ) : (
          <Box>
            <Typography level='body-sm' sx={{ mb: 1 }}>
              <strong>Suggestions:</strong>
            </Typography>
            <Typography component='ul' level='body-sm' sx={{ pl: 2, m: 0 }}>
              <li>Use the cleanup button in the right pane to hide old messages</li>
              <li>Remove large attachments from the conversation</li>
              {/*<li>Reduce conversation length before sending</li>*/}
            </Typography>
          </Box>
        )}
      </Box>
    </Alert>
  );
}
@@ -56,6 +56,7 @@ export function ContentFragments(props: {
isMobile: boolean,
messageRole: DMessageRole,
messagePendingIncomplete?: boolean,
messageGeneratorLlmId?: string | null,
optiAllowSubBlocksMemo?: boolean,
disableMarkdownText: boolean,
enhanceCodeBlocks: boolean,
@@ -172,7 +173,7 @@ export function ContentFragments(props: {
default:
const _exhaustiveVoidCheck: never = part;
// fallthrough - we don't handle these here anymore
// fallthrough - we don't handle these here anymore
case 'annotations':
return (
<ScaledTextBlockRenderer
@@ -243,7 +244,9 @@ export function ContentFragments(props: {
<BlockPartError
key={fId}
errorText={part.error}
errorHint={part.hint}
messageRole={props.messageRole}
messageGeneratorLlmId={props.messageGeneratorLlmId}
contentScaling={props.contentScaling}
/>
);
+1 -1
View File
@@ -283,7 +283,7 @@ export function AppNews() {
</Box>
)}
{idx === 0 && <Divider sx={{ my: 6, mx: 6 }}/>}
{idx === 1 && <Divider sx={{ my: 6, mx: 6 }}/>}
</React.Fragment>;
})}
+13
View File
@@ -71,6 +71,19 @@ export const DevNewsItem: NewsItem = {
// news and feature surfaces
export const NewsItems: NewsItem[] = [
{
versionCode: '2.0.1',
versionName: 'Heavy Critters',
versionDate: new Date('2025-11-24T23:30:00Z'),
items: [
{ text: <>New: <B>Opus 4.5</B>, <B>Gemini 3 Pro</B>, <B>Nano Banana Pro</B>, <B>Grok 4.1</B>, <B>GPT-5.1</B>, <B>Kimi K2</B></> },
{ text: <><B>Image Generation</B> with Azure and LocalAI providers, in addition to OpenAI</> },
{ text: <>Enhanced <B>OpenRouter</B> integration with auto-capabilities and reasoning</> },
{ text: <>Call transcripts, generate persona images, search button in beams</> },
{ text: <>Starred models, errors resilience, 278 fixes</> },
{ text: <ExternalLink href='https://github.com/enricoros/big-agi/issues/new?template=ai-triage.yml'>AI-Automatic feature development</ExternalLink> },
],
},
{
versionCode: '2.0.0',
versionName: 'Open',
+12 -12
View File
@@ -44,7 +44,11 @@ const Topics = styled(AccordionGroup)({
// larger summary, with a spinning icon
[`& .${accordionSummaryClasses.button}`]: {
minHeight: 64,
minHeight: '52px',
border: 'none',
paddingRight: '0.75rem',
backgroundColor: 'rgba(var(--joy-palette-primary-lightChannel) / 0.2)',
gap: '1rem',
},
[`& .${accordionSummaryClasses.indicator}`]: {
transition: '0.2s',
@@ -52,11 +56,6 @@ const Topics = styled(AccordionGroup)({
[`& [aria-expanded="true"] .${accordionSummaryClasses.indicator}`]: {
transform: 'rotate(45deg)',
},
// larger padded block
[`& .${accordionDetailsClasses.content}.${accordionDetailsClasses.expanded}`]: {
paddingBlock: '1rem',
},
});
function Topic(props: { title?: React.ReactNode, icon?: string | React.ReactNode, startCollapsed?: boolean, children?: React.ReactNode }) {
@@ -92,9 +91,9 @@ function Topic(props: { title?: React.ReactNode, icon?: string | React.ReactNode
>
{!!props.icon && (
<Avatar
size='sm'
color={COLOR_TOPIC_ICON}
variant={expanded ? 'plain' /* was: soft */ : 'plain'}
// size='sm'
>
{props.icon}
</Avatar>
@@ -109,7 +108,7 @@ function Topic(props: { title?: React.ReactNode, icon?: string | React.ReactNode
slotProps={{
content: {
sx: {
px: { xs: 1.5, md: 2 },
p: { xs: 1.5, md: 2.5 },
},
},
}}
@@ -153,6 +152,7 @@ const _styles = {
tabsListTab: {
// borderRadius: '2rem',
borderRadius: 'sm',
fontSize: 'sm',
flex: 1,
p: 0,
'&[aria-selected="true"]': {
@@ -251,7 +251,7 @@ export function SettingsModal(props: {
<Tab value='tools' disableIndicator sx={_styles.tabsListTab}>Tools</Tab>
</TabList>
<TabPanel value='chat' variant='outlined' sx={_styles.tabPanel}>
<TabPanel value='chat' color='primary' variant='outlined' sx={_styles.tabPanel}>
<Topics>
<Topic>
<AppChatSettingsUI />
@@ -268,7 +268,7 @@ export function SettingsModal(props: {
</Topics>
</TabPanel>
<TabPanel value='voice' variant='outlined' sx={_styles.tabPanel}>
<TabPanel value='voice' color='primary' variant='outlined' sx={_styles.tabPanel}>
<Topics>
<Topic icon={/*'🎙️'*/ <MicIcon />} title='Microphone'>
<VoiceSettings />
@@ -279,7 +279,7 @@ export function SettingsModal(props: {
</Topics>
</TabPanel>
<TabPanel value='draw' variant='outlined' sx={_styles.tabPanel}>
<TabPanel value='draw' color='primary' variant='outlined' sx={_styles.tabPanel}>
<Topics>
<Topic>
<T2ISettings />
@@ -290,7 +290,7 @@ export function SettingsModal(props: {
</Topics>
</TabPanel>
<TabPanel value='tools' variant='outlined' sx={_styles.tabPanel}>
<TabPanel value='tools' color='primary' variant='outlined' sx={_styles.tabPanel}>
<Topics>
<Topic icon={<LanguageRoundedIcon />} title='Load Web Pages (with images)' startCollapsed>
<BrowseSettings />
+2 -2
View File
@@ -23,8 +23,8 @@ export const Release = {
// this is here to trigger revalidation of data, e.g. models refresh
Monotonics: {
Aix: 40,
NewsVersion: 200,
Aix: 42,
NewsVersion: 201,
},
// Frontend: pretty features
@@ -10,6 +10,7 @@ import { FormLabelStart } from './FormLabelStart';
export type FormRadioOption<T extends string> = {
value: T,
label: string | React.JSX.Element,
description?: string,
disabled?: boolean
};
@@ -23,18 +24,24 @@ export const FormRadioControl = <TValue extends string>(props: {
options: Immutable<FormRadioOption<TValue>[]>;
value?: TValue;
onChange: (value: TValue) => void;
}) =>
<FormControl size={props.size} orientation='horizontal' disabled={props.disabled} sx={{ justifyContent: 'space-between', alignItems: 'center' }}>
{(!!props.title || !!props.description) && <FormLabelStart title={props.title} description={props.description} tooltip={props.tooltip} />}
<RadioGroup
size={props.size}
orientation='horizontal'
value={props.value}
onChange={(event: React.ChangeEvent<HTMLInputElement>) => event.target.value && props.onChange(event.target.value as TValue)}
sx={{ flexWrap: 'wrap' }}
>
{props.options.map((option) =>
<Radio key={'opt-' + option.value} value={option.value} label={option.label} disabled={option.disabled || props.disabled} />,
)}
</RadioGroup>
</FormControl>;
}) => {
const selectedOption = props.options.find(option => option.value === props.value);
const description = selectedOption?.description ?? props.description;
return (
<FormControl size={props.size} orientation='horizontal' disabled={props.disabled} sx={{ justifyContent: 'space-between', alignItems: 'center' }}>
{(!!props.title || !!description) && <FormLabelStart title={props.title} description={description} tooltip={props.tooltip} />}
<RadioGroup
size={props.size}
orientation='horizontal'
value={props.value}
onChange={(event: React.ChangeEvent<HTMLInputElement>) => event.target.value && props.onChange(event.target.value as TValue)}
sx={{ flexWrap: 'wrap', gap: 1 }}
>
{props.options.map((option) =>
<Radio key={'opt-' + option.value} value={option.value} label={option.label} disabled={option.disabled || props.disabled} />,
)}
</RadioGroup>
</FormControl>
);
};
@@ -0,0 +1,66 @@
import * as React from 'react';
import { Alert, IconButton } from '@mui/joy';
import CloseRoundedIcon from '@mui/icons-material/CloseRounded';
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
import { Is, isBrowser, isPwa } from '~/common/util/pwaUtils';
import { useUICounter } from '~/common/stores/store-ui';
/**
 * Detects if a mobile PWA is running in Desktop Mode (which causes layout issues).
 * This happens when Chrome's "Request Desktop Site" is enabled on mobile devices.
 *
 * Shows a dismissible warning when:
 * - App is running as a PWA (standalone mode)
 * - Device OS is mobile (iOS or Android)
 * - Viewport width is >= 900px (indicating desktop mode override)
 *
 * @returns a dismissible warning Alert element, or null when no warning applies
 */
export function usePWADesktopModeWarning() {

  // state - session-local dismissal (persistence across sessions is via the UI counter below)
  const [hideWarning, setHideWarning] = React.useState(false);

  // external state - stop showing after 5 acknowledgements
  const { novel: lessThanFive, touch } = useUICounter('acknowledge-pwa-desktop-mode-warning', 5);

  // detect PWA in desktop mode - computed once on mount; does not react to live resizes
  const isInDesktopMode = React.useMemo(() => {
    if (!isBrowser) return false;

    // if PWA
    const isInPwaMode = isPwa();
    if (!isInPwaMode) return false;

    // if OS is mobile
    const isMobileOS = Is.OS.iOS || Is.OS.Android;
    if (!isMobileOS) return false;

    // Check if viewport width suggests desktop mode (>= 900px)
    // This matches the mobile breakpoint used in useMatchMedia.ts
    return window.matchMedia('(min-width: 900px)').matches;
  }, []);

  const showWarning = isInDesktopMode && !hideWarning && lessThanFive;

  return React.useMemo(() => showWarning ? (
    <Alert
      size='lg'
      variant='soft'
      color='warning'
      startDecorator={<WarningRoundedIcon />}
      endDecorator={
        <IconButton
          color='warning'
          aria-label='Dismiss desktop mode warning'
          // FIX: onClick was attached to the CloseRoundedIcon child, so clicks on the
          // button's padding (outside the icon glyph) did not dismiss the warning
          onClick={() => {
            setHideWarning(true);
            touch();
          }}
        >
          <CloseRoundedIcon />
        </IconButton>
      }
    >
      This Browser is running in Desktop Mode, which may cause layout issues.<br />
      To fix: Close this app, open Chrome, visit this site, disable &quot;Desktop site&quot; in the menu, then reopen the app.
    </Alert>
  ) : null, [showWarning, touch]);
}
+10
View File
@@ -5,8 +5,10 @@ import { PanelGroup } from 'react-resizable-panels';
import { GlobalDragOverlay } from '~/common/components/dnd-dt/GlobalDragOverlay';
import { Is } from '~/common/util/pwaUtils';
import { checkVisibleNav, navItems } from '~/common/app.nav';
import { useBrowserTranslationWarning } from '~/common/components/useIsBrowserTranslating';
import { useGlobalShortcuts } from '~/common/components/shortcuts/useGlobalShortcuts';
import { useIsMobile } from '~/common/components/useMatchMedia';
import { usePWADesktopModeWarning } from '~/common/components/useIsBrowserInPWADesktop';
import { useUIPreferencesStore } from '~/common/stores/store-ui';
import { ScratchClip } from './scratchclip/ScratchClip';
@@ -61,6 +63,10 @@ export function OptimaLayout(props: { suspendAutoModelsSetup?: boolean, children
// derived state
const currentApp = navItems.apps.find(item => item.route === route);
// global warnings
const translationWarning = useBrowserTranslationWarning();
const pwaDesktopModeWarning = usePWADesktopModeWarning();
// global shortcuts for Optima
useGlobalShortcuts('OptimaApp', React.useMemo(() => [
// Preferences & Model dialogs
@@ -78,6 +84,10 @@ export function OptimaLayout(props: { suspendAutoModelsSetup?: boolean, children
return <>
{/* Global Warnings */}
{translationWarning}
{pwaDesktopModeWarning}
<PanelGroup direction='horizontal' id='root-layout' style={isMobile ? undoPanelGroupSx : undefined}>
+17 -6
View File
@@ -113,7 +113,18 @@ export type DMessageFragmentVendorState = Record<string, unknown> & {
export type DMessageTextPart = { pt: 'text', text: string };
export type DMessageErrorPart = { pt: 'error', error: string };
export type DMessageErrorPart = { pt: 'error', error: string, hint?: DMessageErrorPartHint };
type DMessageErrorPartHint =
// AIX streaming errors (from aixClassifyStreamingError)
| 'aix-client-aborted'
| 'aix-net-disconnected'
| 'aix-request-exceeded'
| 'aix-response-captive'
| 'aix-net-unknown'
| 'aix-processing-error'
// Allow custom hints
| string;
/**
* @deprecated replaced by DMessageZyncAssetReferencePart to an image asset; here for migration purposes
@@ -380,8 +391,8 @@ export function createTextContentFragment(text: string): DMessageContentFragment
return _createContentFragment(_create_Text_Part(text));
}
export function createErrorContentFragment(error: string): DMessageContentFragment {
return _createContentFragment(_create_Error_Part(error));
export function createErrorContentFragment(error: string, hint?: DMessageErrorPartHint): DMessageContentFragment {
return _createContentFragment(_create_Error_Part(error, hint));
}
export function createZyncAssetReferenceContentFragment(assetUuid: ZYNC_Entity.UUID, refSummary: string | undefined, assetType: 'image' | 'audio', legacyImageRefPart?: DMessageZyncAssetReferencePart['_legacyImageRefPart']): DMessageContentFragment {
@@ -514,8 +525,8 @@ function _create_Text_Part(text: string): DMessageTextPart {
return { pt: 'text', text };
}
function _create_Error_Part(error: string): DMessageErrorPart {
return { pt: 'error', error };
function _create_Error_Part(error: string, hint?: DMessageErrorPartHint): DMessageErrorPart {
return { pt: 'error', error, ...(hint && { hint }) };
}
export function createDMessageZyncAssetReferencePart(zUuid: ZYNC_Entity.UUID, refSummary: string | undefined, assetType: 'image' | 'audio', legacyImageRefPart?: DMessageZyncAssetReferencePart['_legacyImageRefPart']): DMessageZyncAssetReferencePart {
@@ -593,7 +604,7 @@ function _duplicate_Part<TPart extends (DMessageContentFragment | DMessageAttach
return _create_Doc_Part(part.vdt, _duplicate_InlineData(part.data), part.ref, part.l1Title, newDocVersion, part.meta ? { ...part.meta } : undefined) as TPart;
case 'error':
return _create_Error_Part(part.error) as TPart;
return _create_Error_Part(part.error, part.hint) as TPart;
case 'reference':
const rt = part.rt;
@@ -0,0 +1,42 @@
import * as React from 'react';
import { findModelVendor } from '~/modules/llms/vendors/vendors.registry';
import type { DLLM } from '../llms.types';
import type { DModelsService } from '../llms.service.types';
import { llmsStoreActions, useModelsStore } from '../store-llms';
/**
 * Hook to manage the client-side fetch (CSF) setting for a model's service.
 *
 * CSF lets the client talk to the AI provider directly instead of proxying
 * through the server; availability is vendor-dependent (vendor.csfKey and
 * vendor.csfAvailable, checked against the service setup).
 *
 * @param enabled - when false, vendor resolution is skipped and the hook reports unavailable
 * @param model - the model whose owning service's CSF flag is read/written (null → no-op actions)
 * @returns { csfAvailable, csfActive, csfToggle, csfReset } - flags plus setter/clearer actions
 */
export function useModelServiceClientSideFetch(enabled: boolean, model: DLLM | null) {

  // memo vendor - resolve the model's vendor and its CSF settings key ('' when disabled/unsupported)
  const { vendor, csfKey } = React.useMemo(() => {
    if (!enabled) return { vendor: null, csfKey: '' };
    const vendor = findModelVendor(model?.vId);
    const csfKey = vendor?.csfKey || '';
    return { vendor, csfKey };
  }, [enabled, model?.vId]);

  // external state - subscribe to the service that owns this model (null when model has no sId or service is gone)
  const service: null | DModelsService = useModelsStore(state => !model?.sId ? null : state.sources.find(s => s.id === model.sId) ?? null);

  // actual state
  // available: vendor declares a CSF key AND its availability check passes for this service's setup
  const csfAvailable: boolean | undefined = !!csfKey && vendor?.csfAvailable?.(service?.setup);
  // active: available AND the flag is set in the service setup (setup shape is vendor-specific, hence the cast)
  const csfActive: boolean | undefined = csfAvailable && (service?.setup as any)?.[csfKey];

  const serviceId = service?.id || '';

  // write the CSF flag into the service settings (no-op without a key/service)
  const csfToggle = React.useCallback((value: boolean) => {
    if (csfKey && serviceId)
      llmsStoreActions().updateServiceSettings(serviceId, { [csfKey]: value });
  }, [csfKey, serviceId]);

  // force the CSF flag off (no-op without a key/service)
  const csfReset = React.useCallback(() => {
    if (csfKey && serviceId)
      llmsStoreActions().updateServiceSettings(serviceId, { [csfKey]: false });
  }, [csfKey, serviceId]);

  return { csfAvailable, csfActive, csfToggle, csfReset };
}
+17 -1
View File
@@ -84,6 +84,14 @@ export const DModelParameterRegistry = {
// No initialValue - undefined means off (e.g. default 200K context window)
} as const,
llmVndAntEffort: {
label: 'Effort',
type: 'enum' as const,
description: 'Controls token usage vs. thoroughness trade-off. Works alongside thinking budget.',
values: ['low', 'medium', 'high'] as const,
// No initialValue - undefined means high effort (default, equivalent to omitting the parameter)
} as const,
llmVndAntSkills: {
label: 'Document Skills',
type: 'string' as const,
@@ -96,7 +104,7 @@ export const DModelParameterRegistry = {
type: 'integer' as const,
description: 'Budget for extended thinking',
range: [1024, 65536] as const,
initialValue: 8192,
initialValue: 16384,
nullable: {
meaning: 'Disable extended thinking',
} as const,
@@ -118,6 +126,14 @@ export const DModelParameterRegistry = {
// No initialValue - undefined means off (same as 'off')
} as const,
// llmVndAntToolSearch: { // Not user set
// label: 'Tool Search',
// type: 'enum' as const,
// description: 'Search algorithm for discovering tools on-demand (regex=pattern-based, bm25=natural language)',
// values: ['regex', 'bm25'] as const,
// // No initialValue - undefined means off (tool search disabled)
// } as const,
llmVndGeminiAspectRatio: {
label: 'Aspect Ratio',
type: 'enum' as const,
+3
View File
@@ -142,6 +142,7 @@ export type DModelInterfaceV1 =
| 'oai-chat'
| 'oai-chat-fn'
| 'oai-chat-json'
| 'ant-tools-search'
| 'oai-chat-vision'
| 'oai-chat-reasoning'
| 'oai-complete'
@@ -166,6 +167,7 @@ export type DModelInterfaceV1 =
export const LLM_IF_OAI_Chat: DModelInterfaceV1 = 'oai-chat';
export const LLM_IF_OAI_Fn: DModelInterfaceV1 = 'oai-chat-fn';
export const LLM_IF_OAI_Json: DModelInterfaceV1 = 'oai-chat-json'; // for Structured Outputs (or JSON mode at worst)
export const LLM_IF_ANT_ToolsSearch: DModelInterfaceV1 = 'ant-tools-search';
// export const LLM_IF_OAI_JsonSchema: ... future?
export const LLM_IF_OAI_Vision: DModelInterfaceV1 = 'oai-chat-vision';
export const LLM_IF_OAI_Reasoning: DModelInterfaceV1 = 'oai-chat-reasoning';
@@ -193,6 +195,7 @@ export const LLMS_ALL_INTERFACES = [
LLM_IF_OAI_Vision, // GREAT TO HAVE - image inputs
LLM_IF_OAI_Fn, // IMPORTANT - support for function calls
LLM_IF_OAI_Json, // not used for now: structured outputs
LLM_IF_ANT_ToolsSearch, // Anthropic tool: Tools Search
// Generalized capabilities
LLM_IF_OAI_Reasoning, // COSMETIC ONLY - may show a 'brain' icon in supported screens
LLM_IF_Outputs_Audio, // COSMETIC ONLY FOR NOW - Models that generate audio output (TTS models)
+1
View File
@@ -245,6 +245,7 @@ export function uiSetPanelGroupCollapsed(key: string, collapsed: boolean): void
// 'export-share' // used the export function
// 'share-chat-link' // not shared a Chat Link yet
type KnownKeys =
| 'acknowledge-pwa-desktop-mode-warning' // displayed if mobile PWA is in desktop mode (layout issues)
| 'acknowledge-translation-warning' // displayed if Chrome is translating the page (may crash)
| 'beam-wizard' // first Beam
| 'call-wizard' // first Call
+11 -6
View File
@@ -34,11 +34,11 @@ export const avatarIconSx = {
width: 36,
} as const;
const largerAvatarIconsSx = {
borderRadius: 'sm',
width: 48,
height: 48,
};
// const largerAvatarIconsSx = {
// borderRadius: 'sm',
// width: 48,
// height: 48,
// };
const aixSkipBoxSx = {
height: 36,
@@ -148,7 +148,8 @@ export function makeMessageAvatarIcon(
: isTextToImage ? ANIM_BUSY_PAINTING
: isReact ? ANIM_BUSY_THINKING
: ANIM_BUSY_TYPING}
sx={larger ? largerAvatarIconsSx : avatarIconSx}
sx={avatarIconSx}
// sx={larger ? largerAvatarIconsSx : avatarIconSx}
/>;
// Purpose image (if present)
@@ -428,6 +429,10 @@ export function prettyShortChatModelName(model: string | undefined): string {
cutModel = cutModel.slice(0, cutModel.length - dateMatch[0].length); // remove '-05-15'
}
const geminiName = cutModel
// commercial aliases
.replace('gemini-3-pro-image', 'Nano Banana Pro')
.replace('gemini-2.5-flash-image', 'Nano Banana')
// root changes
.replace('non-thinking', '') // NOTE: this is our variant, injected in gemini.models.ts
.replaceAll('-', ' ')
// products
+6 -6
View File
@@ -2,7 +2,7 @@ import { addDBImageAsset } from '~/common/stores/blob/dblobs-portability';
import type { MaybePromise } from '~/common/types/useful.types';
import { convert_Base64WithMimeType_To_Blob } from '~/common/util/blobUtils';
import { create_CodeExecutionInvocation_ContentFragment, create_CodeExecutionResponse_ContentFragment, create_FunctionCallInvocation_ContentFragment, createAnnotationsVoidFragment, createDMessageDataRefDBlob, createDVoidWebCitation, createErrorContentFragment, createModelAuxVoidFragment, createPlaceholderVoidFragment, createTextContentFragment, createZyncAssetReferenceContentFragment, DVoidModelAuxPart, DVoidPlaceholderModelOp, isContentFragment, isModelAuxPart, isTextContentFragment, isVoidAnnotationsFragment, isVoidFragment } from '~/common/stores/chat/chat.fragments';
import { create_CodeExecutionInvocation_ContentFragment, create_CodeExecutionResponse_ContentFragment, create_FunctionCallInvocation_ContentFragment, createAnnotationsVoidFragment, createDMessageDataRefDBlob, createDVoidWebCitation, createErrorContentFragment, createModelAuxVoidFragment, createPlaceholderVoidFragment, createTextContentFragment, createZyncAssetReferenceContentFragment, DMessageErrorPart, DVoidModelAuxPart, DVoidPlaceholderModelOp, isContentFragment, isModelAuxPart, isTextContentFragment, isVoidAnnotationsFragment, isVoidFragment } from '~/common/stores/chat/chat.fragments';
import { ellipsizeMiddle } from '~/common/util/textUtils';
import { imageBlobTransform, PLATFORM_IMAGE_MIMETYPE } from '~/common/util/imageUtils';
import { metricsFinishChatGenerateLg, metricsPendChatGenerateLg } from '~/common/stores/metrics/metrics.chatgenerate';
@@ -98,11 +98,11 @@ export class ContentReassembler {
await this.#reassembleParticle({ cg: 'end', reason: 'abort-client', tokenStopReason: 'client-abort-signal' });
}
async setClientExcepted(errorAsText: string): Promise<void> {
async setClientExcepted(errorAsText: string, errorHint?: DMessageErrorPart['hint']): Promise<void> {
if (DEBUG_PARTICLES)
console.log('-> aix.p: issue:', errorAsText);
this.onCGIssue({ cg: 'issue', issueId: 'client-read', issueText: errorAsText });
this.onCGIssue({ cg: 'issue', issueId: 'client-read', issueText: errorAsText, issueHint: errorHint });
// NOTE: this doesn't go to the debugger anymore - as we only publish external particles to the debugger
await this.#reassembleParticle({ cg: 'end', reason: 'issue-rpc', tokenStopReason: 'cg-issue' });
@@ -481,7 +481,7 @@ export class ContentReassembler {
} catch (error: any) {
console.warn('[DEV] Failed to add inline audio to DBlobs:', { label: safeLabel, error, mimeType, size: base64Data.length });
// Add an error fragment instead
this.accumulator.fragments.push(createErrorContentFragment(`Failed to process audio: ${error?.message || 'Unknown error'}`));
this.accumulator.fragments.push(createErrorContentFragment(`Failed to process audio: ${error?.message || 'Unknown error'}`, 'aix-audio-processing'));
}
}
@@ -677,7 +677,7 @@ export class ContentReassembler {
}
}
private onCGIssue({ issueId: _issueId /* Redundant as we add an Error Fragment already */, issueText }: Extract<AixWire_Particles.ChatGenerateOp, { cg: 'issue' }>): void {
private onCGIssue({ issueId: _issueId /* Redundant as we add an Error Fragment already */, issueText, issueHint }: Extract<AixWire_Particles.ChatGenerateOp, { cg: 'issue' }> & { issueHint?: DMessageErrorPart['hint'] }): void {
// NOTE: not sure I like the flow at all here
// there seem to be some bad conditions when issues are raised while the active part is not text
if (MERGE_ISSUES_INTO_TEXT_PART_IF_OPEN) {
@@ -688,7 +688,7 @@ export class ContentReassembler {
return;
}
}
this.accumulator.fragments.push(createErrorContentFragment(issueText));
this.accumulator.fragments.push(createErrorContentFragment(issueText, issueHint));
this.currentTextFragmentIndex = null;
}
+6 -3
View File
@@ -47,7 +47,7 @@ export function aixCreateModelFromLLMOptions(
// destructure input with the overrides
const {
llmRef, llmTemperature, llmResponseTokens, llmTopP,
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch,
llmVndAnt1MContext, llmVndAntSkills, llmVndAntThinkingBudget, llmVndAntWebFetch, llmVndAntWebSearch, llmVndAntEffort,
llmVndGeminiAspectRatio, llmVndGeminiImageSize, llmVndGeminiCodeExecution, llmVndGeminiComputerUse, llmVndGeminiGoogleSearch, llmVndGeminiMediaResolution, llmVndGeminiShowThoughts, llmVndGeminiThinkingBudget, llmVndGeminiThinkingLevel,
// llmVndMoonshotWebSearch,
llmVndOaiReasoningEffort, llmVndOaiReasoningEffort4, llmVndOaiRestoreMarkdown, llmVndOaiVerbosity, llmVndOaiWebSearchContext, llmVndOaiWebSearchGeolocation, llmVndOaiImageGeneration,
@@ -105,6 +105,7 @@ export function aixCreateModelFromLLMOptions(
...(llmVndAntSkills ? { vndAntSkills: llmVndAntSkills } : {}),
...(llmVndAntWebFetch === 'auto' ? { vndAntWebFetch: llmVndAntWebFetch } : {}),
...(llmVndAntWebSearch === 'auto' ? { vndAntWebSearch: llmVndAntWebSearch } : {}),
...(llmVndAntEffort ? { vndAntEffort: llmVndAntEffort } : {}),
...(llmVndGeminiAspectRatio ? { vndGeminiAspectRatio: llmVndGeminiAspectRatio } : {}),
...(llmVndGeminiCodeExecution === 'auto' ? { vndGeminiCodeExecution: llmVndGeminiCodeExecution } : {}),
...(llmVndGeminiComputerUse ? { vndGeminiComputerUse: llmVndGeminiComputerUse } : {}),
@@ -761,8 +762,10 @@ async function _aixChatGenerateContent_LL(
// NOT retryable: e.g. client-abort, or missing handle
if (errorType === 'client-aborted')
await reassembler.setClientAborted().catch(console.error /* never */);
else
await reassembler.setClientExcepted(errorMessage).catch(console.error);
else {
const errorHint: DMessageErrorPart['hint'] = `aix-${errorType}`; // MUST MATCH our `aixClassifyStreamingError` hints with 'aix-<type>' in DMessageErrorPart
await reassembler.setClientExcepted(errorMessage, errorHint).catch(console.error);
}
// ... fall through (traditional single path)
} else {
@@ -336,6 +336,14 @@ export namespace AixWire_Tooling {
properties: z.record(z.string(), OpenAPI_Schema.Object_schema),
required: z.array(z.string()).optional(),
}).optional(),
/**
* WARNING: Anthropic-ONLY for now - support for "Programmatic Tool Calling" - 2 new fields:
 * - allowed_callers: which contexts can invoke this tool, where 'direct' is the model itself and 'code_execution' is invocation from within a container; both may be allowed
* - input_examples: array of example input objects that demonstrate format conventions, nested object population, etc.
*/
allowed_callers: z.array(z.enum(['direct', 'code_execution'])).optional(),
input_examples: z.array(z.record(z.string(), z.any())).optional(),
});
const _FunctionCallTool_schema = z.object({
@@ -422,10 +430,31 @@ export namespace AixWire_API {
maxTokens: z.number().min(1).optional(),
topP: z.number().min(0).max(1).optional(),
forceNoStream: z.boolean().optional(),
// Cross-vendor Structured Outputs
/**
* Constrain model response to a JSON schema for data extraction. Response will be valid JSON. Schema limitations vary by vendor.
* Supported: Anthropic (output_format), OpenAI (response_format), Gemini (responseSchema)
*/
strictJsonOutput: z.object({
name: z.string().optional(), // Required by OpenAI, optional elsewhere
description: z.string().optional(), // Helps model understand the schema's purpose
schema: z.any(), // JSON Schema object
}).optional(),
/**
* Enable strict schema validation for tool/function call invocations. Guarantees tool inputs exactly match the input_schema. Eliminates validation/retry logic.
* Supported: Anthropic (strict:true), OpenAI (strict:true). Gemini: not supported yet.
*/
strictToolInvocations: z.boolean().optional(),
// Anthropic
vndAnt1MContext: z.boolean().optional(),
vndAntEffort: z.enum(['low', 'medium', 'high']).optional(),
vndAntSkills: z.string().optional(),
vndAntThinkingBudget: z.number().nullable().optional(),
vndAntToolSearch: z.enum(['regex', 'bm25']).optional(), // Tool Search Tool variant
vndAntWebFetch: z.enum(['auto']).optional(),
vndAntWebSearch: z.enum(['auto']).optional(),
// Gemini
@@ -103,13 +103,18 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
// console.log(`Anthropic: hotFixStartWithUser (${chatMessages.length} messages) - ${hackSystemMessageFirstLine}`);
// }
// [Anthropic, 2025-11-13] constrained output modes - both JSON and tool invocations
const strictToolsEnabled = !!model.strictToolInvocations;
// [Anthropic, 2025-11-24] Tool Search Tool - when enabled, all custom tools get defer_loading: true
const toolSearchEnabled = !!model.vndAntToolSearch;
// Construct the request payload
const payload: TRequest = {
max_tokens: model.maxTokens !== undefined ? model.maxTokens : 8192,
model: model.id,
system: systemMessage,
messages: chatMessages,
tools: chatGenerate.tools && _toAnthropicTools(chatGenerate.tools),
tools: chatGenerate.tools && _toAnthropicTools(chatGenerate.tools, strictToolsEnabled, toolSearchEnabled),
tool_choice: chatGenerate.toolsPolicy && _toAnthropicToolChoice(chatGenerate.toolsPolicy),
// metadata: { user_id: ... }
// stop_sequences: undefined,
@@ -138,6 +143,26 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
delete payload.temperature;
}
// [Anthropic] Effort parameter [Anthropic, effort-2025-11-24]
if (model.vndAntEffort /*&& model.vndAntEffort !== 'high'*/)
payload.output_config = {
effort: model.vndAntEffort,
};
// [Anthropic, 2025-11-13] Structured Outputs - JSON output format
if (model.strictJsonOutput) {
// auto-add additionalProperties: false to root object if not present - required by Anthropic
let schema = model.strictJsonOutput.schema;
if (schema && typeof schema === 'object' && schema.type === 'object' && schema.additionalProperties === undefined)
schema = { ...schema, additionalProperties: false };
payload.output_format = { type: 'json_schema', schema };
// warn about incompatible features (citations are enabled via web_fetch tool)
if (model.vndAntWebFetch === 'auto')
console.warn('[Anthropic] Structured output_format may conflict with web_fetch citations');
}
// --- Tools ---
// Allow/deny auto-adding hosted tools when custom tools are present
@@ -168,6 +193,18 @@ export function aixToAnthropicMessageCreate(model: AixAPI_Model, _chatGenerate:
});
}
// [Anthropic, 2025-11-24] Tool Search Tool(s)
if (model.vndAntToolSearch === 'regex')
hostedTools.push({
type: 'tool_search_tool_regex_20251119',
name: 'tool_search_tool_regex',
});
else if (model.vndAntToolSearch === 'bm25')
hostedTools.push({
type: 'tool_search_tool_bm25_20251119',
name: 'tool_search_tool_bm25',
});
// Merge hosted tools with custom tools
if (hostedTools.length > 0) {
payload.tools = payload.tools ? [...payload.tools, ...hostedTools] : hostedTools;
@@ -353,12 +390,12 @@ function* _generateAnthropicMessagesContentBlocks({ parts, role }: AixMessages_C
}
}
function _toAnthropicTools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
function _toAnthropicTools(itds: AixTools_ToolDefinition[], strictToolsEnabled: boolean, toolSearchToolEnabled: boolean): NonNullable<TRequest['tools']> {
return itds.map(itd => {
switch (itd.type) {
case 'function_call':
const { name, description, input_schema } = itd.function_call;
const { name, description, input_schema, allowed_callers, input_examples } = itd.function_call;
return {
type: 'custom', // we could not set it, but it helps our typesystem with discrimination
name,
@@ -367,7 +404,16 @@ function _toAnthropicTools(itds: AixTools_ToolDefinition[]): NonNullable<TReques
type: 'object',
properties: input_schema?.properties || null, // Anthropic valid values for input_schema.properties are 'object' or 'null' (null is used to declare functions with no inputs)
required: input_schema?.required,
// [Anthropic, 2025-11-13] Structured Outputs requires additionalProperties: false
...(strictToolsEnabled ? { additionalProperties: false } : {}),
},
// [Anthropic, 2025-11-13] Structured Outputs: strict mode guarantees tool inputs match schema
...(strictToolsEnabled ? { strict: true } : {}),
// [Anthropic, 2025-11-24] Tool Search Tool - auto-defer all custom tools
...(toolSearchToolEnabled ? { defer_loading: true } : {}),
// [Anthropic, 2025-11-24] Programmatic Tool Calling - pass through allowed_callers and input_examples
...(allowed_callers ? { allowed_callers: allowed_callers.map(c => c === 'code_execution' ? 'code_execution_20250825' : c) } : {}),
...(input_examples ? { input_examples } : {}),
};
case 'code_execution':
@@ -61,6 +61,11 @@ export function aixToGeminiGenerateContent(model: AixAPI_Model, _chatGenerate: A
// Chat Messages
const contents: TRequest['contents'] = _toGeminiContents(chatGenerate.chatSequence, api3RequiresSignatures);
// constrained output modes - only JSON (not tool invocations for now)
const jsonOutputEnabled = !!model.strictJsonOutput || jsonOutput;
const jsonOutputSchema = model.strictJsonOutput?.schema;
// const strictToolInvocation = model.strictToolInvocations; // Gemini does not seem to support this yet - need to confirm
// Construct the request payload
const payload: TRequest = {
contents,
@@ -68,8 +73,8 @@ export function aixToGeminiGenerateContent(model: AixAPI_Model, _chatGenerate: A
systemInstruction,
generationConfig: {
stopSequences: undefined, // (default, optional)
responseMimeType: jsonOutput ? 'application/json' : undefined,
responseSchema: undefined, // (default, optional) NOTE: for JSON output, we'd take the schema here
responseMimeType: jsonOutputEnabled ? 'application/json' : undefined,
responseSchema: jsonOutputSchema,
candidateCount: undefined, // (default, optional)
maxOutputTokens: model.maxTokens !== undefined ? model.maxTokens : undefined,
...(model.temperature !== null ? { temperature: model.temperature !== undefined ? model.temperature : undefined } : {}),
@@ -384,7 +389,8 @@ function _toGeminiContents(chatSequence: AixMessages_ChatMessage[], apiRequiresS
// if not applied yet, and required for this part type, apply bypass dummy and warn
else if (partRequiresSignature) {
tsTarget.thoughtSignature = GEMINI_BYPASS_THOUGHT_SIGNATURE;
console.log('[Gemini 3] Message part missing thoughtSignature - using bypass dummy (cross-provider or edited content)');
// [Gemini 3, 2025-11-20] Cross-provider or edited content warning
console.log(`[Gemini 3] ${part.pt} missing thoughtSignature - bypass applied`);
}
}
}
@@ -29,7 +29,7 @@ const approxSystemMessageJoiner = '\n\n---\n\n';
type TRequest = OpenAIWire_API_Chat_Completions.Request;
type TRequestMessages = TRequest['messages'];
export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, jsonOutput: boolean, streaming: boolean): TRequest {
export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model: AixAPI_Model, _chatGenerate: AixAPIChatGenerate_Request, streaming: boolean): TRequest {
// Pre-process CGR - approximate spill of System to User message
const chatGenerate = aixSpillSystemToUser(_chatGenerate);
@@ -70,11 +70,15 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
chatMessages = _fixAlternateUserAssistantRoles(chatMessages);
// constrained output modes - both JSON and tool invocations
// const strictJsonOutput = !!model.strictJsonOutput;
const strictToolInvocations = !!model.strictToolInvocations;
// Construct the request payload
let payload: TRequest = {
model: model.id,
messages: chatMessages,
tools: chatGenerate.tools && _toOpenAITools(chatGenerate.tools),
tools: chatGenerate.tools && _toOpenAITools(chatGenerate.tools, strictToolInvocations),
tool_choice: chatGenerate.toolsPolicy && _toOpenAIToolChoice(openAIDialect, chatGenerate.toolsPolicy),
parallel_tool_calls: undefined,
max_tokens: model.maxTokens !== undefined ? model.maxTokens : undefined,
@@ -83,7 +87,15 @@ export function aixToOpenAIChatCompletions(openAIDialect: OpenAIDialects, model:
n: hotFixOnlySupportN1 ? undefined : 0, // NOTE: we choose to not support this at the API level - most downstream ecosystems support 1 only, which is the default
stream: streaming,
stream_options: streaming ? { include_usage: true } : undefined,
response_format: jsonOutput ? { type: 'json_object' } : undefined,
response_format: model.strictJsonOutput ? {
type: 'json_schema',
json_schema: {
name: model.strictJsonOutput.name || 'response',
description: model.strictJsonOutput.description,
schema: model.strictJsonOutput.schema,
strict: true,
},
} : undefined,
seed: undefined,
stop: undefined,
user: undefined,
@@ -623,7 +635,7 @@ function _toOpenAIMessages(systemMessage: AixMessages_SystemMessage | null, chat
return chatMessages;
}
function _toOpenAITools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['tools']> {
function _toOpenAITools(itds: AixTools_ToolDefinition[], strictToolInvocations: boolean): NonNullable<TRequest['tools']> {
return itds.map(itd => {
const itdType = itd.type;
switch (itdType) {
@@ -639,7 +651,9 @@ function _toOpenAITools(itds: AixTools_ToolDefinition[]): NonNullable<TRequest['
type: 'object',
properties: input_schema?.properties ?? {},
required: input_schema?.required,
...(strictToolInvocations ? { additionalProperties: false } : {}), // required for strict tool invocations
},
...(strictToolInvocations ? { strict: true } : {}), // enable strict (grammar-constrained) tool invocation inputs
},
};
@@ -27,7 +27,6 @@ export function aixToOpenAIResponses(
openAIDialect: OpenAIDialects,
model: AixAPI_Model,
_chatGenerate: AixAPIChatGenerate_Request,
jsonOutput: boolean,
streaming: boolean,
enableResumability: boolean,
): TRequest {
@@ -51,6 +50,10 @@ export function aixToOpenAIResponses(
// NOTE: the zod parsing will remove the undefined values from the upstream request, enabling an easier construction
// ---
// constrained output modes - both JSON and tool invocations
// const strictJsonOutput = !!model.strictJsonOutput;
const strictToolInvocations = !!model.strictToolInvocations;
const { requestInput, requestInstructions } = _toOpenAIResponsesRequestInput(chatGenerate.systemMessage, chatGenerate.chatSequence);
const payload: TRequest = {
@@ -65,7 +68,7 @@ export function aixToOpenAIResponses(
input: requestInput,
// Tools
tools: chatGenerate.tools && _toOpenAIResponsesTools(chatGenerate.tools),
tools: chatGenerate.tools && _toOpenAIResponsesTools(chatGenerate.tools, strictToolInvocations),
tool_choice: chatGenerate.toolsPolicy && _toOpenAIResponsesToolChoice(chatGenerate.toolsPolicy),
// parallel_tool_calls: undefined, // response if unset: true
@@ -98,15 +101,18 @@ export function aixToOpenAIResponses(
payload.top_p = model.topP;
}
// JSON output: not implemented yet - will need a schema definition (similar to the tool args definition)
if (jsonOutput) {
console.warn('[DEV] notImplemented: responses: jsonOutput');
// payload.text = {
// format: {
// type: 'json_schema',
// },
// };
}
// Structured Outputs - JSON output grammar
if (model.strictJsonOutput)
payload.text = {
...payload.text,
format: {
type: 'json_schema',
name: model.strictJsonOutput.name || 'response',
description: model.strictJsonOutput.description,
schema: model.strictJsonOutput.schema,
strict: true,
},
};
// GPT-5 Verbosity: Add to existing text config or create new one
if (model.vndOaiVerbosity) {
@@ -481,7 +487,7 @@ function _toOpenAIResponsesRequestInput(systemMessage: AixMessages_SystemMessage
};
}
function _toOpenAIResponsesTools(itds: AixTools_ToolDefinition[]): NonNullable<TRequestTool[]> {
function _toOpenAIResponsesTools(itds: AixTools_ToolDefinition[], strictToolInvocations: boolean): NonNullable<TRequestTool[]> {
return itds.map(itd => {
const itdType = itd.type;
switch (itdType) {
@@ -496,7 +502,9 @@ function _toOpenAIResponsesTools(itds: AixTools_ToolDefinition[]): NonNullable<T
type: 'object',
properties: input_schema?.properties ?? {},
required: input_schema?.required,
...(strictToolInvocations ? { additionalProperties: false } : {}), // required for strict tool invocations
},
...(strictToolInvocations ? { strict: true } : {}), // enable strict (grammar-constrained) tool invocation inputs
};
case 'code_execution':
@@ -49,11 +49,24 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
const { dialect } = access;
switch (dialect) {
case 'anthropic': {
// [Anthropic, 2025-11-24] Detect if any tool uses Programmatic Tool Calling features (allowed_callers, input_examples)
const usesProgrammaticToolCalling = chatGenerate.tools?.some(tool =>
tool.type === 'function_call' && (
tool.function_call.allowed_callers?.includes('code_execution') ||
(tool.function_call.input_examples && tool.function_call.input_examples.length > 0)
),
) ?? false;
const anthropicRequest = anthropicAccess(access, '/v1/messages', {
modelIdForBetaFeatures: model.id,
vndAntWebFetch: model.vndAntWebFetch === 'auto',
vndAnt1MContext: model.vndAnt1MContext === true,
vndAntEffort: !!model.vndAntEffort,
enableSkills: !!model.vndAntSkills,
enableStrictOutputs: !!model.strictJsonOutput || !!model.strictToolInvocations, // [Anthropic, 2025-11-13] for both JSON output and grammar-constrained tool invocations inputs
enableToolSearch: !!model.vndAntToolSearch,
enableProgrammaticToolCalling: usesProgrammaticToolCalling,
// enableCodeExecution: ...
});
@@ -96,8 +109,8 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
request: {
...ollamaAccess(access, '/v1/chat/completions'), // use the OpenAI-compatible endpoint
method: 'POST',
// body: ollamaChatCompletionPayload(model, _hist, access.ollamaJson, streaming),
body: aixToOpenAIChatCompletions('openai', model, chatGenerate, access.ollamaJson, streaming),
// body: ollamaChatCompletionPayload(model, _hist, streaming),
body: aixToOpenAIChatCompletions('openai', model, chatGenerate, streaming),
},
// demuxerFormat: streaming ? 'json-nl' : null,
demuxerFormat: streaming ? 'fast-sse' : null,
@@ -130,7 +143,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
request: {
...openAIAccess(access, model.id, '/v1/responses'),
method: 'POST',
body: aixToOpenAIResponses(dialect, model, chatGenerate, false, streaming, enableResumability),
body: aixToOpenAIResponses(dialect, model, chatGenerate, streaming, enableResumability),
},
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createOpenAIResponsesEventParser() : createOpenAIResponseParserNS(),
@@ -141,7 +154,7 @@ export function createChatGenerateDispatch(access: AixAPI_Access, model: AixAPI_
request: {
...openAIAccess(access, model.id, '/v1/chat/completions'),
method: 'POST',
body: aixToOpenAIChatCompletions(dialect, model, chatGenerate, false, streaming),
body: aixToOpenAIChatCompletions(dialect, model, chatGenerate, streaming),
},
demuxerFormat: streaming ? 'fast-sse' : null,
chatGenerateParse: streaming ? createOpenAIChatCompletionsChunkParser() : createOpenAIChatCompletionsParserNS(),
@@ -153,7 +153,7 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
console.log(`ant content_block_start[${index}]: type=${content_block.type}, ${debugInfo}`);
}
switch (content_block.type) {
switch (content_block.type) { // .content_block_start.type
case 'text':
pt.appendText(content_block.text);
// Note: In streaming mode, citations arrive via citations_delta events, not on content_block_start
@@ -173,6 +173,12 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
// [Anthropic] Note: .input={} and is parsed as an object - if that's the case, we zap it to ''
if (content_block && typeof content_block.input === 'object' && Object.keys(content_block.input).length === 0)
content_block.input = null;
// [Anthropic, 2025-11-24] Programmatic Tool Calling - detect if called from code execution
const isProgrammaticCall = content_block.caller?.type === 'code_execution_20250825';
if (isProgrammaticCall && ANTHROPIC_DEBUG_EVENT_SEQUENCE)
console.log(`[Anthropic] Programmatic tool call: ${content_block.name} called from code_execution (tool_id: ${content_block.caller?.type === 'code_execution_20250825' ? content_block.caller.tool_id : 'n/a'})`);
pt.startFunctionCallInvocation(content_block.id, content_block.name, 'incr_str', content_block.input! ?? null);
break;
@@ -183,7 +189,7 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
content_block.input = null;
// Show placeholder for known server tools
switch (content_block.name) {
switch (content_block.name) { // .server_tool_use.name
case 'web_search':
pt.sendVoidPlaceholder('search-web', 'Searching the web...');
break;
@@ -197,6 +203,11 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
case 'text_editor_code_execution':
pt.sendVoidPlaceholder('code-exec', '⚡ Executing code...');
break;
// [Anthropic, 2025-11-24] Tool Search Tool
case 'tool_search_tool_regex':
case 'tool_search_tool_bm25':
pt.sendVoidPlaceholder('code-exec', '🔍 Searching available tools...');
break;
default:
// For unknown server tools (e.g., future Skills), show a generic placeholder instead of throwing
console.warn(`[Anthropic Parser] Unknown server tool: ${content_block.name}`);
@@ -351,6 +362,19 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
// using the Files API with content_block.file_id
break;
case 'tool_result': // [Anthropic, 2025-11-24] Tool Search Tool - The actual tool definitions are auto-expanded by Anthropic's API
if (Array.isArray(content_block.content)) {
// success
const toolNames = content_block.content.map((ref: { type: string; tool_name: string }) => ref.tool_name);
pt.sendVoidPlaceholder('code-exec', `🔍 Discovered ${toolNames.length} tool(s): ${toolNames.join(', ')}`);
// Log for future debugging
console.log('[Anthropic] Tool search discovered:', { tools: toolNames });
} else if (content_block.content?.type === 'tool_search_tool_result_error') {
// error during tool search
pt.sendVoidPlaceholder('code-exec', `🔍 Tool search error: ${content_block.content.error_code}`);
}
break;
default:
const _exhaustiveCheck: never = content_block;
throw new Error(`Unexpected content block type: ${(content_block as any).type}`);
@@ -474,6 +498,7 @@ export function createAnthropicMessageParser(): ChatGenerateParseFunction {
if (tokenStopReason !== null)
pt.setTokenStopReason(tokenStopReason);
// NOTE: we have more fields we're not parsing yet - https://platform.claude.com/docs/en/api/typescript/messages#message_delta_usage
if (usage?.output_tokens && messageStartTime) {
const elapsedTimeMilliseconds = Date.now() - messageStartTime;
const elapsedTimeSeconds = elapsedTimeMilliseconds / 1000;
@@ -570,7 +595,7 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
for (let i = 0; i < content.length; i++) {
const contentBlock = content[i];
const isLastBlock = i === content.length - 1;
switch (contentBlock.type) {
switch (contentBlock.type) { // .content_block (non-streaming)
case 'text':
pt.appendText(contentBlock.text);
// Handle citations if present (non-streaming mode has all citations attached)
@@ -603,6 +628,12 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
case 'tool_use':
// NOTE: this gets parsed as an object, not string deltas of a json!
// [Anthropic, 2025-11-24] Programmatic Tool Calling - detect if called from code execution
const isProgrammaticCallNS = contentBlock.caller?.type === 'code_execution_20250825';
if (isProgrammaticCallNS)
console.log(`[Anthropic] Programmatic tool call (non-streaming): ${contentBlock.name} called from code_execution (tool_id: ${contentBlock.caller?.type === 'code_execution_20250825' ? contentBlock.caller.tool_id : 'n/a'})`);
pt.startFunctionCallInvocation(contentBlock.id, contentBlock.name, 'json_object', (contentBlock.input as object) || null);
pt.endMessagePart();
break;
@@ -610,7 +641,7 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
case 'server_tool_use':
// Server tool use in non-streaming mode
// NOTE: We don't create tool invocations for server tools - just show placeholders
switch (contentBlock.name) {
switch (contentBlock.name) { // .server_tool_use.name
case 'web_search':
pt.sendVoidPlaceholder('search-web', 'Searching the web...');
break;
@@ -623,6 +654,11 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
case 'text_editor_code_execution':
pt.sendVoidPlaceholder('code-exec', '⚡ Executing code...');
break;
// [Anthropic, 2025-11-24] Tool Search Tool
case 'tool_search_tool_regex':
case 'tool_search_tool_bm25':
pt.sendVoidPlaceholder('code-exec', '🔍 Searching available tools...');
break;
default:
console.warn(`[Anthropic Parser] Unknown server tool (non-streaming): ${contentBlock.name}`);
pt.sendVoidPlaceholder('code-exec', `⚡ Using ${contentBlock.name}...`);
@@ -771,6 +807,19 @@ export function createAnthropicMessageParserNS(): ChatGenerateParseFunction {
});
break;
case 'tool_result': // [Anthropic, 2025-11-24] Tool Search Tool - The actual tool definitions are auto-expanded by Anthropic's API
if (Array.isArray(contentBlock.content)) {
// success
const toolNames = contentBlock.content.map((ref: { type: string; tool_name: string }) => ref.tool_name);
pt.sendVoidPlaceholder('code-exec', `🔍 Discovered ${toolNames.length} tool(s): ${toolNames.join(', ')}`);
// Log for future debugging
console.log('[Anthropic] Tool search discovered (non-streaming):', { tools: toolNames });
} else if ((contentBlock.content as any)?.type === 'tool_search_tool_result_error') {
// error during tool search
pt.sendVoidPlaceholder('code-exec', `🔍 Tool search error: ${(contentBlock.content as any).error_code}`);
}
break;
default:
const _exhaustiveCheck: never = contentBlock;
throw new Error(`Unexpected content block type: ${(contentBlock as any).type}`);
@@ -10,6 +10,12 @@ import * as z from 'zod/v4';
*
* ## Updates
*
* ### 2025-11-24 - Programmatic Tool Calling (Beta: advanced-tool-use-2025-11-20)
* - ToolUseBlock: added 'caller' field to indicate direct vs programmatic invocation
* - CustomToolDefinition: added 'allowed_callers' field to restrict tool invocation contexts
* - CustomToolDefinition: added 'input_examples' field for improved accuracy
* - New ToolUseCaller_schema for discriminating caller types
*
* ### 2025-10-17 - MAJOR: Server Tools & 2025 API Additions
* - ContentBlockOutput: added 9 new server tool response block types
* - ToolDefinition: added 9 new 2025 tool types (web_search, web_fetch, memory, code_execution, etc.)
@@ -119,6 +125,17 @@ export namespace AnthropicWire_Blocks {
id: z.string(),
name: z.string(), // length: 1-64
input: z.any(), // Formally an 'object', but relaxed for robust parsing, and code-enforced
/**
* [Anthropic, 2025-11-24] Programmatic Tool Calling - indicates how this tool was invoked.
* Requires the advanced-tool-use-2025-11-20 beta feature.
*/
caller: z.discriminatedUnion('type', [
z.object({ type: z.literal('direct') }), // model called tool directly
z.object({
type: z.literal('code_execution_20250825'), // tool called programmatically from within code execution
tool_id: z.string(), // ref the server_tool_use (code_execution) that made this call
}),
]).optional(),
});
@@ -224,6 +241,8 @@ export namespace AnthropicWire_Blocks {
'code_execution',
'bash_code_execution', // sub-tool of 'code_execution'
'text_editor_code_execution', // sub-tool of 'code_execution'
'tool_search_tool_regex', // Tool Search Tool - regex variant
'tool_search_tool_bm25', // Tool Search Tool - BM25 (natural text) variant
]),
z.string(), // forward-compatibility parsing
]),
@@ -369,6 +388,27 @@ export namespace AnthropicWire_Blocks {
file_id: z.string(),
});
/**
* [Anthropic, 2025-11-24] Tool Search Tool - Result of tool search operation
* Contains either an array of tool references or an error.
*/
export const ToolSearchToolResultBlock_schema = _CommonBlock_schema.extend({
type: z.literal('tool_result'),
tool_use_id: z.string(),
content: z.union([
// success - array of tool references
z.array(z.object({
type: z.literal('tool_reference'),
tool_name: z.string(),
})),
// error
z.object({
type: z.literal('tool_search_tool_result_error'),
error_code: z.union([z.enum(['too_many_requests', 'invalid_pattern', 'pattern_too_long', 'unavailable']), z.string() /* forward-compatibility */]),
}),
]),
});
/// Block Constructors
@@ -457,6 +497,7 @@ export namespace AnthropicWire_Messages {
* - Code execution tool result, Bash code execution tool result, Text editor code execution tool result
* - MCP tool use, MCP tool result
* - Container upload
* - Tool reference
*/
export const ContentBlockOutput_schema = z.discriminatedUnion('type', [
// Common Blocks (both input and output)
@@ -474,6 +515,7 @@ export namespace AnthropicWire_Messages {
AnthropicWire_Blocks.MCPToolUseBlock_schema,
AnthropicWire_Blocks.MCPToolResultBlock_schema,
AnthropicWire_Blocks.ContainerUploadBlock_schema,
AnthropicWire_Blocks.ToolSearchToolResultBlock_schema, // [Anthropic, 2025-11-24] Tool Search Tool
]);
}
@@ -543,6 +585,19 @@ export namespace AnthropicWire_Tools {
properties: z.record(z.string(), z.any()).nullish(), // FC-DEF params schema - WAS: z.json().nullable(),
required: z.array(z.string()).optional(), // 2025-02-24: seems to be removed; we may still have this, but it may also be within the 'properties' object
}),
/**
* [Anthropic, 2025-11-24] Tool Search Tool - when true, this tool is not loaded into context initially and can be discovered via the tool search tool when needed.
*/
defer_loading: z.boolean().optional(),
/**
* [Anthropic, 2025-11-24] Programmatic Tool Calling - 2 new fields:
* - specifies which contexts can invoke this tool
* - concrete usage examples to improve accuracy - can increase accuracy (e.g. 72% -> 90% in examples)
*/
allowed_callers: z.array(z.enum(['direct', 'code_execution_20250825'])).optional(), // can be both ['direct', 'code_execution_20250825']
input_examples: z.array(z.record(z.string(), z.any())).optional(),
});
// Latest Tool Versions (sorted alphabetically by tool name)
@@ -589,6 +644,18 @@ export namespace AnthropicWire_Tools {
max_characters: z.number().nullish(),
});
/**
 * [Anthropic, 2025-11-24] Tool Search Tool - regex variant.
 * Server-side tool definition: the model constructs regex patterns
 * (e.g., "weather", "get_.*_data") to search tool names/descriptions, so that
 * deferred tools (see `defer_loading` on custom tools) can be discovered on
 * demand instead of being loaded into context upfront.
 * Requires the 'advanced-tool-use-2025-11-20' beta header.
 */
const _ToolSearchToolRegex_20251119_schema = _ToolDefinitionBase_schema.extend({
  type: z.literal('tool_search_tool_regex_20251119'), // versioned tool type
  name: z.literal('tool_search_tool_regex'), // fixed server-side tool name
});
/**
 * [Anthropic, 2025-11-24] Tool Search Tool - BM25 variant.
 * Server-side tool definition: the model issues natural-language queries
 * (ranked with BM25) to search tool names/descriptions, as an alternative to
 * the regex variant above.
 * Requires the 'advanced-tool-use-2025-11-20' beta header.
 */
const _ToolSearchToolBM25_20251119_schema = _ToolDefinitionBase_schema.extend({
  type: z.literal('tool_search_tool_bm25_20251119'), // versioned tool type
  name: z.literal('tool_search_tool_bm25'), // fixed server-side tool name
});
const _WebFetchTool_20250910_schema = _ToolDefinitionBase_schema.extend({
type: z.literal('web_fetch_20250910'),
name: z.literal('web_fetch'),
@@ -617,6 +684,8 @@ export namespace AnthropicWire_Tools {
_ComputerUseTool_20250124_schema,
_MemoryTool_20250818_schema,
_TextEditor_20250728_schema,
_ToolSearchToolBM25_20251119_schema, // [Anthropic, 2025-11-24] Tool Search Tool - BM25 variant
_ToolSearchToolRegex_20251119_schema, // [Anthropic, 2025-11-24] Tool Search Tool - Regex variant
_WebFetchTool_20250910_schema,
_WebSearchTool_20250305_schema,
]);
@@ -769,6 +838,14 @@ export namespace AnthropicWire_API_Message_Create {
z.object({ type: z.literal('disabled') }),
]).optional(),
/**
* [Anthropic, effort-2025-11-24] Output configuration for effort-based token control.
* Allows trading off response thoroughness for efficiency (Claude Opus 4.5+ only).
*/
output_config: z.object({
effort: z.enum(['low', 'medium', 'high']).optional(),
}).optional(),
/**
* Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks.
*/
@@ -785,6 +862,17 @@ export namespace AnthropicWire_API_Message_Create {
* Recommended for advanced use cases only. You usually only need to use `temperature`.
* */
top_p: z.number().optional(),
/**
* [Anthropic, 2025-11-13] Structured Outputs - JSON output format configuration.
* Constrains Claude's response to follow a specific JSON schema.
* Beta feature requiring header: "structured-outputs-2025-11-13"
* Available for Claude Sonnet 4.5 and Claude Opus 4.1+.
*/
output_format: z.object({
type: z.literal('json_schema'),
schema: z.any(), // JSON Schema object - validated by Anthropic
}).optional(),
});
/// Response
@@ -831,6 +919,7 @@ export namespace AnthropicWire_API_Message_Create {
server_tool_use: z.object({
web_fetch_requests: z.number(),
web_search_requests: z.number(),
tool_search_requests: z.number().optional(), // [Anthropic, 2025-11-24] Tool Search Tool usage
}).nullish(),
service_tier: z.enum(['standard', 'priority', 'batch']).nullish(),
}),
@@ -872,8 +961,18 @@ export namespace AnthropicWire_API_Message_Create {
stop_reason: StopReason_schema.nullable(),
stop_sequence: z.string().nullable(),
}),
// MessageDeltaUsage
usage: z.object({ output_tokens: z.number() }),
// MessageDeltaUsage - extended to include cache and server tool metrics
usage: z.object({
cache_creation_input_tokens: z.number().nullish(),
cache_read_input_tokens: z.number().nullish(),
input_tokens: z.number().nullish(),
output_tokens: z.number(),
server_tool_use: z.object({
web_fetch_requests: z.number().optional(),
web_search_requests: z.number().optional(),
tool_search_requests: z.number().optional(),
}).nullish(),
}),
});
export const event_ContentBlockStart_schema = z.object({
+2 -2
View File
@@ -1,7 +1,7 @@
import { hasGoogleAnalytics, sendGAEvent } from '~/common/components/3rdparty/GoogleAnalytics';
import type { DModelsService, DModelsServiceId } from '~/common/stores/llms/llms.service.types';
import { DLLM, LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
import { DLLM, DModelInterfaceV1, LLM_IF_HOTFIX_NoTemperature, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn } from '~/common/stores/llms/llms.types';
import { applyModelParameterInitialValues, FALLBACK_LLM_PARAM_TEMPERATURE } from '~/common/stores/llms/llms.parameters';
import { isModelPricingFree } from '~/common/stores/llms/llms.pricing';
import { llmsStoreActions } from '~/common/stores/llms/store-llms';
@@ -88,7 +88,7 @@ function _createDLLMFromModelDescription(d: ModelDescriptionSchema, service: DMo
contextTokens,
maxOutputTokens,
trainingDataCutoff: d.trainingDataCutoff,
interfaces: d.interfaces?.length ? d.interfaces : _fallbackInterfaces,
interfaces: d.interfaces?.length ? d.interfaces as DModelInterfaceV1[] : _fallbackInterfaces,
benchmark: d.benchmark,
// pricing?: ..., // set below, since it needs some adaptation
@@ -120,6 +120,12 @@ const _antWebFetchOptions = [
{ value: _UNSPECIFIED, label: 'Off', description: 'Disabled (default)' },
] as const;
// [Anthropic, effort-2025-11-24] Options for the Effort selector (Claude Opus 4.5+),
// trading response thoroughness for speed/efficiency.
// NOTE: _UNSPECIFIED doubles as 'High' because high is the API default — the
// llmVndAntEffort onChange handler removes the parameter for both values.
const _antEffortOptions = [
  { value: _UNSPECIFIED, label: 'High', description: 'Maximum capability (default)' },
  { value: 'medium', label: 'Medium', description: 'Balanced speed and quality' },
  { value: 'low', label: 'Low', description: 'Fastest, most efficient' },
] as const;
// const _moonshotWebSearchOptions = [
// { value: 'auto', label: 'On', description: 'Enable Kimi $web_search ($0.005 per search)' },
// { value: _UNSPECIFIED, label: 'Off', description: 'Disabled (default)' },
@@ -187,6 +193,7 @@ export function LLMParametersEditor(props: {
llmTemperature = FALLBACK_LLM_PARAM_TEMPERATURE, // fallback for undefined, result is number | null
llmForceNoStream,
llmVndAnt1MContext,
llmVndAntEffort,
llmVndAntSkills,
llmVndAntThinkingBudget,
llmVndAntWebFetch,
@@ -317,6 +324,19 @@ export function LLMParametersEditor(props: {
/>
)}
{showParam('llmVndAntEffort') && (
<FormSelectControl
title='Effort'
tooltip='Controls token usage vs. thoroughness. Low = fastest, most efficient. High = maximum capability (default). Works alongside thinking budget.'
value={llmVndAntEffort ?? _UNSPECIFIED}
onChange={(value) => {
if (value === _UNSPECIFIED || !value || value === 'high') onRemoveParameter('llmVndAntEffort');
else onChangeParameter({ llmVndAntEffort: value });
}}
options={_antEffortOptions}
/>
)}
{showParam('llmVndAntWebSearch') && (
<FormSelectControl
title='Web Search'
@@ -74,8 +74,12 @@ export type AnthropicHeaderOptions = {
modelIdForBetaFeatures?: string;
vndAntWebFetch?: boolean;
vndAnt1MContext?: boolean;
vndAntEffort?: boolean; // [Anthropic, effort-2025-11-24]
enableSkills?: boolean;
enableCodeExecution?: boolean;
enableStrictOutputs?: boolean; // [Anthropic, 2025-11-13] Structured Outputs (JSON outputs & strict tool use)
enableToolSearch?: boolean; // [Anthropic, 2025-11-24] Tool Search Tool
enableProgrammaticToolCalling?: boolean; // [Anthropic, 2025-11-24] Programmatic Tool Calling (allowed_callers, input_examples)
clientSideFetch?: boolean; // whether the request will be made from client-side (browser) - adds CORS header
};
@@ -156,6 +160,19 @@ function _anthropicHeaders(options?: AnthropicHeaderOptions): Record<string, str
betaFeatures.push('code-execution-2025-08-25');
}
// [Anthropic, 2025-11-24] Add beta feature for effort parameter (Claude Opus 4.5+)
if (options?.vndAntEffort)
betaFeatures.push('effort-2025-11-24');
// [Anthropic, 2025-11-24] Add beta feature for Advanced Tool Use (Tool Search Tool, Programmatic Tool Calling)
// Same beta header covers both features: tool discovery and programmatic calling from code execution
if (options?.enableToolSearch || options?.enableProgrammaticToolCalling)
betaFeatures.push('advanced-tool-use-2025-11-20');
// [Anthropic, 2025-11-13] Add beta feature for Structured Outputs (JSON outputs & strict tool use)
if (options?.enableStrictOutputs)
betaFeatures.push('structured-outputs-2025-11-13');
return {
...DEFAULT_ANTHROPIC_HEADERS,
// CORS: allow browser access to Anthropic API servers
@@ -1,6 +1,6 @@
import * as z from 'zod/v4';
import { LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import { LLM_IF_ANT_PromptCaching, LLM_IF_ANT_ToolsSearch, LLM_IF_OAI_Chat, LLM_IF_OAI_Fn, LLM_IF_OAI_Reasoning, LLM_IF_OAI_Vision, LLM_IF_Tools_WebSearch } from '~/common/stores/llms/llms.types';
import { Release } from '~/common/app.release';
import type { ModelDescriptionSchema } from '../llm.server.types';
@@ -10,6 +10,9 @@ import type { ModelDescriptionSchema } from '../llm.server.types';
export const DEV_DEBUG_ANTHROPIC_MODELS = Release.IsNodeDevBuild;
const IF_4 = [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching];
const IF_4_R = [...IF_4, LLM_IF_OAI_Reasoning];
const ANT_PAR_WEB: ModelDescriptionSchema['parameterSpecs'] = [
{ paramId: 'llmVndAntWebSearch' },
{ paramId: 'llmVndAntWebFetch' },
@@ -26,13 +29,22 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
// NOTE: what's not redefined below is inherited from the underlying model definition
// Claude 4.5 models with thinking variants
'claude-opus-4-5-20251101': {
idVariant: 'thinking',
label: 'Claude Opus 4.5 (Thinking)',
description: 'Claude Opus 4.5 with extended thinking mode for complex reasoning and agentic workflows',
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAntEffort' }, { paramId: 'llmVndAntSkills' }],
maxCompletionTokens: 32000,
},
'claude-sonnet-4-5-20250929': {
idVariant: 'thinking',
label: 'Claude Sonnet 4.5 (Thinking)',
description: 'Claude Sonnet 4.5 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntSkills' }],
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
interfaces: [...IF_4_R, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntSkills' }],
benchmark: { cbaElo: 1451 + 1 }, // FALLBACK-UNTIL-AVAILABLE: claude-opus-4-1-20250805-thinking-16k + 1
},
@@ -40,9 +52,9 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Haiku 4.5 (Thinking)',
description: 'Claude Haiku 4.5 with extended thinking mode - first Haiku model with reasoning capabilities',
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAntSkills' }],
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
interfaces: IF_4_R,
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAntSkills' }],
},
// Claude 4.1 models with thinking variants
@@ -50,9 +62,9 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Opus 4.1 (Thinking)',
description: 'Claude Opus 4.1 with extended thinking mode enabled for complex reasoning',
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 32000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
interfaces: IF_4_R,
parameterSpecs: ANT_PAR_WEB_THINKING,
benchmark: { cbaElo: 1451 }, // claude-opus-4-1-20250805-thinking-16k
},
@@ -62,9 +74,9 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Opus 4 (Thinking)',
description: 'Claude Opus 4 with extended thinking mode enabled for complex reasoning',
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 32000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
interfaces: IF_4_R,
parameterSpecs: ANT_PAR_WEB_THINKING,
benchmark: { cbaElo: 1420 }, // claude-opus-4-20250514-thinking-16k
},
@@ -72,9 +84,9 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Sonnet 4 (Thinking)',
description: 'Claude Sonnet 4 with extended thinking mode enabled for complex reasoning',
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAnt1MContext' }],
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
interfaces: IF_4_R,
parameterSpecs: [...ANT_PAR_WEB_THINKING, { paramId: 'llmVndAnt1MContext' }],
benchmark: { cbaElo: 1400 }, // claude-sonnet-4-20250514-thinking-32k
},
@@ -83,9 +95,9 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
idVariant: 'thinking',
label: 'Claude Sonnet 3.7 (Thinking)',
description: 'Claude 3.7 with extended thinking mode enabled for complex reasoning',
parameterSpecs: ANT_PAR_WEB_THINKING,
maxCompletionTokens: 64000,
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching, LLM_IF_OAI_Reasoning],
interfaces: IF_4_R,
parameterSpecs: ANT_PAR_WEB_THINKING,
benchmark: { cbaElo: 1385 }, // claude-3-7-sonnet-20250219-thinking-32k
},
@@ -95,6 +107,17 @@ export const hardcodedAnthropicVariants: { [modelId: string]: Partial<ModelDescr
export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: boolean })[] = [
// Claude 4.5 models
{
id: 'claude-opus-4-5-20251101', // Active
label: 'Claude Opus 4.5', // 🌟
description: 'Most intelligent model with advanced reasoning for complex agentic workflows',
contextWindow: 200000,
maxCompletionTokens: 64000,
trainingDataCutoff: 'Jan 2025',
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [...ANT_PAR_WEB, { paramId: 'llmVndAntEffort' }],
chatPrice: { input: 5, output: 25, cache: { cType: 'ant-bp', read: 0.50, write: 6.25, duration: 300 } },
},
{
id: 'claude-sonnet-4-5-20250929', // Active
label: 'Claude Sonnet 4.5', // 🌟
@@ -102,7 +125,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 64000,
trainingDataCutoff: 'Jan 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: [...IF_4, LLM_IF_ANT_ToolsSearch],
parameterSpecs: [...ANT_PAR_WEB, { paramId: 'llmVndAnt1MContext' }, { paramId: 'llmVndAntSkills' }],
// Note: Tiered pricing - ≤200K: $3/$15, >200K: $6/$22.50 (with 1M context enabled)
// Cache pricing also tiered: write 1.25× input, read 0.10× input
@@ -125,7 +148,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 64000,
trainingDataCutoff: 'Feb 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
parameterSpecs: [...ANT_PAR_WEB, { paramId: 'llmVndAntSkills' }],
chatPrice: { input: 1, output: 5, cache: { cType: 'ant-bp', read: 0.10, write: 1.25, duration: 300 } },
},
@@ -133,12 +156,12 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
// Claude 4.1 models
{
id: 'claude-opus-4-1-20250805', // Active
label: 'Claude Opus 4.1', // 🌟
label: 'Claude Opus 4.1',
description: 'Exceptional model for specialized complex tasks requiring advanced reasoning',
contextWindow: 200000,
maxCompletionTokens: 32000,
trainingDataCutoff: 'Jan 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
benchmark: { cbaElo: 1438 }, // claude-opus-4-1-20250805
@@ -153,19 +176,19 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 32000,
trainingDataCutoff: 'Mar 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
benchmark: { cbaElo: 1411 }, // claude-opus-4-20250514
},
{
id: 'claude-sonnet-4-20250514', // Active
label: 'Claude Sonnet 4', // 🌟
label: 'Claude Sonnet 4',
description: 'High-performance model',
contextWindow: 200000,
maxCompletionTokens: 64000,
trainingDataCutoff: 'Mar 2025',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
parameterSpecs: [...ANT_PAR_WEB, { paramId: 'llmVndAnt1MContext' }],
// Note: Tiered pricing - ≤200K: $3/$15, >200K: $6/$22.50 (with 1M context enabled)
// Cache pricing also tiered: write 1.25× input, read 0.10× input
@@ -190,7 +213,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 64000,
trainingDataCutoff: 'Nov 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 3, output: 15, cache: { cType: 'ant-bp', read: 0.30, write: 3.75, duration: 300 } },
benchmark: { cbaElo: 1369 }, // claude-3-7-sonnet-20250219
@@ -208,7 +231,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 8192,
trainingDataCutoff: 'Jul 2024',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
parameterSpecs: ANT_PAR_WEB,
chatPrice: { input: 0.80, output: 4.00, cache: { cType: 'ant-bp', read: 0.08, write: 1.00, duration: 300 } },
benchmark: { cbaElo: 1319, cbaMmlu: 75.2 }, // claude-3-5-haiku-20241022
@@ -222,7 +245,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
chatPrice: { input: 15, output: 75, cache: { cType: 'ant-bp', read: 1.50, write: 18.75, duration: 300 } },
benchmark: { cbaElo: 1322, cbaMmlu: 86.8 },
hidden: true, // deprecated
@@ -236,7 +259,7 @@ export const hardcodedAnthropicModels: (ModelDescriptionSchema & { isLegacy?: bo
contextWindow: 200000,
maxCompletionTokens: 4096,
trainingDataCutoff: 'Aug 2023',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
chatPrice: { input: 0.25, output: 1.25, cache: { cType: 'ant-bp', read: 0.03, write: 0.30, duration: 300 } },
benchmark: { cbaElo: 1263, cbaMmlu: 75.1 },
},
@@ -302,7 +325,7 @@ export function llmsAntCreatePlaceholderModel(model: AnthropicWire_API_Models_Li
contextWindow: 200000,
maxCompletionTokens: 8192,
trainingDataCutoff: 'Latest',
interfaces: [LLM_IF_OAI_Chat, LLM_IF_OAI_Vision, LLM_IF_OAI_Fn, LLM_IF_ANT_PromptCaching],
interfaces: IF_4,
// chatPrice: ...
// benchmark: ...
};
+2 -1
View File
@@ -79,6 +79,7 @@ const ModelParameterSpec_schema = z.object({
'llmForceNoStream',
// Anthropic
'llmVndAnt1MContext',
'llmVndAntEffort',
'llmVndAntSkills',
'llmVndAntThinkingBudget',
'llmVndAntWebFetch',
@@ -129,7 +130,7 @@ export const ModelDescription_schema = z.object({
updated: z.number().optional(),
description: z.string(),
contextWindow: z.number().nullable(),
interfaces: z.array(z.enum(LLMS_ALL_INTERFACES)),
interfaces: z.array(z.union([z.enum(LLMS_ALL_INTERFACES), z.string()])), // backward compatibility: to not Break client-side interface parsing on newer server
parameterSpecs: z.array(ModelParameterSpec_schema).optional(),
maxCompletionTokens: z.number().optional(),
// rateLimits: rateLimitsSchema.optional(),
@@ -23,7 +23,6 @@ export const ollamaAccessSchema = z.object({
dialect: z.enum(['ollama']),
clientSideFetch: z.boolean().optional(), // optional: backward compatibility from newer server version - can remove once all clients are updated
ollamaHost: z.string().trim(),
ollamaJson: z.boolean(),
});
+4
View File
@@ -19,6 +19,10 @@ export interface IModelVendor<TServiceSettings extends Record<string, any> = {},
readonly hasServerConfigFn?: (backendCapabilities: BackendCapabilities) => boolean; // used to show a 'green checkmark' in the list of vendors when adding services
readonly hasServerConfigKey?: keyof BackendCapabilities;
/// client-side-fetch ///
readonly csfKey?: string; // was keyof TServiceSettings, but caused TS troubles
readonly csfAvailable?: (setup?: Partial<TServiceSettings>) => boolean; // undefined: not even, false: conditions not met
/// abstraction interface ///
initializeSetup?(): TServiceSettings;
@@ -38,6 +38,7 @@ export function AnthropicServiceSetup(props: { serviceId: DModelsServiceId }) {
// derived state
const { anthropicKey, anthropicHost, clientSideFetch, heliconeKey } = serviceAccess;
const needsUserKey = !serviceHasCloudTenantConfig;
const showAdvanced = advanced.on || !!clientSideFetch;
const keyValid = isValidAnthropicApiKey(anthropicKey);
const keyError = (/*needsUserKey ||*/ !!anthropicKey) && !keyValid;
@@ -67,7 +68,7 @@ export function AnthropicServiceSetup(props: { serviceId: DModelsServiceId }) {
placeholder='sk-...'
/>
{advanced.on && <FormSwitchControl
{showAdvanced && <FormSwitchControl
title='Auto-Caching' on='Enabled' off='Disabled'
tooltip='Auto-breakpoints: 3 breakpoints are always set on the System instruction and on the last 2 User messages. This leaves the user with 1 breakpoint of their choice. (max 4)'
description={autoVndAntBreakpoints ? <>Last 2 user messages</> : 'Disabled'}
@@ -76,7 +77,7 @@ export function AnthropicServiceSetup(props: { serviceId: DModelsServiceId }) {
/>}
{advanced.on && <FormControl orientation='horizontal' sx={{ flexWrap: 'wrap', justifyContent: 'space-between', alignItems: 'center' }}>
{showAdvanced && <FormControl orientation='horizontal' sx={{ flexWrap: 'wrap', justifyContent: 'space-between', alignItems: 'center' }}>
<FormLabelStart
title='Caching'
description='Toggle per-Message'
@@ -87,7 +88,7 @@ export function AnthropicServiceSetup(props: { serviceId: DModelsServiceId }) {
</Typography>
</FormControl>}
{advanced.on && <FormTextField
{showAdvanced && <FormTextField
autoCompleteId='anthropic-host'
title='API Host'
description={<>e.g., <Link level='body-sm' href='https://github.com/enricoros/big-agi/blob/main/docs/config-aws-bedrock.md' target='_blank'>bedrock-claude</Link></>}
@@ -97,7 +98,7 @@ export function AnthropicServiceSetup(props: { serviceId: DModelsServiceId }) {
onChange={text => updateSettings({ anthropicHost: text })}
/>}
{advanced.on && <FormTextField
{showAdvanced && <FormTextField
autoCompleteId='anthropic-helicone-key'
title='Helicone Key' disabled={!!anthropicHost}
description={<>Generate <Link level='body-sm' href='https://www.helicone.ai/keys' target='_blank'>here</Link></>}
@@ -106,7 +107,7 @@ export function AnthropicServiceSetup(props: { serviceId: DModelsServiceId }) {
onChange={text => updateSettings({ heliconeKey: text })}
/>}
{advanced.on && <SetupFormClientSideToggle
{showAdvanced && <SetupFormClientSideToggle
visible={!!anthropicKey}
checked={!!clientSideFetch}
onChange={on => updateSettings({ anthropicCSF: on })}
+9 -2
View File
@@ -24,17 +24,24 @@ export const ModelVendorAnthropic: IModelVendor<DAnthropicServiceSettings, Anthr
instanceLimit: 1,
hasServerConfigKey: 'hasLlmAnthropic',
/// client-side-fetch ///
csfKey: 'anthropicCSF',
csfAvailable: _csfAnthropicAvailable,
// functions
getTransportAccess: (partialSetup): AnthropicAccessSchema => ({
dialect: 'anthropic',
clientSideFetch: !!(partialSetup?.anthropicKey && partialSetup?.anthropicCSF),
clientSideFetch: _csfAnthropicAvailable(partialSetup) && !!partialSetup?.anthropicCSF,
anthropicKey: partialSetup?.anthropicKey || '',
anthropicHost: partialSetup?.anthropicHost || null,
heliconeKey: partialSetup?.heliconeKey || null,
}),
// List Models
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmAnthropic.listModels.query({ access }),
};
/** Client-side fetch is available for Anthropic only when a (non-empty) API key is present in the settings. */
function _csfAnthropicAvailable(s?: Partial<DAnthropicServiceSettings>) {
  const anthropicKey = s?.anthropicKey;
  return Boolean(anthropicKey);
}
+5 -4
View File
@@ -46,6 +46,7 @@ export function GeminiServiceSetup(props: { serviceId: DModelsServiceId }) {
// derived state
const { clientSideFetch, geminiKey, geminiHost, minSafetyLevel} = serviceAccess;
const needsUserKey = !serviceHasCloudTenantConfig;
const showAdvanced = advanced.on || !!clientSideFetch;
const shallFetchSucceed = !needsUserKey || (!!geminiKey && serviceSetupValid);
const showKeyError = !!geminiKey && !serviceSetupValid;
@@ -69,7 +70,7 @@ export function GeminiServiceSetup(props: { serviceId: DModelsServiceId }) {
placeholder='...'
/>
{advanced.on && <FormControl orientation='horizontal' sx={{ justifyContent: 'space-between', alignItems: 'center' }}>
{showAdvanced && <FormControl orientation='horizontal' sx={{ justifyContent: 'space-between', alignItems: 'center' }}>
<FormLabelStart title='Safety Settings'
description='Threshold' />
<Select
@@ -89,7 +90,7 @@ export function GeminiServiceSetup(props: { serviceId: DModelsServiceId }) {
</Select>
</FormControl>}
{advanced.on && <FormHelperText sx={{ display: 'block' }}>
{showAdvanced && <FormHelperText sx={{ display: 'block' }}>
Gemini has advanced <Link href='https://ai.google.dev/docs/safety_setting_gemini' target='_blank' noLinkStyle>
safety settings</Link> on: harassment, hate speech,
sexually explicit, civic integrity, and dangerous content, in addition to non-adjustable built-in filters.
@@ -97,7 +98,7 @@ export function GeminiServiceSetup(props: { serviceId: DModelsServiceId }) {
{/*of being unsafe.*/}
</FormHelperText>}
{advanced.on && <FormTextField
{showAdvanced && <FormTextField
autoCompleteId='gemini-host'
title='API Endpoint'
placeholder={`https://generativelanguage.googleapis.com`}
@@ -105,7 +106,7 @@ export function GeminiServiceSetup(props: { serviceId: DModelsServiceId }) {
onChange={text => updateSettings({ geminiHost: text })}
/>}
{advanced.on && <SetupFormClientSideToggle
{showAdvanced && <SetupFormClientSideToggle
visible={!!geminiKey}
checked={!!clientSideFetch}
onChange={on => updateSettings({ geminiCSF: on })}
+9 -1
View File
@@ -33,6 +33,10 @@ export const ModelVendorGemini: IModelVendor<DGeminiServiceSettings, GeminiAcces
instanceLimit: 1,
hasServerConfigKey: 'hasLlmGemini',
/// client-side-fetch ///
csfKey: 'geminiCSF',
csfAvailable: _csfGeminiAvailable,
// functions
initializeSetup: () => ({
geminiKey: '',
@@ -44,7 +48,7 @@ export const ModelVendorGemini: IModelVendor<DGeminiServiceSettings, GeminiAcces
},
getTransportAccess: (partialSetup): GeminiAccessSchema => ({
dialect: 'gemini',
clientSideFetch: !!(partialSetup?.geminiKey && partialSetup?.geminiCSF),
clientSideFetch: _csfGeminiAvailable(partialSetup) && !!partialSetup?.geminiCSF,
geminiKey: partialSetup?.geminiKey || '',
geminiHost: partialSetup?.geminiHost || '',
minSafetyLevel: partialSetup?.minSafetyLevel || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
@@ -54,3 +58,7 @@ export const ModelVendorGemini: IModelVendor<DGeminiServiceSettings, GeminiAcces
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmGemini.listModels.query({ access }),
};
/** Client-side fetch is available for Gemini only when a (non-empty) API key is present in the settings. */
function _csfGeminiAvailable(s?: Partial<DGeminiServiceSettings>) {
  const geminiKey = s?.geminiKey;
  return Boolean(geminiKey);
}
+9 -1
View File
@@ -23,6 +23,10 @@ export const ModelVendorLocalAI: IModelVendor<DLocalAIServiceSettings, OpenAIAcc
return backendCapabilities.hasLlmLocalAIHost || backendCapabilities.hasLlmLocalAIKey;
},
/// client-side-fetch ///
csfKey: 'localAICSF',
csfAvailable: _csfLocalAIAvailable,
// functions
initializeSetup: () => ({
localAIHost: '',
@@ -31,7 +35,7 @@ export const ModelVendorLocalAI: IModelVendor<DLocalAIServiceSettings, OpenAIAcc
}),
getTransportAccess: (partialSetup) => ({
dialect: 'localai',
clientSideFetch: !!(partialSetup?.localAIHost && partialSetup?.localAICSF),
clientSideFetch: _csfLocalAIAvailable(partialSetup) && !!partialSetup?.localAICSF,
oaiKey: partialSetup?.localAIKey || '',
oaiOrg: '',
oaiHost: partialSetup?.localAIHost || '',
@@ -43,3 +47,7 @@ export const ModelVendorLocalAI: IModelVendor<DLocalAIServiceSettings, OpenAIAcc
rpcUpdateModelsOrThrow: ModelVendorOpenAI.rpcUpdateModelsOrThrow,
};
/** Client-side fetch is available for LocalAI only when a (non-empty) host has been configured. */
function _csfLocalAIAvailable(s?: Partial<DLocalAIServiceSettings>) {
  const localAIHost = s?.localAIHost;
  return Boolean(localAIHost);
}
+2 -21
View File
@@ -1,11 +1,9 @@
import * as React from 'react';
import { Button, FormControl, Tooltip, Typography } from '@mui/joy';
import WarningRoundedIcon from '@mui/icons-material/WarningRounded';
import { Button, FormControl, Typography } from '@mui/joy';
import type { DModelsServiceId } from '~/common/stores/llms/llms.service.types';
import { FormLabelStart } from '~/common/components/forms/FormLabelStart';
import { FormSwitchControl } from '~/common/components/forms/FormSwitchControl';
import { FormTextField } from '~/common/components/forms/FormTextField';
import { InlineError } from '~/common/components/InlineError';
import { Link } from '~/common/components/Link';
@@ -31,7 +29,7 @@ export function OllamaServiceSetup(props: { serviceId: DModelsServiceId }) {
useServiceSetup(props.serviceId, ModelVendorOllama);
// derived state
const { clientSideFetch, ollamaHost, ollamaJson } = serviceAccess;
const { clientSideFetch, ollamaHost } = serviceAccess;
const hostValid = !!asValidURL(ollamaHost);
const hostError = !!ollamaHost && !hostValid;
@@ -61,23 +59,6 @@ export function OllamaServiceSetup(props: { serviceId: DModelsServiceId }) {
</Typography>
</FormControl>
<FormSwitchControl
title='JSON mode'
on={<Typography level='title-sm' endDecorator={<WarningRoundedIcon sx={{ color: 'danger.solidBg' }} />}>Force JSON</Typography>}
off='Off (default)'
fullWidth
description={
<Tooltip arrow title='Models will output only JSON, including empty {} objects.'>
<Link level='body-sm' href='https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion' target='_blank'>Information</Link>
</Tooltip>
}
checked={ollamaJson}
onChange={on => {
updateSettings({ ollamaJson: on });
refetch();
}}
/>
<SetupFormClientSideToggle
visible={true}
checked={!!clientSideFetch}
+9 -4
View File
@@ -6,7 +6,6 @@ import type { OllamaAccessSchema } from '../../server/ollama/ollama.access';
interface DOllamaServiceSettings {
ollamaHost: string;
ollamaJson: boolean;
ollamaCSF?: boolean;
}
@@ -20,20 +19,26 @@ export const ModelVendorOllama: IModelVendor<DOllamaServiceSettings, OllamaAcces
instanceLimit: 2,
hasServerConfigKey: 'hasLlmOllama',
/// client-side-fetch ///
csfKey: 'ollamaCSF',
csfAvailable: _csfOllamaAvailable,
// functions
initializeSetup: () => ({
ollamaHost: '',
ollamaJson: false,
// ollamaCSF: true, // eventually
}),
getTransportAccess: (partialSetup): OllamaAccessSchema => ({
dialect: 'ollama',
clientSideFetch: !!(partialSetup?.ollamaHost && partialSetup?.ollamaCSF),
clientSideFetch: _csfOllamaAvailable(partialSetup) && !!partialSetup?.ollamaCSF,
ollamaHost: partialSetup?.ollamaHost || '',
ollamaJson: partialSetup?.ollamaJson || false,
}),
// List Models
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOllama.listModels.query({ access }),
};
// Client-side fetch for Ollama is only meaningful once a host has been configured.
function _csfOllamaAvailable(s?: Partial<DOllamaServiceSettings>) {
  const host = s?.ollamaHost;
  return Boolean(host);
}
+6 -5
View File
@@ -37,6 +37,7 @@ export function OpenAIServiceSetup(props: { serviceId: DModelsServiceId }) {
// derived state
const { clientSideFetch, oaiKey, oaiOrg, oaiHost, heliKey, moderationCheck } = serviceAccess;
const needsUserKey = !serviceHasCloudTenantConfig;
const showAdvanced = advanced.on || !!clientSideFetch;
const keyValid = true; //isValidOpenAIApiKey(oaiKey);
const keyError = (/*needsUserKey ||*/ !!oaiKey) && !keyValid;
@@ -62,7 +63,7 @@ export function OpenAIServiceSetup(props: { serviceId: DModelsServiceId }) {
placeholder='sk-...'
/>
{advanced.on && <FormTextField
{showAdvanced && <FormTextField
autoCompleteId='openai-host'
title='API Endpoint'
tooltip={`An OpenAI compatible endpoint to be used in place of 'api.openai.com'.\n\nCould be used for Helicone, Cloudflare, or other OpenAI compatible cloud or local services.\n\nExamples:\n - ${HELICONE_OPENAI_HOST}\n - localhost:1234`}
@@ -72,7 +73,7 @@ export function OpenAIServiceSetup(props: { serviceId: DModelsServiceId }) {
onChange={text => updateSettings({ oaiHost: text })}
/>}
{advanced.on && <FormTextField
{showAdvanced && <FormTextField
autoCompleteId='openai-org'
title='Organization ID'
description={<Link level='body-sm' href={BaseProduct.OpenSourceRepo + '/issues/63'} target='_blank'>What is this</Link>}
@@ -81,7 +82,7 @@ export function OpenAIServiceSetup(props: { serviceId: DModelsServiceId }) {
onChange={text => updateSettings({ oaiOrg: text })}
/>}
{advanced.on && <FormTextField
{showAdvanced && <FormTextField
autoCompleteId='openai-helicone-key'
title='Helicone Key'
description={<>Generate <Link level='body-sm' href='https://www.helicone.ai/keys' target='_blank'>here</Link></>}
@@ -96,7 +97,7 @@ export function OpenAIServiceSetup(props: { serviceId: DModelsServiceId }) {
: 'OpenAI traffic will now be routed through Helicone.'}
</Alert>}
{advanced.on && <FormSwitchControl
{showAdvanced && <FormSwitchControl
title='Moderation' on='Enabled' fullWidth
description={<>
<Link level='body-sm' href='https://platform.openai.com/docs/guides/moderation/moderation' target='_blank'>Overview</Link>,
@@ -106,7 +107,7 @@ export function OpenAIServiceSetup(props: { serviceId: DModelsServiceId }) {
onChange={on => updateSettings({ moderationCheck: on })}
/>}
{advanced.on && <SetupFormClientSideToggle
{showAdvanced && <SetupFormClientSideToggle
visible={!!oaiHost || !!oaiKey}
checked={!!clientSideFetch}
onChange={on => updateSettings({ oaiCSF: on })}
+9 -1
View File
@@ -25,10 +25,14 @@ export const ModelVendorOpenAI: IModelVendor<DOpenAIServiceSettings, OpenAIAcces
instanceLimit: 5,
hasServerConfigKey: 'hasLlmOpenAI',
/// client-side-fetch ///
csfKey: 'oaiCSF',
csfAvailable: _csfOpenAIAvailable,
// functions
getTransportAccess: (partialSetup): OpenAIAccessSchema => ({
dialect: 'openai',
clientSideFetch: !!((partialSetup?.oaiHost || partialSetup?.oaiKey) && partialSetup?.oaiCSF),
clientSideFetch: _csfOpenAIAvailable(partialSetup) && !partialSetup?.oaiCSF,
oaiKey: '',
oaiOrg: '',
oaiHost: '',
@@ -41,3 +45,7 @@ export const ModelVendorOpenAI: IModelVendor<DOpenAIServiceSettings, OpenAIAcces
rpcUpdateModelsOrThrow: async (access) => await apiAsync.llmOpenAI.listModels.query({ access }),
};
// Client-side fetch for OpenAI is available when either a custom host or an API key is set.
function _csfOpenAIAvailable(s?: Partial<DOpenAIServiceSettings>) {
  const hasHost = Boolean(s?.oaiHost);
  const hasKey = Boolean(s?.oaiKey);
  return hasHost || hasKey;
}
+2 -2
View File
@@ -254,8 +254,8 @@ async function _fetchFromTRPC<TBody extends object | undefined | FormData, TOut>
let payloadString = safeErrorString(notOkayPayload);
if (payloadString) {
// truncate
if (payloadString.length > 200)
payloadString = payloadString.slice(0, 200) + '...';
if (payloadString.length > 240)
payloadString = payloadString.slice(0, 240) + '...';
// frame
const inferredType = _inferTextPayloadType(payloadString);
if (inferredType)
@@ -22,7 +22,6 @@
"ollama": {
"dialect": "ollama",
"ollamaHost": "http://127.0.0.1:11434",
"ollamaJson": false
},
"groq": {
"dialect": "groq",
-1
View File
@@ -717,7 +717,6 @@ function createSingleConfig(
config[dialect] = {
dialect: 'ollama',
ollamaHost: host || 'http://127.0.0.1:11434',
ollamaJson: false,
} as any;
break;
@@ -97,7 +97,6 @@
"_comment": "Local Ollama instance - no API key needed",
"dialect": "ollama",
"ollamaHost": "http://127.0.0.1:11434",
"ollamaJson": false
},
"lmstudio": {
"_comment": "Local LM Studio instance - no API key needed",