Commit f182fbd1 authored by Your Name

All working but qwen-oauth2

parent 8b50a0ae
{
"name": ".kilo",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"@kilocode/plugin": "7.2.14"
}
},
"node_modules/@kilocode/plugin": {
"version": "7.2.14",
"resolved": "https://registry.npmjs.org/@kilocode/plugin/-/plugin-7.2.14.tgz",
"integrity": "sha512-mS+WA9HZIBH2qQ9ARA+v0q4MdQTSdfOvKbe4AOSkjP+P5hVA70OM/UVM9DVcvmjSOxU+wuUxmOy+j/EQIrgFmw==",
"license": "MIT",
"dependencies": {
"@kilocode/sdk": "7.2.14",
"zod": "4.1.8"
},
"peerDependencies": {
"@opentui/core": ">=0.1.97",
"@opentui/solid": ">=0.1.97"
},
"peerDependenciesMeta": {
"@opentui/core": {
"optional": true
},
"@opentui/solid": {
"optional": true
}
}
},
"node_modules/@kilocode/sdk": {
"version": "7.2.14",
"resolved": "https://registry.npmjs.org/@kilocode/sdk/-/sdk-7.2.14.tgz",
"integrity": "sha512-Naz83lFrsbavuDp6UwxRuglOaSNvRBsZfcRNvb7RpWYAwbuJP0dBdhpXj6uO3ta5qxeQ2JzxKNC9Ffz+LCLLDg==",
"license": "MIT",
"dependencies": {
"cross-spawn": "7.0.6"
}
},
"node_modules/cross-spawn": {
"version": "7.0.6",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
"license": "MIT",
"dependencies": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
"which": "^2.0.1"
},
"engines": {
"node": ">= 8"
}
},
"node_modules/isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
"license": "ISC"
},
"node_modules/path-key": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
"integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/shebang-command": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
"integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
"license": "MIT",
"dependencies": {
"shebang-regex": "^3.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/shebang-regex": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
"integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
"integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
"license": "ISC",
"dependencies": {
"isexe": "^2.0.0"
},
"bin": {
"node-which": "bin/node-which"
},
"engines": {
"node": ">= 8"
}
},
"node_modules/zod": {
"version": "4.1.8",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
}
}
}
......@@ -25,7 +25,7 @@ import json
import logging
import time
import uuid
from typing import Dict, List, Optional, Union, AsyncIterator
from typing import Dict, List, Optional, Union, AsyncIterator, Tuple
from openai import OpenAI
import httpx
......@@ -49,8 +49,8 @@ class CodexProviderHandler(BaseProviderHandler):
- Standard OpenAI protocol with Bearer token
**OAuth2 Mode** (no api_key, OAuth2 credentials):
- Uses ChatGPT backend API: https://chatgpt.com/backend-api/codex
- Uses Responses API endpoint: /v1/responses
- Uses ChatGPT backend API: https://chatgpt.com/backend-api
- Uses Responses API endpoint: /codex/responses
- ChatGPT-specific protocol with SSE streaming
- Includes ChatGPT-Account-ID header
......@@ -104,7 +104,7 @@ class CodexProviderHandler(BaseProviderHandler):
# OAuth2 Mode: Check if OAuth2 is authenticated
# If authenticated, use ChatGPT backend; otherwise use configured endpoint
if self.oauth2.is_authenticated():
self.base_url = "https://chatgpt.com/backend-api/codex"
self.base_url = "https://chatgpt.com/backend-api"
logger.info(f"CodexProviderHandler: Initialized in OAuth2 mode with ChatGPT backend: {self.base_url}")
else:
# Not yet authenticated, keep configured endpoint
......@@ -160,8 +160,8 @@ class CodexProviderHandler(BaseProviderHandler):
self._account_id = self.oauth2.credentials['tokens'].get('account_id')
# Switch to ChatGPT backend if OAuth2 is now authenticated
if not self._use_api_key_mode and self.base_url != "https://chatgpt.com/backend-api/codex":
self.base_url = "https://chatgpt.com/backend-api/codex"
if not self._use_api_key_mode and self.base_url != "https://chatgpt.com/backend-api":
self.base_url = "https://chatgpt.com/backend-api"
logger.info(f"CodexProviderHandler: Switched to ChatGPT backend after OAuth2 authentication: {self.base_url}")
# Update the configuration with the new endpoint
......@@ -257,19 +257,35 @@ class CodexProviderHandler(BaseProviderHandler):
# OAuth2 Mode Methods (ChatGPT Responses API)
# =========================================================================
def _convert_messages_to_responses_format(self, messages: List[Dict]) -> List[Dict]:
def _convert_messages_to_responses_format(self, messages: List[Dict]) -> Tuple[List[Dict], Optional[str]]:
"""
Convert OpenAI Chat Completions messages to Responses API format.
OpenAI format: {"role": "user", "content": "text"}
Responses format: {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "text"}]}
Returns:
tuple: (converted_messages, system_instruction)
- converted_messages: List of messages in Responses API format
- system_instruction: Combined system message content (if any)
"""
result = []
system_instructions = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
# Handle system messages - extract for instructions field
if role == "system":
if isinstance(content, str):
system_instructions.append(content)
elif isinstance(content, list):
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
system_instructions.append(item.get("text", ""))
continue
# Handle tool messages
if role == "tool":
result.append({
......@@ -299,17 +315,17 @@ class CodexProviderHandler(BaseProviderHandler):
})
continue
# Handle regular messages
# Handle regular messages (user, developer, assistant)
content_items = []
if isinstance(content, str):
content_type = "input_text" if role in ["user", "system", "developer"] else "output_text"
content_type = "input_text" if role in ["user", "developer"] else "output_text"
content_items.append({"type": content_type, "text": content})
elif isinstance(content, list):
# Handle multimodal content
for item in content:
if isinstance(item, dict):
if item.get("type") == "text":
content_type = "input_text" if role in ["user", "system", "developer"] else "output_text"
content_type = "input_text" if role in ["user", "developer"] else "output_text"
content_items.append({"type": content_type, "text": item.get("text", "")})
elif item.get("type") == "image_url":
content_items.append({
......@@ -324,7 +340,40 @@ class CodexProviderHandler(BaseProviderHandler):
"content": content_items
})
return result
# Combine system instructions
combined_system = " ".join(system_instructions) if system_instructions else None
return result, combined_system
def _convert_tools_to_codex_format(self, tools: Optional[List[Dict]]) -> List[Dict]:
"""
Convert OpenAI tool format to Codex/ChatGPT format.
OpenAI format: {"type": "function", "function": {"name": "...", "description": "...", "parameters": {...}}}
Codex format: {"type": "function", "name": "...", "description": "...", "parameters": {...}}
Key difference: No nested "function" object in Codex format.
"""
if not tools:
return []
converted_tools = []
for tool in tools:
if tool.get("type") == "function" and "function" in tool:
# OpenAI format - flatten it
func = tool["function"]
converted_tool = {
"type": "function",
"name": func.get("name"),
"description": func.get("description", ""),
"parameters": func.get("parameters", {}),
}
converted_tools.append(converted_tool)
else:
# Already in Codex format or other type
converted_tools.append(tool)
return converted_tools
def _build_responses_request(
self,
......@@ -336,29 +385,32 @@ class CodexProviderHandler(BaseProviderHandler):
tool_choice: Optional[Union[str, Dict]] = None,
) -> Dict:
"""Build a Responses API request payload."""
# Convert messages to Responses format
input_items = self._convert_messages_to_responses_format(messages)
# Convert messages to Responses format and extract system instructions
input_items, system_instruction = self._convert_messages_to_responses_format(messages)
# Use system instruction from messages if available, otherwise use default
instructions = system_instruction if system_instruction else "You are Codex, a helpful AI assistant for coding tasks."
# Convert tools to Codex format (flatten the structure)
codex_tools = self._convert_tools_to_codex_format(tools)
# Build base request
request = {
"model": model,
"instructions": "You are Codex, a helpful AI assistant for coding tasks.",
"instructions": instructions,
"input": input_items,
"stream": True,
"store": False,
"tools": codex_tools,
"tool_choice": "auto",
"parallel_tool_calls": True,
}
# Add optional parameters
if max_tokens is not None:
request["max_tokens"] = max_tokens
if temperature is not None:
request["temperature"] = temperature
if tools:
# Convert OpenAI tool format to Responses API format
request["tools"] = tools
# Note: temperature and max_tokens are not supported by /codex/responses endpoint
# They are handled internally by the model
# Override tool_choice if explicitly provided
if tool_choice:
request["tool_choice"] = tool_choice if isinstance(tool_choice, str) else "auto"
......@@ -506,31 +558,39 @@ class CodexProviderHandler(BaseProviderHandler):
headers = self._build_headers(api_key, conversation_id)
# Make request to Responses API
url = f"{self.base_url}/v1/responses"
url = f"{self.base_url}/codex/responses"
logger.info(f"CodexProviderHandler: Sending request to {url}")
if AISBF_DEBUG:
logger.info(f"CodexProviderHandler: Request payload: {json.dumps(request_payload, indent=2)}")
logger.info(f"CodexProviderHandler: Request headers: {json.dumps({k: v for k, v in headers.items() if k.lower() != 'authorization'}, indent=2)}")
async with httpx.AsyncClient(timeout=300.0) as client:
response = await client.post(
async with client.stream(
"POST",
url,
headers=headers,
json=request_payload,
)
response.raise_for_status()
# Parse SSE stream
events = []
async for event in self._parse_sse_stream(response):
events.append(event)
) as response:
if response.status_code >= 400:
error_body = await response.aread()
logger.error(f"CodexProviderHandler: Error response status: {response.status_code}")
logger.error(f"CodexProviderHandler: Error response body: {error_body.decode('utf-8')}")
# For streaming, we would yield events here
# For now, accumulate all events
# Convert to OpenAI format
openai_response = self._convert_sse_to_openai_format(events, model)
return openai_response
response.raise_for_status()
# Parse SSE stream
events = []
async for event in self._parse_sse_stream(response):
events.append(event)
# For streaming, we would yield events here
# For now, accumulate all events
# Convert to OpenAI format
openai_response = self._convert_sse_to_openai_format(events, model)
return openai_response
# =========================================================================
# Main Request Handler (Routes to appropriate mode)
......
......@@ -25,6 +25,7 @@ import asyncio
import time
import json
import platform
import uuid
from typing import Dict, List, Optional, Union
from openai import AsyncOpenAI
from ..models import Model
......@@ -130,19 +131,35 @@ class QwenProviderHandler(BaseProviderHandler):
logger.info("QwenProviderHandler: Using OAuth2 authentication")
auth_key = access_token
# Use provider configured endpoint for OAuth2 (fixed endpoints)
base_url = self.provider_config.endpoint
# Get resource URL from auth and normalize it properly
base_url = self.auth.get_resource_url()
# Normalize endpoint exactly as specified in documentation
if not base_url.startswith("http"):
base_url = f"https://{base_url}"
if not base_url.endswith("/v1"):
base_url = f"{base_url}/v1"
logger.info(f"QwenProviderHandler: Final endpoint: {base_url}")
# Normalize endpoint
if not base_url.startswith("http"):
base_url = f"https://{base_url}"
# DashScope endpoint already includes /v1 so do not append again
# Build required DashScope headers
import uuid
user_agent = f"QwenCode/1.0.0 ({platform.system().lower()}; {platform.machine()})"
default_headers = {
"Accept": "application/json",
"X-DashScope-CacheControl": "enable",
"X-DashScope-UserAgent": user_agent,
"X-DashScope-AuthType": "qwen-oauth",
"x-request-id": str(uuid.uuid4()),
}
self._sdk_client = AsyncOpenAI(
api_key=auth_key,
base_url=base_url,
max_retries=3,
timeout=httpx.Timeout(300.0, connect=30.0),
default_headers=default_headers,
)
logger.info(f"QwenProviderHandler: Created SDK client (endpoint: {base_url})")
......@@ -223,12 +240,22 @@ class QwenProviderHandler(BaseProviderHandler):
# Get SDK client with current OAuth token
client = await self._get_sdk_client()
# Generate session tracking IDs
session_id = str(uuid.uuid4())
prompt_id = str(uuid.uuid4())
# Build request parameters
request_params = {
"model": model,
"messages": messages,
"max_tokens": max_tokens or 4096,
"stream": stream,
"extra_body": {
"metadata": {
"sessionId": session_id,
"promptId": prompt_id
}
}
}
if temperature is not None and temperature > 0:
......@@ -240,6 +267,10 @@ class QwenProviderHandler(BaseProviderHandler):
if tool_choice and tools:
request_params["tool_choice"] = tool_choice
# Add stream_options for streaming requests
if stream:
request_params["stream_options"] = {"include_usage": True}
try:
if stream:
logger.info("QwenProviderHandler: Using streaming mode")
......@@ -440,16 +471,37 @@ class QwenProviderHandler(BaseProviderHandler):
using_api_key = qwen_config and isinstance(qwen_config, dict) and qwen_config.get('api_key')
if not using_api_key:
# OAuth2 authentication: return fixed model list
logger.info("QwenProviderHandler: Using OAuth2 authentication, returning fixed model list")
# OAuth2 authentication: return full model list
logger.info("QwenProviderHandler: Using OAuth2 authentication, returning full model list")
return [
Model(
id="coder-model",
name="Coder Model",
id="qwen-turbo",
name="Qwen Turbo",
provider_id=self.provider_id,
context_size=1000000,
context_length=1000000,
)
context_size=32000,
context_length=32000,
),
Model(
id="qwen-plus",
name="Qwen Plus",
provider_id=self.provider_id,
context_size=128000,
context_length=128000,
),
Model(
id="qwen-max",
name="Qwen Max",
provider_id=self.provider_id,
context_size=128000,
context_length=128000,
),
Model(
id="qwen3-coder-plus",
name="Qwen 3 Coder Plus",
provider_id=self.provider_id,
context_size=128000,
context_length=128000,
),
]
# API token authentication: fetch from models endpoint
......@@ -502,10 +554,10 @@ class QwenProviderHandler(BaseProviderHandler):
# Fallback to static model list
logger.warning("QwenProviderHandler: No models returned from API, using static list")
models = [
Model(id="qwen-plus", name="Qwen Plus", provider_id=self.provider_id, context_size=32000),
Model(id="qwen-turbo", name="Qwen Turbo", provider_id=self.provider_id, context_size=8000),
Model(id="qwen-max", name="Qwen Max", provider_id=self.provider_id, context_size=8000),
Model(id="coder-model", name="Qwen Coder", provider_id=self.provider_id, context_size=32000),
Model(id="qwen-turbo", name="Qwen Turbo", provider_id=self.provider_id, context_size=32000),
Model(id="qwen-plus", name="Qwen Plus", provider_id=self.provider_id, context_size=128000),
Model(id="qwen-max", name="Qwen Max", provider_id=self.provider_id, context_size=128000),
Model(id="qwen3-coder-plus", name="Qwen 3 Coder Plus", provider_id=self.provider_id, context_size=128000),
]
logger.info(f"QwenProviderHandler: Returning {len(models)} models")
......@@ -517,8 +569,8 @@ class QwenProviderHandler(BaseProviderHandler):
# Return static fallback list
logger.info("QwenProviderHandler: Using static fallback model list")
return [
Model(id="qwen-plus", name="Qwen Plus", provider_id=self.provider_id, context_size=32000),
Model(id="qwen-turbo", name="Qwen Turbo", provider_id=self.provider_id, context_size=8000),
Model(id="qwen-max", name="Qwen Max", provider_id=self.provider_id, context_size=8000),
Model(id="coder-model", name="Qwen Coder", provider_id=self.provider_id, context_size=32000),
Model(id="qwen-turbo", name="Qwen Turbo", provider_id=self.provider_id, context_size=32000),
Model(id="qwen-plus", name="Qwen Plus", provider_id=self.provider_id, context_size=128000),
Model(id="qwen-max", name="Qwen Max", provider_id=self.provider_id, context_size=128000),
Model(id="qwen3-coder-plus", name="Qwen 3 Coder Plus", provider_id=self.provider_id, context_size=128000),
]
# ChatGPT API Implementation Guide for Codex-CLI
# Complete ChatGPT/OpenAI API Request Flow Documentation
This document provides a comprehensive analysis of how codex-cli communicates with ChatGPT API endpoints, including exact endpoints, headers, authentication, request schemas, and implementation details.
This document provides a comprehensive analysis of how Codex sends requests to the ChatGPT/OpenAI API, including authentication, headers, request format, session management, and endpoints.
## Table of Contents
1. [Overview](#overview)
2. [API Endpoints](#api-endpoints)
3. [Authentication](#authentication)
4. [Request Headers](#request-headers)
5. [Request/Response Schemas](#requestresponse-schemas)
6. [Model List Retrieval](#model-list-retrieval)
7. [Streaming Responses](#streaming-responses)
8. [WebSocket Support](#websocket-support)
9. [Message Conversion Between OpenAI and Codex Formats](#message-conversion-between-openai-and-codex-formats)
10. [Python Implementation Examples](#python-implementation-examples)
11. [Implementation Examples (Rust)](#implementation-examples-rust)
12. [Developer Role Messages](#developer-role-messages)
13. [Session Flow](#session-flow)
1. [Authentication & OAuth2 Flow](#1-authentication--oauth2-flow)
2. [API Endpoints](#2-api-endpoints)
3. [Request Headers](#3-request-headers)
4. [Request Body Format](#4-request-body-format)
5. [Session Management](#5-session-management)
6. [Transport Mechanisms](#6-transport-mechanisms)
7. [System Prompt (Instructions)](#7-system-prompt-instructions)
8. [Message Format Conversion](#8-message-format-conversion-openai-compatible--chatgpt)
9. [Complete Request Flow](#9-complete-request-flow)
10. [Python Implementation Example](#10-python-implementation-example)
---
## Overview
## 1. Authentication & OAuth2 Flow
Codex-CLI uses OpenAI's **Responses API** (not the Chat Completions API) to communicate with ChatGPT. The base URL depends on the authentication mode:
### OAuth2 Token Management
- **Base URL (ChatGPT mode)**: `https://chatgpt.com/backend-api/codex`
- **Base URL (API Key mode)**: `https://api.openai.com/v1`
**Location**: `codex-rs/login/src/auth/manager.rs`
The client supports both HTTP/SSE and WebSocket transports for streaming responses.
Codex uses the OAuth2 device code flow for ChatGPT authentication:
---
## API Endpoints
#### 1.1 Device Code Request
### Primary Endpoints
- **Endpoint**: `https://auth0.openai.com/oauth/device/code`
- **Client ID**: Retrieved from configuration
- **Scope**: `openid profile email offline_access`
#### 1. Responses Endpoint (Streaming)
- **Path**: `/v1/responses` (or `/responses` relative to base)
- **Method**: `POST`
- **Purpose**: Stream AI responses for a given prompt
- **Transport**: HTTP with Server-Sent Events (SSE) or WebSocket
#### 1.2 Token Exchange
#### 2. Models Endpoint
- **Path**: `/v1/models` (or `/models` relative to base)
- **Method**: `GET`
- **Purpose**: Retrieve available models and their capabilities
- **Query Parameters**: `client_version=<version>` (e.g., `0.99.0`)
- **Endpoint**: `https://auth0.openai.com/oauth/token`
- **Grant type**: `urn:ietf:params:oauth:grant-type:device_code`
- **Returns**: `access_token`, `refresh_token`, `id_token`
#### 3. Compact Endpoint
- **Path**: `/v1/responses/compact` (or `/responses/compact` relative to base)
- **Method**: `POST`
- **Purpose**: Compact conversation history
#### 1.3 Token Storage
#### 4. Memory Summarization Endpoint
- **Path**: `/v1/memories/trace_summarize` (or `/memories/trace_summarize` relative to base)
- **Method**: `POST`
- **Purpose**: Summarize memory traces
- Tokens stored in `~/.codex/auth.json` or system keyring
- ID token contains `chatgpt_account_id` claim
- Access token used for API authentication
### ChatGPT-Specific Backend Endpoints (OAuth Mode)
#### 1.4 Token Refresh
When using ChatGPT authentication, additional endpoints are available:
- Automatic refresh on 401 responses
- Uses refresh token to get new access token
- Implements retry logic with exponential backoff
#### 5. Config Requirements
- **Path**: `/backend-api/wham/config/requirements`
- **Method**: `GET`
- **Purpose**: Retrieve cloud configuration requirements
### Token Data Structure
#### 6. Rate Limits
- **Path**: `/backend-api/api/codex/usage`
- **Method**: `GET`
- **Purpose**: Get account rate limits
```rust
pub struct TokenData {
pub access_token: String,
pub refresh_token: Option<String>,
pub account_id: Option<String>, // From chatgpt_account_id claim
pub id_token: IdTokenClaims,
}
```
#### 7. Plugin/App Management
- **Path**: `/backend-api/plugins/list`
- **Method**: `GET`
- **Purpose**: List installed plugins
---
- **Path**: `/backend-api/plugins/featured`
- **Method**: `GET`
- **Query Parameters**: `platform=codex`
- **Purpose**: Get featured plugins
## 2. API Endpoints
- **Path**: `/backend-api/plugins/{plugin_id}/enable`
- **Method**: `POST`
- **Purpose**: Enable a plugin
### Base URL
- **Path**: `/backend-api/plugins/{plugin_id}/uninstall`
- **Method**: `POST`
- **Purpose**: Uninstall a plugin
- **Default**: `https://chatgpt.com/backend-api/`
- **Configurable**: via `chatgpt_base_url` in config
#### 8. MCP Apps
- **Path**: `/backend-api/wham/apps`
- **Method**: WebSocket connection
- **Purpose**: MCP (Model Context Protocol) server communication
### Primary Endpoints
**Important Note**: When using ChatGPT OAuth authentication, the `instructions` field in the request is **required** and should reference "Codex" specifically. The example from the blog post shows:
#### 2.1 Responses API (Main chat endpoint)
```json
{
"instructions": "You are Codex, based on GPT-5. You are running as a coding agent ..."
}
```
- **Path**: `/v1/responses`
- **Method**: `POST`
- **Transport**: HTTP (SSE) or WebSocket
- **Purpose**: Streaming model responses (Responses API, not Chat Completions)
This appears to be a requirement for the ChatGPT backend API to accept requests properly.
#### 2.2 Compact API (History compression)
---
- **Path**: `/v1/responses/compact`
- **Method**: `POST`
- **Purpose**: Compress conversation history
## Authentication
#### 2.3 Memories API (Memory summarization)
### Two Authentication Modes
- **Path**: `/v1/memories/trace_summarize`
- **Method**: `POST`
- **Purpose**: Summarize conversation memories
#### 1. API Key Mode
- **Header**: `Authorization: Bearer <api_key>`
- **Source**: Environment variable (typically `OPENAI_API_KEY`)
- **Base URL**: `https://api.openai.com/v1`
#### 2.4 Models List
#### 2. ChatGPT Mode (OAuth2)
- **Header**: `Authorization: Bearer <access_token>`
- **Additional Header**: `ChatGPT-Account-ID: <account_id>`
- **Base URL**: `https://chatgpt.com/backend-api/codex`
- **Token Management**: Automatic refresh on 401 responses
- **Path**: `/models`
- **Method**: `GET`
- **Purpose**: Retrieve available models
### Authentication Implementation
#### 2.5 Plugins
The authentication is handled through the `AuthProvider` trait:
- **Path**: `/plugins/list`
- **Path**: `/plugins/featured`
- **Path**: `/plugins/export/curated`
```rust
pub trait AuthProvider: Send + Sync {
fn bearer_token(&self) -> Option<String>;
fn account_id(&self) -> Option<String> {
None
}
}
```
#### 2.6 Files (OpenAI file uploads)
Headers are added via:
- **Path**: `/files`
- **Method**: `POST`
- **Path**: `/files/{file_id}/uploaded`
- **Method**: `POST`
```rust
pub(crate) fn add_auth_headers_to_header_map<A: AuthProvider>(auth: &A, headers: &mut HeaderMap) {
if let Some(token) = auth.bearer_token()
&& let Ok(header) = HeaderValue::from_str(&format!("Bearer {token}"))
{
let _ = headers.insert(http::header::AUTHORIZATION, header);
}
if let Some(account_id) = auth.account_id()
&& let Ok(header) = HeaderValue::from_str(&account_id)
{
let _ = headers.insert("ChatGPT-Account-ID", header);
}
}
```
#### 2.7 Realtime (Voice/WebRTC)
**Location**: `codex-rs/codex-api/src/auth.rs`
- **Path**: `/v1/realtime/calls`
- **Method**: `POST`
- **Purpose**: Create WebRTC voice sessions
---
## Request Headers
### Standard Headers (All Requests)
1. **User-Agent**
- Format: `{originator}/{version} ({os} {os_version}; {arch}) {terminal_type} ({suffix})`
- Example: `codex_cli_rs/0.99.0 (Linux 6.12; x86_64) xterm-256color (vscode; 1.86.0)`
- **Location**: `codex-rs/login/src/auth/default_client.rs:131-155`
2. **originator**
- Value: `codex_cli_rs` (default) or custom via `CODEX_INTERNAL_ORIGINATOR_OVERRIDE`
- Purpose: Identifies the client application
## 3. Request Headers
3. **Content-Type**
- Value: `application/json` (for POST requests)
### 3.1 Authentication Headers
4. **Accept**
- Value: `text/event-stream` (for SSE streaming)
**Location**: `codex-rs/model-provider/src/bearer_auth_provider.rs`
### Responses API Specific Headers
```http
Authorization: Bearer {access_token}
ChatGPT-Account-ID: {account_id}
```
5. **x-client-request-id**
- Value: Thread/conversation ID
- Purpose: Request correlation
For FedRAMP accounts:
6. **session_id**
- Value: Thread/conversation ID
- Purpose: Session tracking
```http
X-OpenAI-Fedramp: true
```
7. **x-codex-turn-state**
- Value: Sticky routing token from previous response
- Purpose: Maintain routing to same backend instance within a turn
### 3.2 Standard Headers
8. **x-codex-turn-metadata**
- Value: Optional turn metadata (JSON)
- Purpose: Observability and debugging
**Location**: `codex-rs/login/src/auth/default_client.rs`
9. **x-codex-window-id**
- Format: `{conversation_id}:{window_generation}`
- Purpose: Window/context tracking
```http
User-Agent: {originator}/{version} ({os} {os_version}; {arch}) {terminal_info} ({suffix})
originator: codex_cli_rs
Content-Type: application/json
```
10. **x-openai-subagent**
- Values: `review`, `compact`, `memory_consolidation`, `collab_spawn`
- Purpose: Identify subagent requests
Example User-Agent:
11. **x-codex-parent-thread-id**
- Value: Parent thread ID (for spawned threads)
- Purpose: Thread hierarchy tracking
```
codex_cli_rs/0.1.0 (Linux 5.15.0; x86_64) iTerm2/3.4.0
```
12. **x-codex-beta-features**
- Value: Comma-separated beta feature keys
- Purpose: Enable experimental features
### 3.3 Codex-Specific Headers
13. **x-responsesapi-include-timing-metrics**
- Value: `true`
- Purpose: Request timing metrics in response
**Location**: `codex-rs/core/src/client.rs`
### WebSocket Specific Headers
#### Session Identification
14. **OpenAI-Beta**
- Value: `responses_websockets=2026-02-06`
- Purpose: Enable WebSocket protocol version
```http
session_id: {conversation_id}
x-client-request-id: {conversation_id}
```
### Developer Role Messages
#### Installation Tracking
When using the ChatGPT OAuth API, requests often include a `developer` role message in the input array. This is distinct from the `instructions` field:
```http
x-codex-installation-id: {installation_id}
```
- **`instructions` field**: Base system instructions (e.g., "You are Codex, based on GPT-5...")
- **`developer` role message**: Additional contextual instructions injected as a message in the conversation
#### Window/Turn Tracking
Example from the blog post:
```json
{
"input": [
{
"type": "message",
"role": "developer",
"content": [
{
"type": "input_text",
"text": "You are a helpful assistant. Respond directly to the user request without running tools or shell commands."
}
]
},
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "Generate an SVG of a pelican riding a bicycle"
}
]
}
]
}
```http
x-codex-window-id: {conversation_id}:{window_generation}
x-codex-turn-state: {sticky_routing_token}
x-codex-turn-metadata: {base64_encoded_metadata}
```
The `developer` role is used for:
- Permission instructions (sandbox mode, approval policies)
- Capability instructions (available tools, restrictions)
- Context-specific guidance (collaboration mode, personality specs)
- Model switching notifications
- Realtime conversation boundaries
**Location**: `codex-rs/protocol/src/models.rs:755-767`
#### Subagent Identification
### Optional Headers
```http
x-openai-subagent: review|compact|memory_consolidation|collab_spawn
x-codex-parent-thread-id: {parent_thread_id}
```
15. **OpenAI-Organization**
- Source: `OPENAI_ORGANIZATION` environment variable
- Purpose: Organization routing
#### Feature Flags
16. **OpenAI-Project**
- Source: `OPENAI_PROJECT` environment variable
- Purpose: Project routing
```http
x-codex-beta-features: {comma_separated_features}
OpenAI-Beta: responses_websockets=2026-02-06
```
17. **version**
- Value: Package version (e.g., `0.99.0`)
- Purpose: Client version tracking
#### Telemetry
### Request Compression
```http
x-responsesapi-include-timing-metrics: true
```
When using ChatGPT authentication with OpenAI provider:
- **Content-Encoding**: `zstd`
- Body is compressed using Zstandard algorithm
#### Residency (Enterprise)
**Location**: `codex-rs/core/src/client.rs:1040-1049`
```http
x-openai-internal-codex-residency: us
```
---
## Request/Response Schemas
## 4. Request Body Format
### Responses API Request Schema
### 4.1 Main Request Structure
**Location**: `codex-rs/codex-api/src/common.rs`
```json
{
"model": "gpt-4",
"instructions": "You are a helpful assistant...",
"instructions": "You are a helpful coding assistant...",
"input": [
{
"type": "message",
......@@ -317,1244 +236,1248 @@ When using ChatGPT authentication with OpenAI provider:
"stream": true,
"include": ["reasoning.encrypted_content"],
"service_tier": "default",
"prompt_cache_key": "<conversation_id>",
"prompt_cache_key": "{conversation_id}",
"text": {
"type": "text",
"verbosity": "normal"
"verbosity": "medium",
"format": {
"type": "json_schema",
"strict": true,
"schema": {},
"name": "codex_output_schema"
}
},
"client_metadata": {
"x-codex-installation-id": "...",
"ws_request_header_traceparent": "...",
"ws_request_header_tracestate": "..."
}
}
```
**Key Fields**:
- `model`: Model identifier (e.g., `gpt-4`, `o1-preview`)
- `instructions`: System instructions/base prompt
- `input`: Array of conversation items (messages, tool calls, tool results)
- `tools`: Available tools in JSON Schema format
- `reasoning`: Reasoning configuration (effort level, summary mode)
- `store`: Whether the response is stored server-side (provider-specific; used for Azure)
- `stream`: Always `true` for streaming
- `include`: Additional fields to include in response
- `service_tier`: Priority level (`default`, `priority`, `flex`)
- `prompt_cache_key`: Cache key for prompt caching
- `text`: Text generation parameters (verbosity, output schema)
**Location**: `codex-rs/codex-api/src/endpoint/responses.rs`
### Response Stream Events (SSE)
The server sends Server-Sent Events with the following event types:
1. **response.created**
```text
event: response.created
data: {"response_id": "resp_123", "status": "in_progress"}
```
2. **response.output_item.added**
```text
event: response.output_item.added
data: {"item": {"type": "message", "role": "assistant", "content": []}}
```
3. **response.content_part.added**
```text
event: response.content_part.added
data: {"part": {"type": "text", "text": ""}}
```
4. **response.content_part.delta**
```text
event: response.content_part.delta
data: {"delta": {"text": "Hello"}}
```
5. **response.output_item.done**
```text
event: response.output_item.done
data: {"item": {"type": "message", "role": "assistant", "content": [...]}}
```
6. **response.done**
```text
event: response.done
data: {
"response_id": "resp_123",
"usage": {
"input_tokens": 100,
"output_tokens": 50,
"total_tokens": 150,
"cached_input_tokens": 0,
"reasoning_output_tokens": 0
}
}
```
7. **error**
```text
event: error
data: {"error": {"message": "Rate limit exceeded", "code": "rate_limit_exceeded"}}
```
**Location**: Event parsing in `codex-rs/codex-api/src/sse.rs`
### Models Response Schema
### 4.2 Input Items (ResponseItem)
```json
{
"models": [
{
"slug": "gpt-4",
"display_name": "GPT-4",
"description": "Most capable model",
"default_reasoning_level": "medium",
"supported_reasoning_levels": [
{"effort": "low", "description": "Fast"},
{"effort": "medium", "description": "Balanced"},
{"effort": "high", "description": "Thorough"}
],
"shell_type": "shell_command",
"visibility": "list",
"minimal_client_version": [0, 99, 0],
"supported_in_api": true,
"priority": 1,
"upgrade": null,
"base_instructions": "You are a helpful assistant",
"supports_reasoning_summaries": true,
"support_verbosity": true,
"default_verbosity": "normal",
"apply_patch_tool_type": "unified_diff",
"truncation_policy": {"mode": "bytes", "limit": 100000},
"supports_parallel_tool_calls": true,
"supports_image_detail_original": true,
"context_window": 128000,
"experimental_supported_tools": []
}
]
**Location**: `codex-rs/protocol/src/models.rs`
The `input` array contains conversation history as `ResponseItem` objects:
```rust
pub enum ResponseItem {
Message {
role: String, // "user", "assistant", "developer"
content: Vec<ContentItem>,
},
FunctionCall {
call_id: String,
name: String,
arguments: String,
},
FunctionCallOutput {
call_id: String,
output: FunctionCallOutputPayload,
},
CustomToolCall {
call_id: String,
name: String,
input: Value,
},
CustomToolCallOutput {
call_id: String,
output: FunctionCallOutputPayload,
},
Reasoning {
content: Vec<ReasoningContent>,
summary: Vec<String>,
},
// ... other types
}
```
**Location**: `codex-rs/codex-api/src/endpoint/models.rs`
### 4.3 Content Items
---
```rust
pub enum ContentItem {
InputText { text: String },
InputImage {
source: ImageSource,
detail: Option<String>,
},
OutputText { text: String },
// ... other types
}
```
## Model List Retrieval
---
### Request Details
## 5. Session Management
**Endpoint**: `GET /v1/models?client_version=<version>`
### 5.1 Conversation ID (ThreadId)
**Headers**:
- `Authorization: Bearer <token>`
- `ChatGPT-Account-ID: <account_id>` (if ChatGPT mode)
- `User-Agent: <codex_user_agent>`
- `originator: <originator>`
**Location**: `codex-rs/protocol/src/protocol.rs`
**Query Parameters**:
- `client_version`: Client version string (e.g., `0.99.0`)
- **Format**: UUID v4
- **Persistence**: Across entire conversation
- **Used for**:
- Session identification (`session_id` header)
- Prompt caching (`prompt_cache_key`)
- Window tracking (`x-codex-window-id`)
### Response Handling
### 5.2 Window Generation
The response includes an `ETag` header for caching:
**Location**: `codex-rs/core/src/client.rs`
```rust
let header_etag = resp
.headers
.get(ETAG)
.and_then(|value| value.to_str().ok())
.map(ToString::to_string);
```
- Increments when conversation context is reset
- **Format**: `{conversation_id}:{window_generation}`
- **Sent in**: `x-codex-window-id` header
**Location**: `codex-rs/codex-api/src/endpoint/models.rs:58-62`
### 5.3 Turn State (Sticky Routing)
### Model Selection
**Location**: `codex-rs/core/src/client.rs`
Models are filtered based on:
1. `visibility`: Must be `"list"` to appear in UI
2. `minimal_client_version`: Client version must meet minimum
3. `supported_in_api`: Must be `true` for API usage
```http
# Server sends in response header:
x-codex-turn-state: {opaque_token}
---
# Client echoes back in subsequent requests within same turn:
x-codex-turn-state: {same_token}
```
## Streaming Responses
### HTTP/SSE Transport
1. **Connection Setup**
- POST request to `/v1/responses`
- `Accept: text/event-stream` header
- Optional `Content-Encoding: zstd` for compression
2. **Event Stream Processing**
- Parse SSE events line-by-line
- Handle `event:` and `data:` lines
- Reconstruct JSON from multi-line data
- Parse event-specific payloads
3. **Idle Timeout**
- Default: 300 seconds (5 minutes)
- Configurable via `stream_idle_timeout_ms`
- Connection reset if no data received within timeout
4. **Retry Logic**
- Default max retries: 5 attempts
- Exponential backoff: 200ms base delay
- Retry on: 5xx errors, transport errors
- No retry on: 429 (rate limit), 401 (unauthorized)
**Location**: `codex-rs/codex-api/src/sse.rs`
### WebSocket Transport
1. **Connection Handshake**
- Upgrade HTTP connection to WebSocket
- URL: `wss://chatgpt.com/backend-api/codex/responses`
- Headers: Same as HTTP plus `OpenAI-Beta: responses_websockets=2026-02-06`
2. **Request Format**
```json
{
"type": "response.create",
"response": {
"model": "gpt-4",
"instructions": "...",
"input": [...],
"client_metadata": {
"x-codex-window-id": "...",
"x-openai-subagent": "...",
"x-codex-parent-thread-id": "...",
"x-codex-turn-metadata": "..."
}
}
}
```
3. **Incremental Requests**
- Reuse WebSocket connection for multiple requests
- Send only delta items with `previous_response_id`
- Server maintains conversation state
4. **Connection Reuse**
- Connection cached per turn
- Reused across multiple requests in same turn
- Reset on window generation change
5. **Fallback to HTTP**
- On `426 Upgrade Required` response
- On connection timeout (15 seconds default)
- On WebSocket errors
**Location**: `codex-rs/codex-api/src/websocket.rs`
**Purpose**: Ensures requests within a turn hit the same backend instance
---
## Message Conversion Between OpenAI and Codex Formats
### Overview
## 6. Transport Mechanisms
Codex uses the OpenAI Responses API format, which is more structured than the traditional Chat Completions API. Understanding how to convert between standard OpenAI message formats and Codex's internal format is essential for implementing compatible clients.
### 6.1 HTTP (SSE - Server-Sent Events)
### Key Type Definitions
**Location**: `codex-rs/core/src/client.rs:1132`
#### Codex Internal Types (`ResponseInputItem` and `ResponseItem`)
1. POST to `/v1/responses`
2. Response: `Content-Type: text/event-stream`
3. **Events**:
- `response.created`
- `response.output_item.added`
- `response.output_item.done`
- `response.completed`
- `response.failed`
Codex uses two main types for messages:
### 6.2 WebSocket
1. **`ResponseInputItem`** - Messages sent TO the API
2. **`ResponseItem`** - Messages received FROM the API
**Location**: `codex-rs/core/src/client.rs:1229`
```rust
// From codex-rs/protocol/src/models.rs
#### Connection
// Input items (sent to API)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseInputItem {
Message {
role: String,
content: Vec<ContentItem>,
},
FunctionCallOutput {
call_id: String,
output: FunctionCallOutputPayload,
},
McpToolCallOutput {
call_id: String,
output: CallToolResult,
},
CustomToolCallOutput {
call_id: String,
name: Option<String>,
output: FunctionCallOutputPayload,
},
ToolSearchOutput {
call_id: String,
status: String,
execution: String,
tools: Vec<serde_json::Value>,
},
}
- Upgrade HTTP to WebSocket
- **Path**: `/v1/responses`
- **Protocol**: `responses_websockets=2026-02-06`
// Output items (received from API)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseItem {
Message { id: Option<String>, role: String, content: Vec<ContentItem>, ... },
Reasoning { id: String, summary: Vec<ReasoningItemReasoningSummary>, ... },
FunctionCall { id: Option<String>, name: String, arguments: String, call_id: String, ... },
CustomToolCall { id: Option<String>, status: Option<String>, call_id: String, name: String, ... },
// ... and more
}
#### Request Format
// Content items within messages
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentItem {
InputText { text: String },
InputImage { image_url: String },
OutputText { text: String },
```json
{
"type": "response.create",
"model": "gpt-4",
"instructions": "...",
"input": [],
"previous_response_id": "resp_123",
"generate": false,
...
}
```
**Location**: `codex-rs/protocol/src/models.rs:119-159`
#### Incremental Requests
### Converting User Input to Codex Format
- Reuses WebSocket connection
- Sends only delta of new input items
- References `previous_response_id`
#### Simple Text Messages
#### Fallback
**Standard OpenAI format:**
```json
{
"messages": [
{"role": "user", "content": "Hello, how are you?"}
]
}
```
- Falls back to HTTP on connection failure
- Session-scoped: once activated, stays on HTTP
**Codex format:**
```json
{
"input": [
{
"type": "message",
"role": "user",
"content": [
{"type": "input_text", "text": "Hello, how are you?"}
]
}
]
---
## 7. System Prompt (Instructions)
### 7.1 Base Instructions
**Location**: `codex-rs/protocol/src/models.rs`
```rust
pub struct BaseInstructions {
pub text: String,
pub personality: Option<Personality>,
}
```
**Conversion logic:**
### 7.2 Instruction Sources (Priority Order)
1. **User Override**: `config.base_instructions`
2. **Model Default**: `model_info.base_instructions`
3. **Personality Template**: Applied if no override
### 7.3 Instruction Composition
**Location**: `codex-rs/core/src/session/session.rs`
```rust
// From codex-rs/protocol/src/models.rs:1015-1053
impl From<Vec<UserInput>> for ResponseInputItem {
fn from(items: Vec<UserInput>) -> Self {
Self::Message {
role: "user".to_string(),
content: items
.into_iter()
.flat_map(|c| match c {
UserInput::Text { text, .. } => {
vec![ContentItem::InputText { text }]
}
// ... handle images, local images, etc.
})
.collect(),
}
}
}
// Final instructions sent to API:
instructions = base_instructions + tool_instructions + context_tags
```
#### Messages with Images
Context tags include:
- Sandbox mode information
- Collaboration mode
- Realtime conversation state
- Approval settings
---
## 8. Message Format Conversion (OpenAI Compatible → ChatGPT)
### 8.1 Key Differences
1. **No Direct OpenAI Format**:
- Codex doesn't use OpenAI's `messages` array format
- Uses custom `ResponseItem` enum instead
2. **Input Array**:
- **OpenAI**: `messages: [{role, content}]`
- **Codex**: `input: [ResponseItem]`
3. **Instructions vs System Message**:
- **OpenAI**: System message in `messages` array
- **Codex**: Separate `instructions` field
4. **Tool Calls**:
- **OpenAI**: Carried in the assistant message's `tool_calls` array, with results returned as `tool`-role messages
- **Codex**: Separate `ResponseItem` types (`FunctionCall`, `FunctionCallOutput`)
### 8.2 Conversion Example
**OpenAI Format**:
**Standard OpenAI format:**
```json
{
"model": "gpt-4",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": {"url": "data:image/png;base64,..."}
}
]
}
{ "role": "system", "content": "You are helpful" },
{ "role": "user", "content": "Hello" },
{ "role": "assistant", "content": "Hi there!" }
]
}
```
**Codex format (with image tags):**
**Codex Format**:
```json
{
"model": "gpt-4",
"instructions": "You are helpful",
"input": [
{
"type": "message",
"role": "user",
"content": [
{"type": "input_text", "text": "<image>"},
{"type": "input_image", "image_url": "data:image/png;base64,..."},
{"type": "input_text", "text": "</image>What's in this image?"}
]
"content": [{ "type": "input_text", "text": "Hello" }]
},
{
"type": "message",
"role": "assistant",
"content": [{ "type": "output_text", "text": "Hi there!" }]
}
]
}
```
**Image tagging rules:**
- Remote images: wrapped with `<image>` and `</image>` tags
- Local images: wrapped with `<image name=[Image #N]>` and `</image>` tags
- Multiple images share a sequential label counter
**Location**: `codex-rs/protocol/src/models.rs:867-906`
---
### Converting Tool Calls/Function Calls
## 9. Complete Request Flow
#### Tool Call Request (Model → API)
### Step-by-Step Process
**Codex receives from model:**
```rust
// From codex-rs/protocol/src/models.rs:227-240
ResponseItem::FunctionCall {
name: "shell",
arguments: "{\"command\": [\"ls\"]}",
call_id: "call_123",
namespace: Some("mcp"),
}
```
#### Step 1: Authentication
**Wire format (Responses API):**
```json
{
"type": "function_call",
"name": "shell",
"arguments": "{\"command\": [\"ls\"]}",
"call_id": "call_123"
}
```python
# Load tokens from storage
tokens = load_from_auth_json()
access_token = tokens['access_token']
account_id = tokens['id_token']['chatgpt_account_id']
```
#### Tool Result Response (API → Model)
**Codex sends back to API:**
```rust
ResponseInputItem::FunctionCallOutput {
call_id: "call_123".to_string(),
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text("total 0".to_string()),
success: Some(true),
},
#### Step 2: Build Headers
```python
headers = {
'Authorization': f'Bearer {access_token}',
'ChatGPT-Account-ID': account_id,
'User-Agent': 'codex_cli_rs/0.1.0 (Linux 5.15.0; x86_64) iTerm2',
'originator': 'codex_cli_rs',
'Content-Type': 'application/json',
'session_id': conversation_id,
'x-codex-installation-id': installation_id,
'x-codex-window-id': f'{conversation_id}:0',
'OpenAI-Beta': 'responses_websockets=2026-02-06',
}
```
**Wire format:**
```json
{
"type": "function_call_output",
"call_id": "call_123",
"output": "total 0"
#### Step 3: Build Request Body
```python
body = {
'model': 'gpt-4',
'instructions': base_instructions,
'input': conversation_history, # List of ResponseItem
'tools': tool_definitions,
'tool_choice': 'auto',
'parallel_tool_calls': True,
'stream': True,
'store': False,
'prompt_cache_key': conversation_id,
}
```
Or for multimodal tool outputs:
```json
{
"type": "function_call_output",
"call_id": "call_123",
"output": [
{"type": "input_text", "text": "File listing:"},
{"type": "input_image", "image_url": "data:image/png;base64,..."}
]
}
#### Step 4: Send Request
```python
# HTTP/SSE
response = requests.post(
'https://chatgpt.com/backend-api/v1/responses',
headers=headers,
json=body,
stream=True
)
# Or WebSocket
ws = websocket.create_connection(
'wss://chatgpt.com/backend-api/v1/responses',
header=headers
)
ws.send(json.dumps({'type': 'response.create', **body}))
```
**Location**: `codex-rs/protocol/src/models.rs:1180-1288`
#### Step 5: Handle Response
### Tool Definition Format
```python
# SSE
for line in response.iter_lines():
if line.startswith(b'data: '):
event = json.loads(line[6:])
handle_event(event)
**Codex tool format (JSON Schema):**
```rust
// Tools are passed to the Responses API in OpenAI function format
{
"type": "function",
"name": "shell",
"description": "Execute a shell command",
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "array",
"items": {"type": "string"},
"description": "Command as array of strings"
},
"workdir": {
"type": "string",
"description": "Working directory"
}
},
"required": ["command"]
}
}
# WebSocket
while True:
message = json.loads(ws.recv())
handle_event(message)
```
**Location**: `codex-rs/codex-tools/src/lib.rs`
### Streaming Message Handling
#### Step 6: Handle 401 (Token Refresh)
#### SSE Events to ResponseItem Conversion
```python
if response.status_code == 401:
# Refresh token
new_tokens = refresh_access_token(refresh_token)
# Retry request with new token
```
The Responses API streams events that must be assembled into `ResponseItem` objects:
#### Step 7: Update Turn State
```rust
// From codex-rs/codex-api/src/sse.rs
// Event sequence:
// 1. response.created - Start of response
// 2. response.output_item.added - New item created
// 3. response.content_part.added - Content started
// 4. response.content_part.delta - Incremental content
// 5. response.output_item.done - Item complete
// 6. response.done - All done
// Each event type maps to internal types:
// - response.output_item.added (type=message) -> ResponseItem::Message
// - response.output_item.added (type=function_call) -> ResponseItem::FunctionCall
// - response.output_item.added (type=reasoning) -> ResponseItem::Reasoning
```python
# Extract from response headers
turn_state = response.headers.get('x-codex-turn-state')
# Include in next request within same turn
headers['x-codex-turn-state'] = turn_state
```
**Location**: `codex-rs/codex-api/src/sse.rs`
---
#### Converting Streaming Deltas to Codex Format
## 10. Python Implementation Example
```python
import requests
import json
import uuid
from typing import List, Dict, Any
class CodexChatGPTClient:
def __init__(self, access_token: str, account_id: str):
self.base_url = "https://chatgpt.com/backend-api"
self.access_token = access_token
self.account_id = account_id
self.conversation_id = str(uuid.uuid4())
self.installation_id = str(uuid.uuid4())
self.window_generation = 0
self.turn_state = None
def _build_headers(self) -> Dict[str, str]:
headers = {
'Authorization': f'Bearer {self.access_token}',
'ChatGPT-Account-ID': self.account_id,
'User-Agent': 'custom_client/1.0.0 (Linux; x86_64)',
'originator': 'custom_client',
'Content-Type': 'application/json',
'session_id': self.conversation_id,
'x-client-request-id': self.conversation_id,
'x-codex-installation-id': self.installation_id,
'x-codex-window-id': f'{self.conversation_id}:{self.window_generation}',
}
```rust
// Incremental text delta
{
"event": "response.content_part.delta",
"data": {
"delta": {"text": "Hello"}
}
}
if self.turn_state:
headers['x-codex-turn-state'] = self.turn_state
return headers
def send_message(self,
user_message: str,
conversation_history: List[Dict],
instructions: str = "You are a helpful assistant.") -> None:
# Build input array
input_items = []
for item in conversation_history:
input_items.append({
'type': 'message',
'role': item['role'],
'content': [{'type': 'input_text', 'text': item['content']}]
})
# Add current message
input_items.append({
'type': 'message',
'role': 'user',
'content': [{'type': 'input_text', 'text': user_message}]
})
# Build request body
body = {
'model': 'gpt-4',
'instructions': instructions,
'input': input_items,
'tools': [],
'tool_choice': 'auto',
'parallel_tool_calls': True,
'stream': True,
'store': False,
'prompt_cache_key': self.conversation_id,
}
// Becomes:
ResponseItem::Message {
content: vec![ContentItem::OutputText { text: "Hello" }],
// ...
}
# Send request
response = requests.post(
f'{self.base_url}/v1/responses',
headers=self._build_headers(),
json=body,
stream=True
)
# Update turn state from response
if 'x-codex-turn-state' in response.headers:
self.turn_state = response.headers['x-codex-turn-state']
# Process SSE stream
for line in response.iter_lines():
if line.startswith(b'data: '):
try:
event = json.loads(line[6:])
self._handle_event(event)
except json.JSONDecodeError:
continue
def _handle_event(self, event: Dict[str, Any]):
event_type = event.get('type')
if event_type == 'response.output_item.done':
item = event.get('item', {})
if item.get('type') == 'message':
content = item.get('content', [])
for c in content:
if c.get('type') == 'output_text':
print(c.get('text', ''), end='', flush=True)
elif event_type == 'response.completed':
print() # New line after completion
# Usage example
if __name__ == "__main__":
# Assuming you have valid tokens from OAuth2 flow
client = CodexChatGPTClient(
access_token="your_access_token_here",
account_id="your_account_id_here"
)
# Send a message
client.send_message(
user_message="Hello, how are you?",
conversation_history=[],
instructions="You are a helpful coding assistant."
)
```
### Complete Conversion Flow
#### Non-Streaming Request Flow
1. **User Input → Codex Input:**
```rust
// User input (text, images, etc.)
let user_input = vec![UserInput::Text { text: "Hello".to_string() }];
// Convert to ResponseInputItem
let input_item: ResponseInputItem = user_input.into();
// Results in: ResponseInputItem::Message { role: "user", content: [...] }
```
2. **Build Request:**
```rust
// From codex-rs/core/src/client.rs:749-815
let request = ResponsesApiRequest {
model: "gpt-4".to_string(),
instructions: base_instructions.text,
input: vec![input_item], // ResponseInputItem array
tools: create_tools_json(tools)?,
// ... other fields
};
```
3. **Response → ResponseItem:**
```rust
// Parse JSON response into ResponseItem
let response_item: ResponseItem = serde_json::from_value(response_json)?;
// Handle Message, FunctionCall, Reasoning, etc.
```
#### Streaming Request Flow
1. **Send Request** (same as non-streaming)
2. **Process SSE Events:**
```rust
// From codex-rs/core/src/client.rs:1496-1576
while let Some(event) = stream.next().await {
match event {
Ok(ResponseEvent::OutputItemDone(item)) => {
// Item is complete, convert to ResponseItem
items_added.push(item);
}
Ok(ResponseEvent::ContentPartDelta { delta }) => {
// Accumulate incremental text
}
Ok(ResponseEvent::Completed { response_id, usage }) => {
// Final response with usage stats
}
}
}
```
3. **Reconstruct Full Message:**
```rust
// Multiple deltas are assembled into complete ResponseItem
// e.g., multiple response.content_part.delta events -> complete message
```
### WebSocket Message Format
For WebSocket transport, messages use a different structure:
---
```rust
// From codex-rs/codex-api/src/websocket.rs
## Summary
// Request (WebSocket)
{
"type": "response.create",
"response": {
"model": "gpt-4",
"instructions": "...",
"input": [...],
"client_metadata": {
"x-codex-window-id": "...",
"x-openai-subagent": "..."
}
}
}
This documentation provides a complete reference for implementing a standalone client that mimics Codex's request behavior to ChatGPT's backend API. Key takeaways:
// Incremental request (subsequent requests in same turn)
{
"type": "response.create",
"previous_response_id": "resp_123",
"input": [...], // Only new items, not full history
"response": { ... }
}
```
1. **Authentication**: Uses OAuth2 device code flow with token refresh
2. **Headers**: Requires `Authorization`, `ChatGPT-Account-ID`, and various Codex-specific headers
3. **Format**: Uses custom `ResponseItem` format instead of OpenAI's `messages` array
4. **Transport**: Supports both HTTP/SSE and WebSocket with automatic fallback
5. **Session Management**: Uses conversation IDs, window generations, and turn state for routing
6. **Instructions**: Separate `instructions` field instead of system messages
**Location**: `codex-rs/codex-api/src/websocket.rs`
### Summary: Key Conversion Points
| Aspect | OpenAI Standard | Codex Internal |
|--------|----------------|----------------|
| Messages field | `messages` | `input` |
| Message structure | `{role, content}` | `{type: message, role, content[]}` |
| Content structure | `{type, text}` or `{type, image_url}` | `{type: input_text/input_image/output_text}` |
| Tool calls | `tool_calls` array | `ResponseItem::FunctionCall` |
| Tool results | `tool`-role message with `tool_call_id` (string content) | `FunctionCallOutputPayload` (string or array) |
| Images | `image_url` content parts | `input_image` parts wrapped with `<image>` tags |
| Streaming | SSE with deltas | Assembled into complete `ResponseItem` |
### Important Implementation Notes
1. **Content Array**: Messages always have a `content` array, even when they contain only a single text item
2. **Type Tags**: All items use snake_case type tags (`message`, `function_call`, `input_text`)
3. **Image Handling**: Images require special tagging with `<image>` and `</image>`
4. **Tool Output**: Can be plain text OR array of content items for multimodal
5. **Namespace**: MCP tools include a `namespace` field; built-in tools don't
All code locations reference the Codex Rust codebase for verification and deeper exploration.
---
## WebSocket Support
### Prewarm/Preconnect
## 11. Detailed Request Examples with Tool Usage
Before sending the first request, the client can establish a WebSocket connection:
### 11.1 Simple Text Request (No Tools)
```rust
pub async fn preconnect_websocket(
&mut self,
session_telemetry: &SessionTelemetry,
_model_info: &ModelInfo,
) -> std::result::Result<(), ApiError>
**Request:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful coding assistant.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "What is Python?"
}
]
}
],
"tools": [],
"tool_choice": "auto",
"parallel_tool_calls": true,
"reasoning": null,
"store": false,
"stream": true,
"include": [],
"service_tier": null,
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000",
"text": null,
"client_metadata": {
"x-codex-installation-id": "123e4567-e89b-12d3-a456-426614174000"
}
}
```
This reduces latency for the first actual request.
### Warmup Request
A special request with `generate: false` establishes the connection without generating any output:
### 11.2 Request with Function Tools
**Request:**
```json
{
"type": "response.create",
"response": {
"model": "gpt-4",
"generate": false,
...
"model": "gpt-4",
"instructions": "You are a helpful coding assistant with access to shell commands.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "List files in the current directory"
}
]
}
],
"tools": [
{
"type": "function",
"name": "shell",
"description": "Execute a shell command and return the output",
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "string",
"description": "The shell command to execute"
},
"workdir": {
"type": "string",
"description": "Working directory for command execution"
}
},
"required": ["command"],
"additionalProperties": false
}
}
],
"tool_choice": "auto",
"parallel_tool_calls": true,
"reasoning": null,
"store": false,
"stream": true,
"include": [],
"service_tier": null,
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000",
"text": null,
"client_metadata": {
"x-codex-installation-id": "123e4567-e89b-12d3-a456-426614174000"
}
}
```
The client waits for completion before sending the actual request.
**Location**: `codex-rs/core/src/client.rs:1303-1351`
---
## Implementation Examples
### Example 1: Basic Request (HTTP/SSE)
### 11.3 Request with Tool Call and Output in History
```rust
use codex_api::{ResponsesClient, ResponsesApiRequest, ResponsesOptions};
use codex_api::requests::responses::Compression;
// Setup
let transport = ReqwestTransport::new(build_reqwest_client());
let provider = Provider {
name: "OpenAI".to_string(),
base_url: "https://api.openai.com/v1".to_string(),
query_params: None,
headers: HeaderMap::new(),
retry: RetryConfig { /* ... */ },
stream_idle_timeout: Duration::from_secs(300),
};
let auth = /* implement AuthProvider */;
let client = ResponsesClient::new(transport, provider, auth);
// Build request
let request = ResponsesApiRequest {
model: "gpt-4".to_string(),
instructions: "You are a helpful assistant".to_string(),
input: vec![/* conversation items */],
tools: vec![],
tool_choice: "auto".to_string(),
parallel_tool_calls: true,
reasoning: None,
store: false,
stream: true,
include: vec![],
service_tier: None,
prompt_cache_key: Some("conversation_123".to_string()),
text: None,
};
let options = ResponsesOptions {
conversation_id: Some("conversation_123".to_string()),
session_source: None,
extra_headers: HeaderMap::new(),
compression: Compression::None,
turn_state: None,
};
// Stream response
let mut stream = client.stream_request(request, options).await?;
while let Some(event) = stream.next().await {
match event {
Ok(ResponseEvent::ContentPartDelta { delta }) => {
print!("{}", delta.text);
**Request:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful coding assistant.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "List files in the current directory"
}
Ok(ResponseEvent::Completed { response_id, token_usage }) => {
println!("\nCompleted: {}", response_id);
]
},
{
"type": "message",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": "I'll list the files for you."
}
Err(e) => {
eprintln!("Error: {}", e);
break;
]
},
{
"type": "function_call",
"call_id": "call_abc123",
"name": "shell",
"arguments": "{\"command\":\"ls -la\"}"
},
{
"type": "function_call_output",
"call_id": "call_abc123",
"output": {
"type": "text",
"text": "total 48\ndrwxr-xr-x 12 user staff 384 Apr 19 10:30 .\ndrwxr-xr-x 6 user staff 192 Apr 18 15:20 ..\n-rw-r--r-- 1 user staff 1234 Apr 19 10:25 README.md\n-rw-r--r-- 1 user staff 567 Apr 19 10:30 main.py"
}
},
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "What's in main.py?"
}
_ => {}
]
}
],
"tools": [
{
"type": "function",
"name": "shell",
"description": "Execute a shell command",
"parameters": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
},
"required": ["command"]
}
}
],
"tool_choice": "auto",
"parallel_tool_calls": true,
"store": false,
"stream": true,
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000"
}
```
### Example 2: Retrieve Models
```rust
use codex_api::{ModelsClient, Provider};
let transport = ReqwestTransport::new(build_reqwest_client());
let provider = Provider { /* ... */ };
let auth = /* implement AuthProvider */;
let client = ModelsClient::new(transport, provider, auth);
let (models, etag) = client
.list_models("0.99.0", HeaderMap::new())
.await?;
### 11.4 Request with Built-in Tools
for model in models {
println!("{}: {}", model.slug, model.display_name);
**Request with local_shell, web_search, and image_generation:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful assistant with access to shell, web search, and image generation.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "Search for Python tutorials and create a diagram"
}
]
}
],
"tools": [
{
"type": "local_shell"
},
{
"type": "web_search",
"external_web_access": true,
"search_context_size": "medium",
"search_content_types": ["text", "image"]
},
{
"type": "image_generation",
"output_format": "url"
}
],
"tool_choice": "auto",
"parallel_tool_calls": true,
"store": false,
"stream": true,
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000"
}
```
### Example 3: WebSocket Request
```rust
use codex_api::{ResponsesWebsocketClient, ResponseCreateWsRequest};
let provider = Provider { /* ... */ };
let auth = /* implement AuthProvider */;
let ws_client = ResponsesWebsocketClient::new(provider, auth);
// Connect
let mut connection = ws_client
.connect(headers, default_headers, turn_state, telemetry)
.await?;
// Send request
let request = ResponsesWsRequest::ResponseCreate(ResponseCreateWsRequest {
model: "gpt-4".to_string(),
instructions: "You are a helpful assistant".to_string(),
input: vec![/* items */],
client_metadata: HashMap::new(),
/* ... */
});
let mut stream = connection.stream_request(request, false).await?;
### 11.5 Request with Reasoning Controls
// Process events
while let Some(event) = stream.next().await {
// Handle events
**Request:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful coding assistant.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "Explain how quicksort works"
}
]
}
],
"tools": [],
"tool_choice": "auto",
"parallel_tool_calls": true,
"reasoning": {
"effort": "high",
"summary": "auto"
},
"store": false,
"stream": true,
"include": ["reasoning.encrypted_content"],
"service_tier": null,
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000"
}
```
### Example 4: Authentication Headers
**Reasoning effort options:**
- `"low"` - Minimal reasoning
- `"medium"` - Balanced reasoning (default)
- `"high"` - Extended reasoning
```rust
// API Key Mode
struct ApiKeyAuth {
api_key: String,
}
**Reasoning summary options:**
- `"auto"` - Automatic summary generation
- `"none"` - No summary
- `"concise"` - Brief summary
- `"detailed"` - Detailed summary
impl AuthProvider for ApiKeyAuth {
fn bearer_token(&self) -> Option<String> {
Some(self.api_key.clone())
}
}
### 11.6 Request with Verbosity Control
// ChatGPT Mode
struct ChatGptAuth {
access_token: String,
account_id: String,
}
impl AuthProvider for ChatGptAuth {
fn bearer_token(&self) -> Option<String> {
Some(self.access_token.clone())
}
fn account_id(&self) -> Option<String> {
Some(self.account_id.clone())
**Request:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful coding assistant.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "Explain Python decorators"
}
]
}
],
"tools": [],
"tool_choice": "auto",
"parallel_tool_calls": true,
"store": false,
"stream": true,
"text": {
"verbosity": "high"
},
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000"
}
```
### Example 5: User-Agent Construction
```rust
pub fn get_codex_user_agent() -> String {
let build_version = env!("CARGO_PKG_VERSION");
let os_info = os_info::get();
let originator = originator();
let prefix = format!(
"{}/{build_version} ({} {}; {}) {}",
originator.value.as_str(),
os_info.os_type(),
os_info.version(),
os_info.architecture().unwrap_or("unknown"),
user_agent() // terminal detection
);
let suffix = USER_AGENT_SUFFIX
.lock()
.ok()
.and_then(|guard| guard.clone());
let suffix = suffix
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.map_or_else(String::new, |value| format!(" ({value})"));
format!("{prefix}{suffix}")
}
```
**Result**: `codex_cli_rs/0.99.0 (Linux 6.12; x86_64) xterm-256color (vscode; 1.86.0)`
---
## Key Implementation Details
**Verbosity options:**
- `"low"` - Concise responses
- `"medium"` - Balanced responses (default)
- `"high"` - Detailed responses
### 1. Base URL Selection
### 11.7 Request with JSON Schema Output
```rust
pub fn to_api_provider(&self, auth_mode: Option<AuthMode>) -> CodexResult<ApiProvider> {
let default_base_url = if matches!(auth_mode, Some(AuthMode::Chatgpt)) {
"https://chatgpt.com/backend-api/codex"
} else {
"https://api.openai.com/v1"
};
let base_url = self
.base_url
.clone()
.unwrap_or_else(|| default_base_url.to_string());
// ...
**Request:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful assistant that returns structured data.",
"input": [
{
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "List 3 programming languages with their use cases"
}
]
}
],
"tools": [],
"tool_choice": "auto",
"parallel_tool_calls": true,
"store": false,
"stream": true,
"text": {
"format": {
"type": "json_schema",
"strict": true,
"name": "programming_languages",
"schema": {
"type": "object",
"properties": {
"languages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"use_case": {
"type": "string"
}
},
"required": ["name", "use_case"],
"additionalProperties": false
}
}
},
"required": ["languages"],
"additionalProperties": false
}
}
},
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000"
}
```
**Location**: `codex-rs/model-provider-info/src/lib.rs:184-193`
### 11.8 Request with Image Input
### 2. Request Compression
```rust
fn responses_request_compression(&self, auth: Option<&CodexAuth>) -> Compression {
if self.client.state.enable_request_compression
&& auth.is_some_and(CodexAuth::is_chatgpt_auth)
&& self.client.state.provider.is_openai()
**Request:**
```json
{
"model": "gpt-4",
"instructions": "You are a helpful assistant that can analyze images.",
"input": [
{
Compression::Zstd
} else {
Compression::None
"type": "message",
"role": "user",
"content": [
{
"type": "input_text",
"text": "What's in this image?"
},
{
"type": "input_image",
"source": {
"type": "url",
"url": "https://example.com/image.jpg"
},
"detail": "high"
}
]
}
],
"tools": [],
"tool_choice": "auto",
"parallel_tool_calls": true,
"store": false,
"stream": true,
"prompt_cache_key": "550e8400-e29b-41d4-a716-446655440000"
}
```
**Location**: `codex-rs/core/src/client.rs:1040-1049`
**Image detail options:**
- `"low"` - Low resolution analysis
- `"high"` - High resolution analysis
- `"auto"` - Automatic selection
### 3. Retry Logic
---
```rust
/// Retry policy settings for API calls.
pub struct RetryConfig {
/// Attempt limit. Documented defaults: 4 for requests, 5 for streams.
/// NOTE(review): whether this counts the initial try is not visible here — confirm.
pub max_attempts: u64,
/// Base delay used between retries. Documented default: 200ms.
pub base_delay: Duration,
/// Whether HTTP 429 responses are retried. Documented default: false.
pub retry_429: bool,
/// Whether HTTP 5xx responses are retried. Documented default: true.
pub retry_5xx: bool,
/// Whether transport-level failures are retried. Documented default: true.
pub retry_transport: bool,
}
```
## 12. Key Differences from Standard OpenAI API
Default values:
- `max_attempts`: 4 (requests), 5 (streams)
- `base_delay`: 200ms
- `retry_429`: false
- `retry_5xx`: true
- `retry_transport`: true
### 12.1 Missing Fields in Codex/ChatGPT Format
**Location**: `codex-rs/codex-api/src/provider.rs:16-22`
The following OpenAI API fields are **NOT supported** in Codex's ChatGPT backend format:
### 4. Sticky Routing (Turn State)
#### Not Supported:
- `temperature` - Not configurable per request
- `top_p` - Not configurable per request
- `max_tokens` - Not configurable per request (handled internally)
- `max_completion_tokens` - Not configurable per request
- `presence_penalty` - Not supported
- `frequency_penalty` - Not supported
- `logit_bias` - Not supported
- `logprobs` - Not supported
- `top_logprobs` - Not supported
- `n` - Always 1 (single response)
- `stop` - Not configurable
- `seed` - Not supported
- `user` - Not used (account ID in header instead)
```rust
/// Turn state for sticky routing.
///
/// This is an `OnceLock` that stores the turn state value received from the server
/// on turn start via the `x-codex-turn-state` response header. Once set, this value
/// should be sent back to the server in the `x-codex-turn-state` request header for
/// all subsequent requests within the same turn to maintain sticky routing.
turn_state: Arc<OnceLock<String>>,
```
### 12.2 Codex-Specific Fields
**Location**: `codex-rs/core/src/client.rs:209-219`
Fields that exist in Codex but not in standard OpenAI API:
### 5. Session vs Turn Scope
#### Codex-Only Fields:
- `instructions` - Replaces system message
- `input` - Replaces `messages` array
- `prompt_cache_key` - For prompt caching
- `client_metadata` - For telemetry and tracking
- `include` - For including reasoning content
- `text.verbosity` - Response verbosity control
- `reasoning.effort` - Reasoning effort level
- `reasoning.summary` - Reasoning summary type
- **Session-scoped**: `ModelClient` - lives for entire conversation
- **Turn-scoped**: `ModelClientSession` - created per turn, manages WebSocket connection
### 12.3 Tool Definition Differences
```rust
pub fn new_session(&self) -> ModelClientSession {
ModelClientSession {
client: self.clone(),
websocket_session: self.take_cached_websocket_session(),
turn_state: Arc::new(OnceLock::new()),
**OpenAI Standard:**
```json
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get weather information",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string"}
},
"required": ["location"]
}
}
}
```
**Location**: `codex-rs/core/src/client.rs:300-306`
---
## Session Flow
**Codex/ChatGPT Format:**
```json
{
"type": "function",
"name": "get_weather",
"description": "Get weather information",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string"}
},
"required": ["location"],
"additionalProperties": false
}
}
```
### Overview of a Complete Session
**Key differences:**
- No nested `function` object
- `additionalProperties: false` is typically included
- Tool definition is flatter
A session represents an entire conversation from start to finish. Understanding the flow of requests and how conversation state is maintained is essential for implementing a compatible client.
### 12.4 Built-in Tool Types
### Lifecycle of a Session
Codex supports special built-in tool types not in standard OpenAI:
```json
{
"type": "local_shell"
}
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ SESSION LIFETIME │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ 1. Initialization │
│ ├── Load/validate authentication credentials │
│ ├── Fetch available models from /v1/models │
│ └── Create ModelClient (session-scoped) │
│ │
│ 2. Turn 1: User Input → First Response │
│ ├── Create ModelClientSession (turn-scoped) │
│ ├── Preconnect WebSocket (optional, recommended) │
│ ├── Build request with base instructions │
│ ├── Send user message in input array │
│ ├── Receive streaming response (SSE events) │
│ └── Extract response items, tool calls │
│ │
│ 3. Tool Execution Loop (within Turn N) │
│ ├── API emits FunctionCall item │
│ ├── Client executes tool locally │
│ ├── Send FunctionCallOutput back in next request │
│ └── Continue receiving response (may loop) │
│ │
│ 4. Turn N: Subsequent Requests │
│ ├── Build request with previous_response_id │
│ ├── Include all tool call results since turn start │
│ ├── Optionally compact conversation history │
│ └── Receive updated conversation state │
│ │
│ 5. Session End │
│ ├── Final response received │
│ ├── Close WebSocket connection │
│ └── Session cleanup │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```json
{
"type": "web_search",
"external_web_access": true,
"search_context_size": "medium"
}
```
### Detailed Turn Flow
```json
{
"type": "image_generation",
"output_format": "url"
}
```
#### Turn 1: Initial Request
```json
{
"type": "tool_search",
"execution": "client",
"description": "Search for available tools",
"parameters": {...}
}
```
```rust
// 1. Create session (turn-scoped)
let session = client.new_session();
// 2. Optionally preconnect WebSocket for lower latency
session.preconnect_websocket(telemetry, &model_info).await?;
// 3. Build first request
let request = ResponsesApiRequest {
model: "gpt-4".to_string(),
instructions: base_instructions.text, // Required for ChatGPT mode
input: vec![
// Developer message (optional, for system context)
ResponseInputItem::Message {
role: "developer".to_string(),
content: vec![ContentItem::InputText {
text: permissions_instructions
}],
},
// User message
ResponseInputItem::Message {
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: user_input.clone()
}],
},
],
tools: create_tools_json(tools)?,
// ...other fields
};
// 4. Send request and process streaming response
let mut stream = session.stream_request(request, options).await?;
while let Some(event) = stream.next().await {
match event {
Ok(ResponseEvent::OutputItemAdded { item }) => {
// New item created - could be Message, FunctionCall, Reasoning
match item {
ResponseItem::FunctionCall { name, arguments, call_id, .. } => {
// Tool call to execute
tool_calls_to_execute.push((call_id, name, arguments));
}
ResponseItem::Message { content, .. } => {
// Text response
}
_ => {}
}
}
Ok(ResponseEvent::ContentPartDelta { delta }) => {
// Accumulate incremental text
}
Ok(ResponseEvent::Completed { response_id, usage }) => {
// Turn complete
}
Err(e) => { /* Handle error */ }
}
```json
{
"type": "namespace",
"name": "mcp_server_name",
"description": "Tools from MCP server",
"tools": [...]
}
```
**Location**: `codex-rs/core/src/client.rs:1303-1576`
### 12.5 Response Format Differences
#### Subsequent Turns: Tool Execution Loop
**OpenAI Standard Response:**
```json
{
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": 1677652288,
"model": "gpt-4",
"choices": [{
"index": 0,
"delta": {
"content": "Hello"
},
"finish_reason": null
}]
}
```
```rust
// After receiving a FunctionCall, execute the tool and send results back
// 1. Execute tool (shell command, file operation, etc.)
let tool_result = execute_tool(&tool_name, tool_args).await?;
// 2. Send tool result back to API
let next_request = ResponsesApiRequest {
model: "gpt-4".to_string(),
instructions: base_instructions.text,
input: vec![
// Include original user message
user_message.clone(),
// Include the assistant's function call
ResponseItem::FunctionCall { ... }.into(),
// Send tool result
ResponseInputItem::FunctionCallOutput {
call_id: tool_call.call_id,
output: FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text(tool_result),
success: Some(true),
},
},
],
// ...
};
**Codex/ChatGPT SSE Events:**
```
data: {"type":"response.created","response_id":"resp_123"}
**Location**: `codex-rs/protocol/src/models.rs:1180-1288`
data: {"type":"response.output_item.added","item":{"type":"message","role":"assistant","content":[]}}
### Conversation State Management
data: {"type":"response.output_text.delta","delta":"Hello"}
#### Maintaining Conversation History
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Hello"}]}}
Codex maintains conversation state across requests within a turn. This is handled in two ways:
data: {"type":"response.completed","response_id":"resp_123","token_usage":{"input_tokens":10,"output_tokens":5}}
```
1. **HTTP/SSE**: Full `input` array sent with each request
2. **WebSocket**: Server maintains state, client sends `previous_response_id`
### 12.6 Service Tier
```rust
// For HTTP: Include full conversation history
let input_items: Vec<ResponseInputItem> = conversation
.items()
.iter()
.flat_map(|item| item.to_input_item())
.collect();
let request = ResponsesApiRequest {
// ...
input: input_items,
// ...
};
// For WebSocket: Only send new items + previous_response_id
let request = ResponseCreateWsRequest {
previous_response_id: last_response_id, // Links to previous
input: new_items_only, // Just the new messages
// ...
};
Codex uses `service_tier` field for priority routing:
```json
{
"service_tier": "default"
}
```
**Location**: `codex-rs/codex-api/src/websocket.rs:89-156`
**Options:**
- `"default"` - Standard priority
- `"priority"` - High priority (maps to "fast" internally)
- `null` - No specific tier
#### Session vs Turn
This is different from OpenAI's standard API which doesn't have this field.
Understanding the distinction between session and turn is critical:
---
| Concept | Scope | Description |
|--------------|---------------------|----------------------------------------------------------|
| **Session** | Entire conversation | From first user message to session end |
| **Turn** | Single exchange | One user message + all subsequent tool calls + response |
| **Window** | UI context | Visible context in the interface |
## 13. Complete Conversion Guide: OpenAI → Codex Format
### 13.1 Basic Conversion Function
```python
def _flatten_text(content):
    """Collapse OpenAI message content (str, list of parts, or None) to one string."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "\n".join(
            part.get("text", "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return str(content)


def _text_item_type(role):
    """Return the Codex content-item type for text authored by *role*."""
    # Only the model's own turns are output; user/developer text is input.
    return "output_text" if role == "assistant" else "input_text"


def _convert_content_parts(parts, role):
    """Translate OpenAI multimodal content parts into Codex content items."""
    converted = []
    for part in parts:
        kind = part.get("type")
        if kind == "text":
            converted.append({
                "type": _text_item_type(role),
                "text": part.get("text", ""),
            })
        elif kind == "image_url":
            image = part.get("image_url", {})
            converted.append({
                "type": "input_image",
                "source": {
                    "type": "url",
                    "url": image.get("url", ""),
                },
                "detail": image.get("detail", "auto"),
            })
    return converted


def convert_openai_to_codex(openai_request: dict) -> dict:
    """
    Convert OpenAI API format to Codex/ChatGPT format

    Args:
        openai_request: A Chat Completions style request (``model``,
            ``messages``, optional ``tools``, ``tool_choice``,
            ``parallel_tool_calls`` and ``stream``). Sampling fields such as
            ``temperature`` or ``max_tokens`` are ignored.

    Returns:
        A Responses-style request dict. System messages are merged into
        ``instructions``; every other message becomes one or more ``input``
        items (``message``, ``function_call``, ``function_call_output``).
        A fresh ``prompt_cache_key`` is generated on every call.
    """
    # Imported locally so the snippet is runnable when copied on its own.
    import uuid

    # `or []` also covers an explicit `"messages": None` / `"tools": None`.
    messages = openai_request.get("messages") or []
    tools = openai_request.get("tools") or []

    # Every system message contributes to the instructions; previously only
    # the first one was kept and the rest were silently discarded.
    system_texts = [
        _flatten_text(msg.get("content"))
        for msg in messages
        if msg.get("role") == "system"
    ]

    codex_request = {
        "model": openai_request.get("model", "gpt-4"),
        "instructions": "\n\n".join(text for text in system_texts if text),
        "input": [],
        "tools": [],
        "tool_choice": openai_request.get("tool_choice", "auto"),
        "parallel_tool_calls": openai_request.get("parallel_tool_calls", True),
        "store": False,
        "stream": openai_request.get("stream", True),
        "include": [],
        "service_tier": None,
        "prompt_cache_key": str(uuid.uuid4()),
    }
    input_items = codex_request["input"]

    for msg in messages:
        role = msg.get("role")
        if role == "system":
            continue  # Already folded into instructions

        content = msg.get("content", "")

        # Tool results map to exactly one function_call_output item. They must
        # not also be emitted as a "message" with role "tool".
        if role == "tool":
            input_items.append({
                "type": "function_call_output",
                "call_id": msg.get("tool_call_id", ""),
                "output": {
                    "type": "text",
                    "text": _flatten_text(content),
                },
            })
            continue

        tool_calls = msg.get("tool_calls") or []

        if isinstance(content, str):
            # An assistant turn that only carries tool calls often has
            # content == ""; do not emit an empty text message for it.
            if content or not tool_calls:
                input_items.append({
                    "type": "message",
                    "role": role,
                    "content": [{
                        "type": _text_item_type(role),
                        "text": content,
                    }],
                })
        elif isinstance(content, list):
            input_items.append({
                "type": "message",
                "role": role,
                "content": _convert_content_parts(content, role),
            })

        # Tool calls requested by the assistant follow its message item.
        for tool_call in tool_calls:
            function = tool_call.get("function", {})
            input_items.append({
                "type": "function_call",
                "call_id": tool_call.get("id", ""),
                "name": function.get("name", ""),
                "arguments": function.get("arguments", "{}"),
            })

    # Flatten tool definitions: Codex has no nested "function" object.
    for tool in tools:
        if tool.get("type") == "function":
            func = tool.get("function", {})
            codex_request["tools"].append({
                "type": "function",
                "name": func.get("name", ""),
                "description": func.get("description", ""),
                "parameters": func.get("parameters", {}),
            })

    return codex_request
```
```rust
// Session lives for the entire conversation
pub struct ModelClient {
// Authentication and transport (permanent)
transport: T,
provider: Provider,
auth: Box<dyn CodexAuth>,
}
### 13.2 Usage Example
// Turn is created fresh for each user message
pub struct ModelClientSession {
client: ModelClient,
websocket_session: Option<WebSocketSession>,
turn_state: Arc<OnceLock<String>>, // Turn-scoped sticky routing
```python
# OpenAI format request
openai_request = {
"model": "gpt-4",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Hi there!"},
{"role": "user", "content": "How are you?"}
],
"temperature": 0.7, # Will be ignored
"max_tokens": 100, # Will be ignored
"stream": True
}
```
**Location**: `codex-rs/core/src/client.rs:183-306`
# Convert to Codex format
codex_request = convert_openai_to_codex(openai_request)
### Compact/Summarize Flow
# Send to ChatGPT backend. The Responses endpoint is resolved against the
# ChatGPT-mode base URL (https://chatgpt.com/backend-api/codex), not a /v1 path.
response = requests.post(
    "https://chatgpt.com/backend-api/codex/responses",
    headers=headers,
    json=codex_request,
    stream=True,  # consume the SSE stream incrementally
)
```
When conversation history grows too large, Codex can compact it:
---
```rust
// Compact request (HTTP)
let compact_request = ResponsesApiRequest {
model: model.slug.clone(),
instructions: base_instructions.text,
input: vec![/* conversation items to compact */],
tools: vec![],
// ...
};
// Or via MCP subagent
let subagent_request = ResponsesApiRequestWithMetadata {
// ...
metadata: RequestMetadata {
x_openai_subagent: Some("compact"),
// ...
},
};
```
## 14. Summary of Request Fields
**Location**: `codex-rs/core/src/client.rs:1610-1650`
### 14.1 Required Fields
### Error Handling and Recovery
- `model` - Model identifier (e.g., "gpt-4")
- `input` - Array of ResponseItem objects
- `tools` - Array of tool definitions (can be empty)
- `tool_choice` - Tool selection mode ("auto", "none", or specific tool)
- `parallel_tool_calls` - Boolean for parallel execution
- `store` - Boolean for storing conversation
- `stream` - Boolean for streaming responses
#### Retry on Transient Errors
### 14.2 Optional Fields
```rust
async fn with_retry<R, F, Fut>(&self, request: R, mut attempts: u64) -> Result<F::Output, ApiError>
where
R: Clone,
F: Fn(T, R) -> Fut,
Fut: Future<Output = Result<F::Output, ApiError>>,
{
let base_delay = Duration::from_millis(200);
loop {
match self.send_request(request.clone()).await {
Ok(response) => return Ok(response),
Err(ApiError::ServerError(status)) if status.as_u16() >= 500 && attempts > 0 => {
// Exponential backoff
tokio::time::sleep(base_delay * 2_u64.pow(4 - attempts)).await;
attempts -= 1;
}
Err(e) => return Err(e),
}
}
}
```
- `instructions` - System prompt (empty string if not provided)
- `reasoning` - Reasoning controls (effort and summary)
- `include` - Array of fields to include (e.g., reasoning content)
- `service_tier` - Priority routing ("default", "priority", or null)
- `prompt_cache_key` - Cache key for prompt caching
- `text` - Text controls (verbosity and format)
- `client_metadata` - Metadata for telemetry
#### Rate Limit Handling
### 14.3 Field Value Constraints
```rust
match error {
ApiError::RateLimited { retry_after } => {
// Wait and retry (or notify user)
tokio::time::sleep(Duration::from_secs(retry_after)).await;
}
_ => return Err(error),
}
```
**model:**
- Must be a valid model identifier
- Examples: "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo"
---
**tool_choice:**
- `"auto"` - Model decides when to use tools
- `"none"` - Never use tools
- `{"type": "function", "name": "tool_name"}` - Force specific tool
## Summary
**parallel_tool_calls:**
- `true` - Allow multiple tool calls in parallel
- `false` - Execute tools sequentially
To implement a compatible client that mimics codex-cli:
**store:**
- `false` - Don't store (typical for Codex)
- `true` - Store conversation
1. **Use the Responses API** (`/v1/responses`), not Chat Completions
2. **Implement proper authentication** with Bearer token and optional ChatGPT-Account-ID
3. **Set the correct User-Agent** following the format: `{originator}/{client_version} ({os_type} {os_version}; {arch}) {terminal}`
4. **Include required headers**: `originator`, `x-client-request-id`, `session_id`
5. **Handle SSE streaming** with proper event parsing
6. **Implement retry logic** with exponential backoff
7. **Support WebSocket transport** for better performance
8. **Handle turn state** for sticky routing
9. **Compress requests** with zstd when using ChatGPT auth
10. **Parse model capabilities** from `/v1/models` endpoint
**stream:**
- `true` - Stream responses via SSE
- `false` - Return complete response
All code references are from the `codex-rs` directory in the repository.
This completes the comprehensive documentation of the ChatGPT/Codex API request format with detailed examples and conversion guidance.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment