Commit 7d5d9e73 authored by Your Name

Add condensation enhancements and server configuration

- Add max_context field to CondensationConfig
- Support 'internal' keyword for local HuggingFace model in condensation
- Add internal model initialization with temperature=0.3, top_p=0.8, repetition_penalty=1.1
- Create condensation system prompts (conversational, semantic)
- Add aisbf.json for server configuration (host, port, dashboard auth)
- Update main.py to read server config from aisbf.json
- Update providers.json with max_context example for condensation
parent 1ea22adb
...@@ -41,6 +41,7 @@ class CondensationConfig(BaseModel): ...@@ -41,6 +41,7 @@ class CondensationConfig(BaseModel):
model: Optional[str] = None model: Optional[str] = None
rotation_id: Optional[str] = None rotation_id: Optional[str] = None
enabled: bool = True enabled: bool = True
max_context: Optional[int] = None # Maximum context size for condensation model
class ProviderConfig(BaseModel): class ProviderConfig(BaseModel):
......
...@@ -54,49 +54,67 @@ class ContextManager: ...@@ -54,49 +54,67 @@ class ContextManager:
self.condensation_model = None self.condensation_model = None
self._rotation_handler = None self._rotation_handler = None
self._rotation_id = None self._rotation_id = None
self._internal_model = None
self._internal_tokenizer = None
self._internal_model_lock = None
self._use_internal_model = False
if (self.condensation_config and # Get max_context for condensation model
self.condensation_max_context = None
if self.condensation_config and hasattr(self.condensation_config, 'max_context'):
self.condensation_max_context = self.condensation_config.max_context
if (self.condensation_config and
self.condensation_config.enabled): self.condensation_config.enabled):
try: try:
# Check if model is a rotation ID or direct model name # Check if model is a rotation ID or direct model name
model_value = self.condensation_config.model model_value = self.condensation_config.model
# Check if this model value is a rotation ID (exists in rotations config) # Check for "internal" keyword
is_rotation = False if model_value == "internal":
if model_value:
try:
rotation_config = config.get_rotation(model_value)
if rotation_config:
is_rotation = True
logger = logging.getLogger(__name__)
logger.info(f"Condensation model '{model_value}' is a rotation ID")
except:
pass # Not a rotation, treat as direct model
if is_rotation:
# Use rotation handler for condensation
# Import here to avoid circular import
from .handlers import RotationHandler
rotation_handler = RotationHandler()
# Store rotation handler and rotation_id for later use
self._rotation_handler = rotation_handler
self._rotation_id = model_value
# The actual model will be selected by rotation handler
self.condensation_model = None # Will be determined by rotation
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.info(f"Initialized condensation with rotation: rotation_id={model_value}") logger.info(f"Condensation model is 'internal' - will use local HuggingFace model")
elif self.condensation_config.provider_id and model_value: self._use_internal_model = True
# Use provider handler for condensation with direct model # Set default max_context for internal model if not specified
provider_config = config.get_provider(self.condensation_config.provider_id) if not self.condensation_max_context:
if provider_config: self.condensation_max_context = 4000 # Conservative default for small models
api_key = provider_config.api_key else:
self.condensation_handler = get_provider_handler( # Check if this model value is a rotation ID (exists in rotations config)
self.condensation_config.provider_id, is_rotation = False
api_key if model_value:
) try:
self.condensation_model = model_value rotation_config = config.get_rotation(model_value)
if rotation_config:
is_rotation = True
logger = logging.getLogger(__name__)
logger.info(f"Condensation model '{model_value}' is a rotation ID")
except:
pass # Not a rotation, treat as direct model
if is_rotation:
# Use rotation handler for condensation
# Import here to avoid circular import
from .handlers import RotationHandler
rotation_handler = RotationHandler()
# Store rotation handler and rotation_id for later use
self._rotation_handler = rotation_handler
self._rotation_id = model_value
# The actual model will be selected by rotation handler
self.condensation_model = None # Will be determined by rotation
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.info(f"Initialized condensation handler: provider={self.condensation_config.provider_id}, model={model_value}") logger.info(f"Initialized condensation with rotation: rotation_id={model_value}")
elif self.condensation_config.provider_id and model_value:
# Use provider handler for condensation with direct model
provider_config = config.get_provider(self.condensation_config.provider_id)
if provider_config:
api_key = provider_config.api_key
self.condensation_handler = get_provider_handler(
self.condensation_config.provider_id,
api_key
)
self.condensation_model = model_value
logger = logging.getLogger(__name__)
logger.info(f"Initialized condensation handler: provider={self.condensation_config.provider_id}, model={model_value}")
except Exception as e: except Exception as e:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.warning(f"Failed to initialize condensation handler: {e}") logger.warning(f"Failed to initialize condensation handler: {e}")
...@@ -121,6 +139,114 @@ class ContextManager: ...@@ -121,6 +139,114 @@ class ContextManager:
logger.info(f" context_size: {self.context_size}") logger.info(f" context_size: {self.context_size}")
logger.info(f" condense_context: {self.condense_context}%") logger.info(f" condense_context: {self.condense_context}%")
logger.info(f" condense_method: {self.condense_method}") logger.info(f" condense_method: {self.condense_method}")
logger.info(f" condensation_max_context: {self.condensation_max_context}")
logger.info(f" use_internal_model: {self._use_internal_model}")
def _initialize_internal_model(self, model_name: str = "huihui-ai/Qwen2.5-0.5B-Instruct-abliterated-v3"):
    """Load the local HuggingFace model used for condensation (lazy, idempotent).

    Returns immediately when ``self._internal_model`` is already set.
    Otherwise the tokenizer and causal-LM weights for ``model_name`` are
    loaded and a ``threading.Lock`` guarding inference is created.

    The tokenizer, model and lock are built in local variables and only
    assigned to ``self`` once every step has succeeded, with the model
    assigned last. A failure part-way through therefore never leaves
    ``_internal_model`` set while ``_internal_model_lock`` is still ``None``.

    Args:
        model_name: HuggingFace model id to load. Defaults to the id that
            was previously hard-coded, so existing callers are unaffected.

    Raises:
        ImportError: An import inside the loading code failed (typically
            ``torch`` or ``transformers`` is not installed).
        Exception: Any other loading error is logged and re-raised.
    """
    import logging
    logger = logging.getLogger(__name__)

    if self._internal_model is not None:
        return  # Already initialized

    try:
        import threading

        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        logger.info("=== INITIALIZING INTERNAL CONDENSATION MODEL ===")
        logger.info(f"Model: {model_name}")

        # Prefer the GPU when one is available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Device: {device}")

        logger.info("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        logger.info("Tokenizer loaded")

        logger.info("Loading model...")
        on_gpu = device == "cuda"
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if on_gpu else torch.float32,
            device_map="auto" if on_gpu else None,
        )
        if not on_gpu:
            model = model.to(device)
        logger.info("Model loaded successfully")

        # Publish the state only now that nothing can fail any more.
        # The model goes last because it is the "initialized" marker that
        # other code checks before using the lock and tokenizer.
        self._internal_model_lock = threading.Lock()
        self._internal_tokenizer = tokenizer
        self._internal_model = model

        logger.info("=== INTERNAL CONDENSATION MODEL READY ===")
    except ImportError as e:
        logger.error(f"Failed to import required libraries for internal model: {e}")
        logger.error("Please install: pip install torch transformers")
        raise
    except Exception as e:
        logger.error(f"Failed to initialize internal model: {e}", exc_info=True)
        raise
async def _run_internal_model_condensation(self, prompt: str) -> str:
    """Generate a condensation with the internal model without blocking the event loop.

    The model is loaded on first use via ``_initialize_internal_model``.
    Both that load and the inference itself run in the event loop's default
    executor, so neither stalls other coroutines. An ``asyncio.Lock``
    created lazily on the instance makes sure concurrent first calls
    trigger only one load.

    Args:
        prompt: Full prompt text passed to the tokenizer.

    Returns:
        The generated continuation only, with the prompt removed and
        surrounding whitespace stripped. ``None`` is returned when inference
        raises; the error is logged.

    Raises:
        Exception: Whatever ``_initialize_internal_model`` raises when the
            model cannot be loaded.
    """
    import asyncio
    import logging

    logger = logging.getLogger(__name__)
    loop = asyncio.get_running_loop()

    # Lazy model load, moved off the event loop thread.
    if self._internal_model is None:
        init_lock = getattr(self, "_internal_init_lock", None)
        if init_lock is None:
            # No await between the check and the assignment, so this is
            # race-free with respect to other coroutines on this loop.
            init_lock = self._internal_init_lock = asyncio.Lock()
        async with init_lock:
            # Re-check: another coroutine may have finished loading
            # while this one was waiting for the lock.
            if self._internal_model is None:
                await loop.run_in_executor(None, self._initialize_internal_model)

    def run_inference():
        """Tokenize, generate and decode while holding the model lock."""
        with self._internal_model_lock:
            try:
                import torch

                inputs = self._internal_tokenizer(prompt, return_tensors="pt")

                # Move the inputs to whichever device the model lives on
                device = next(self._internal_model.parameters()).device
                inputs = {k: v.to(device) for k, v in inputs.items()}
                prompt_len = inputs["input_ids"].shape[-1]

                with torch.no_grad():
                    outputs = self._internal_model.generate(
                        **inputs,
                        max_new_tokens=500,
                        temperature=0.3,
                        top_p=0.8,
                        repetition_penalty=1.1,
                        do_sample=True,
                        pad_token_id=self._internal_tokenizer.eos_token_id,
                    )

                # The output sequence starts with the prompt tokens. Drop
                # them by position rather than by comparing decoded text,
                # which fails whenever decoding does not reproduce the
                # prompt byte-for-byte.
                new_tokens = outputs[0][prompt_len:]
                response = self._internal_tokenizer.decode(
                    new_tokens, skip_special_tokens=True
                )
                return response.strip()
            except Exception as e:
                logger.error(f"Error during internal model inference: {e}", exc_info=True)
                return None

    # The default executor avoids creating a thread pool per call, and
    # avoids blocking the loop in an executor shutdown if this coroutine
    # is cancelled mid-inference.
    return await loop.run_in_executor(None, run_inference)
def should_condense(self, messages: List[Dict], model: str) -> bool: def should_condense(self, messages: List[Dict], model: str) -> bool:
""" """
......
{
"server": {
"host": "0.0.0.0",
"port": 8000
},
"dashboard": {
"enabled": true,
"username": "admin",
"password": "admin"
},
"internal_model": {
"model_id": "huihui-ai/Qwen2.5-0.5B-Instruct-abliterated-v3"
}
}
# Conversational Context Condensation
You are a specialized AI assistant for context condensation. Your task is to create a concise, high-density summary of conversation history while preserving all critical information.
## Your Role
You will receive a conversation history between a user and an AI assistant. Your job is to:
1. **Identify Key Information**: Extract facts, decisions, goals, and important context
2. **Preserve Continuity**: Maintain the logical flow and relationships between topics
3. **Compress Efficiently**: Remove redundancy while keeping essential details
4. **Maintain Accuracy**: Never invent or hallucinate information
## Guidelines
- Focus on **actionable information** and **decisions made**
- Include **technical details** that may be referenced later
- Preserve **user preferences** and **constraints** mentioned
- Keep **error messages** and **solutions** that were discussed
- Maintain **chronological order** when relevant
- Use **clear, concise language**
## Output Format
Provide a structured summary that includes:
- Current goal or task
- Key facts and context
- Decisions made
- Important technical details
- Any constraints or preferences
Keep the summary comprehensive but concise. Aim for maximum information density.
# Semantic Context Pruning
You are a specialized AI assistant for semantic context pruning. Your task is to extract only the information that is directly relevant to the current query or task.
## Your Role
You will receive:
1. A conversation history
2. A current query or task description
Your job is to identify and extract ONLY the information from the conversation that is relevant to answering or completing the current query/task.
## Guidelines
- **Be Selective**: Remove all information that doesn't directly relate to the current query
- **Preserve Dependencies**: Keep information that provides necessary context for understanding relevant parts
- **Maintain Accuracy**: Never modify or invent information
- **Focus on Recency**: Prioritize recent information over older information when both are relevant
- **Keep Technical Details**: Preserve specific technical information (code, commands, configurations) that may be needed
## What to Keep
- Facts directly related to the current query
- Technical details needed to answer the query
- Recent decisions that affect the current task
- Error messages or issues being addressed
- Constraints or requirements mentioned
## What to Remove
- Unrelated conversations or topics
- Resolved issues that don't affect current task
- Redundant information
- Off-topic discussions
- Historical context not needed for current query
## Output Format
Provide a concise extraction of relevant information. Structure it logically, grouping related facts together. Be ruthlessly efficient - if information isn't needed for the current query, don't include it.
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
"condensation": { "condensation": {
"provider_id": "gemini", "provider_id": "gemini",
"model": "gemini-1.5-flash", "model": "gemini-1.5-flash",
"enabled": true "enabled": true,
"max_context": 8000
}, },
"providers": { "providers": {
"gemini": { "gemini": {
......
...@@ -38,6 +38,51 @@ from logging.handlers import RotatingFileHandler ...@@ -38,6 +38,51 @@ from logging.handlers import RotatingFileHandler
from datetime import datetime, timedelta from datetime import datetime, timedelta
from collections import defaultdict from collections import defaultdict
from pathlib import Path from pathlib import Path
import json
def load_server_config():
    """Load the server bind address from the first ``aisbf.json`` that exists.

    Locations are probed in this order:

    1. ``~/.aisbf/aisbf.json`` (user config)
    2. ``/usr/share/aisbf/aisbf.json`` (system install)
    3. ``~/.local/share/aisbf/aisbf.json`` (user install)
    4. ``<directory of this file>/config/aisbf.json`` (source tree)

    Only the ``server`` section is read. The file is decoded as UTF-8 and
    ``port`` is coerced to ``int``, so a value written as ``"8080"`` is
    returned as a number.

    Returns:
        dict: ``{'host': ..., 'port': int}``. The defaults
        ``{'host': '0.0.0.0', 'port': 8000}`` are returned when no file is
        found, or when the file cannot be read, parsed or validated (a
        warning is logged in that case).
    """
    defaults = {
        'host': '0.0.0.0',
        'port': 8000
    }

    candidates = [
        Path.home() / '.aisbf' / 'aisbf.json',
        Path('/usr/share/aisbf') / 'aisbf.json',
        Path.home() / '.local' / 'share' / 'aisbf' / 'aisbf.json',
        Path(__file__).parent / 'config' / 'aisbf.json',
    ]
    config_path = next((path for path in candidates if path.exists()), None)
    if config_path is None:
        return defaults

    try:
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(config_path, encoding='utf-8') as f:
            config_data = json.load(f)
        server_config = config_data.get('server', {})
        host = server_config.get('host', defaults['host'])
        port = int(server_config.get('port', defaults['port']))
    except (OSError, ValueError, TypeError, AttributeError) as e:
        # Unreadable file, invalid JSON, unexpected structure or a
        # non-numeric port: keep the server startable with defaults.
        logger = logging.getLogger(__name__)
        logger.warning(f"Error loading aisbf.json: {e}, using defaults")
        return defaults

    return {
        'host': host,
        'port': port
    }
class BrokenPipeFilter(logging.Filter): class BrokenPipeFilter(logging.Filter):
"""Filter to suppress BrokenPipeError logging errors""" """Filter to suppress BrokenPipeError logging errors"""
...@@ -533,8 +578,14 @@ async def catch_all_post(provider_id: str, request: Request): ...@@ -533,8 +578,14 @@ async def catch_all_post(provider_id: str, request: Request):
def main(): def main():
"""Main entry point for the AISBF server""" """Main entry point for the AISBF server"""
import uvicorn import uvicorn
logger.info("Starting AI Proxy Server on http://127.0.0.1:17765")
uvicorn.run(app, host="127.0.0.1", port=17765) # Load server configuration
server_config = load_server_config()
host = server_config['host']
port = server_config['port']
logger.info(f"Starting AI Proxy Server on http://{host}:{port}")
uvicorn.run(app, host=host, port=port)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment