Keywords AI Data Export Guide¶
This guide explains how to export full chat requests and responses from Keywords AI for compliance, analysis, and debugging.
Export Methods¶
1. Dashboard Export (Manual)¶
Via Web Interface:

1. Log into https://platform.keywordsai.co
2. Navigate to the Logs section
3. Apply filters (date range, user_id, model, etc.)
4. Click the Export button
5. Select a format: CSV or JSON
6. Download the file
Pros:

- Quick and easy
- No code required
- Good for ad-hoc exports

Cons:

- Manual process
- Not suitable for automation
- Limited to dashboard filters
2. API Export (Automated)¶
Using Keywords AI SDK:
import os
import json
from datetime import datetime, timedelta

from keywordsai_sdk import KeywordsAI

# Build an API client authenticated from the environment.
client = KeywordsAI(api_key=os.getenv("KEYWORDSAI_API_KEY"))

# Export window: the trailing seven days, ending now.
end_date = datetime.now()
start_date = end_date - timedelta(days=7)

# Pull one page (up to the 1000-entry per-request maximum) of logs.
logs = client.logs.list(
    start_date=start_date.isoformat(),
    end_date=end_date.isoformat(),
    limit=1000,  # max per request
)

# Serialize every log record to a pretty-printed JSON file.
with open("chat_logs_export.json", "w") as f:
    json.dump([log.dict() for log in logs], f, indent=2)

print(f"Exported {len(logs)} logs to chat_logs_export.json")
Paginated Export (for large datasets):
def export_all_logs(start_date, end_date, output_file):
    """Export all logs in a date range to a JSON file, with pagination.

    Args:
        start_date: ISO-8601 start of the export window (inclusive).
        end_date: ISO-8601 end of the export window (inclusive).
        output_file: Path of the JSON file to write.

    Returns:
        The full list of fetched log objects.
    """
    client = KeywordsAI(api_key=os.getenv("KEYWORDSAI_API_KEY"))
    all_logs = []
    offset = 0
    limit = 1000  # server-side maximum page size
    while True:
        page = client.logs.list(
            start_date=start_date,
            end_date=end_date,
            limit=limit,
            offset=offset,
        )
        if not page:
            break
        all_logs.extend(page)
        # Advance by the number of records actually returned: bumping by
        # `limit` when the API hands back a short page would silently skip
        # the records in the gap.
        offset += len(page)
        print(f"Fetched {len(all_logs)} logs so far...")
        if len(page) < limit:
            break  # last page reached; skip one extra empty request
    # Save to file
    with open(output_file, "w") as f:
        json.dump([log.dict() for log in all_logs], f, indent=2)
    print(f"✅ Exported {len(all_logs)} total logs to {output_file}")
    return all_logs

# Usage
export_all_logs("2025-01-01", "2025-01-31", "january_logs.json")
Filter by User:
# Export logs for specific user
logs = client.logs.list(
user_id="user_123",
start_date="2025-01-01",
end_date="2025-01-31"
)
Filter by Model:
# Export only GPT-5 logs
logs = client.logs.list(
model="gpt-5",
start_date="2025-01-01",
end_date="2025-01-31"
)
3. Scheduled Exports (Automated)¶
Daily Export Script:
#!/usr/bin/env python3
"""
Daily log export script for Keywords AI.
Run via cron: 0 2 * * * /path/to/export_daily_logs.py
"""
import csv
import json
import os
from datetime import datetime, timedelta
from pathlib import Path

from keywordsai_sdk import KeywordsAI


def export_daily_logs():
    """Export yesterday's logs to local storage (full JSON + summary CSV)."""
    client = KeywordsAI(api_key=os.getenv("KEYWORDSAI_API_KEY"))

    # Yesterday's full calendar day. Zero the microseconds too — otherwise
    # both window boundaries carry the current clock's sub-second noise.
    yesterday = datetime.now() - timedelta(days=1)
    start_date = yesterday.replace(hour=0, minute=0, second=0, microsecond=0)
    end_date = yesterday.replace(hour=23, minute=59, second=59, microsecond=999999)
    print(f"Exporting logs from {start_date} to {end_date}...")

    # Fetch all pages for the day.
    all_logs = []
    offset = 0
    limit = 1000  # server-side maximum page size
    while True:
        logs = client.logs.list(
            start_date=start_date.isoformat(),
            end_date=end_date.isoformat(),
            limit=limit,
            offset=offset,
        )
        if not logs:
            break
        all_logs.extend(logs)
        # Advance by the count actually returned so a short page never
        # causes records to be skipped.
        offset += len(logs)

    # Create export directory
    export_dir = Path("exports/keywords_ai")
    export_dir.mkdir(parents=True, exist_ok=True)

    # Save the full payload to a dated JSON file.
    filename = f"logs_{yesterday.strftime('%Y-%m-%d')}.json"
    filepath = export_dir / filename
    with open(filepath, "w") as f:
        json.dump([log.dict() for log in all_logs], f, indent=2)
    print(f"✅ Exported {len(all_logs)} logs to {filepath}")

    # Also save a summary CSV. Use the csv module so fields that contain
    # commas or quotes (model names, user ids) are quoted correctly instead
    # of corrupting the row layout.
    csv_file = export_dir / f"summary_{yesterday.strftime('%Y-%m-%d')}.csv"
    with open(csv_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([
            "timestamp", "user_id", "model", "prompt_tokens",
            "completion_tokens", "cost_usd", "latency_ms",
        ])
        for log in all_logs:
            writer.writerow([
                log.timestamp, log.user_id, log.model, log.prompt_tokens,
                log.completion_tokens, log.cost, log.latency_ms,
            ])
    print(f"✅ Saved summary to {csv_file}")


if __name__ == "__main__":
    export_daily_logs()
Cron Setup:
# Add to crontab (run daily at 2 AM)
0 2 * * * cd /path/to/archety && python scripts/export_daily_logs.py
4. Real-Time Streaming (Advanced)¶
Stream logs to local database:
from keywordsai_sdk import KeywordsAI
import time
from datetime import datetime
from sqlalchemy import create_engine, Column, String, Integer, Float, DateTime, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
Base = declarative_base()
class LLMLog(Base):
    """Local database table for LLM logs."""
    __tablename__ = "llm_logs"
    # Keywords AI log id (e.g. "log_abc123") reused as the primary key, so
    # a sync can check existence by id before inserting.
    id = Column(String, primary_key=True)
    timestamp = Column(DateTime, nullable=False)
    user_id = Column(String, index=True)  # indexed for per-user queries
    model = Column(String)
    prompt = Column(Text)       # prompt messages from the Keywords AI log
    completion = Column(Text)   # completion message from the Keywords AI log
    prompt_tokens = Column(Integer)
    completion_tokens = Column(Integer)
    total_tokens = Column(Integer)
    cost_usd = Column(Float)
    latency_ms = Column(Integer)
# Create local database
engine = create_engine("sqlite:///llm_logs.db")
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
def sync_logs_to_local_db():
    """Continuously sync Keywords AI logs into the local database.

    Polls the logs API once a minute and inserts any record not already
    present locally (the Keywords AI log id is the primary key). Runs
    forever; intended to be launched as a background service.
    """
    # Needed here because this snippet's header imports only `datetime`
    # and `time`; kept local so the example stays self-contained.
    import os
    from datetime import timedelta

    client = KeywordsAI(api_key=os.getenv("KEYWORDSAI_API_KEY"))
    session = Session()
    # Start with a small overlap so the first poll picks up recent logs.
    last_sync = datetime.now() - timedelta(minutes=5)
    while True:
        try:
            # Record the poll start BEFORE fetching: stamping the time after
            # processing would leave a blind spot for logs created while we
            # were working, and those would never be synced.
            poll_started = datetime.now()
            logs = client.logs.list(
                start_date=last_sync.isoformat(),
                limit=100
            )
            for log in logs:
                # Skip logs we already have (id is the primary key).
                existing = session.query(LLMLog).filter_by(id=log.id).first()
                if not existing:
                    db_log = LLMLog(
                        id=log.id,
                        timestamp=log.timestamp,
                        user_id=log.user_id,
                        model=log.model,
                        prompt=log.prompt_messages,
                        completion=log.completion_message,
                        prompt_tokens=log.prompt_tokens,
                        completion_tokens=log.completion_tokens,
                        total_tokens=log.total_tokens,
                        cost_usd=log.cost,
                        latency_ms=log.latency_ms
                    )
                    session.add(db_log)
            # One commit per batch keeps the transaction small and atomic.
            session.commit()
            print(f"Synced {len(logs)} new logs")
            # Advance the watermark only after a successful commit.
            last_sync = poll_started
            # Wait before next sync
            time.sleep(60)  # sync every minute
        except Exception as e:
            # Log and retry: a transient API or DB error should not kill
            # the sync loop.
            print(f"Error syncing logs: {e}")
            session.rollback()
            time.sleep(60)


# Run as background service
if __name__ == "__main__":
    sync_logs_to_local_db()
Data Structure¶
Each exported log contains:
{
"id": "log_abc123",
"timestamp": "2025-01-14T10:30:45.123Z",
"user_id": "user_xyz",
"model": "gpt-5",
"prompt_messages": [
{
"role": "system",
"content": "You are a helpful assistant named Sage..."
},
{
"role": "user",
"content": "What's the weather like today?"
}
],
"completion_message": {
"role": "assistant",
"content": "I don't have real-time weather data..."
},
"prompt_tokens": 150,
"completion_tokens": 75,
"total_tokens": 225,
"cost": 0.001125,
"latency_ms": 1250,
"finish_reason": "stop",
"metadata": {
"persona_id": "sage",
"chat_guid": "imessage:abc123"
}
}
Use Cases¶
1. Compliance & Audit¶
Export all conversations for a specific user:
# GDPR data export request
user_id = "user_requesting_data"  # id of the user whose data was requested
user_logs = client.logs.list(
    user_id=user_id,
    start_date="2024-01-01",  # account creation date
    end_date=datetime.now().isoformat()
)
# Save to file for user (user_id must be defined — the original snippet
# referenced it in the filename without ever assigning it)
with open(f"user_data_export_{user_id}.json", "w") as f:
    json.dump([log.dict() for log in user_logs], f, indent=2)
2. Quality Analysis¶
Export failed/refused requests:
# Get logs with errors
logs = client.logs.list(
start_date="2025-01-01",
end_date="2025-01-31"
)
failed_logs = [log for log in logs if log.finish_reason in ["error", "refused"]]
print(f"Found {len(failed_logs)} failed requests")
for log in failed_logs:
print(f"User: {log.user_id}, Error: {log.error_message}")
3. Cost Analysis¶
Calculate cost by feature:
from collections import defaultdict
logs = client.logs.list(start_date="2025-01-01", end_date="2025-01-31")
cost_by_model = defaultdict(float)
for log in logs:
cost_by_model[log.model] += log.cost
print("Cost breakdown by model:")
for model, cost in cost_by_model.items():
print(f" {model}: ${cost:.4f}")
4. Performance Monitoring¶
Find slow requests:
logs = client.logs.list(start_date="2025-01-01", end_date="2025-01-31")
slow_requests = [log for log in logs if log.latency_ms > 5000] # >5 seconds
print(f"Found {len(slow_requests)} slow requests (>5s)")
for log in slow_requests:
print(f" {log.timestamp}: {log.latency_ms}ms - User: {log.user_id}")
Best Practices¶
1. Regular Exports¶
- Export daily to avoid data loss
- Keywords AI retains logs for 30-90 days depending on plan
- Store exports in secure backup location
2. Data Security¶
- Encrypt exports at rest
- Use secure transfer methods (SFTP, S3 with encryption)
- Implement access controls
3. Privacy Compliance¶
- Implement automated data deletion after retention period
- Support user data export requests (GDPR)
- Redact PII if required
4. Monitoring¶
- Set up alerts for export failures
- Monitor export job completion
- Track export file sizes for anomalies
Troubleshooting¶
"Rate limit exceeded"¶
# Add delay between API calls
import time
for page in range(0, total_pages):
logs = client.logs.list(offset=page * 1000, limit=1000)
time.sleep(1) # 1 second delay
"Authentication failed"¶
# Verify API key
import os
api_key = os.getenv("KEYWORDSAI_API_KEY")
if not api_key:
raise ValueError("KEYWORDSAI_API_KEY not set in environment")
"Empty response"¶
# Check date range
logs = client.logs.list(
start_date="2025-01-14T00:00:00Z",
end_date="2025-01-14T23:59:59Z"
)
if not logs:
print("No logs found in date range - check your filters")
Support¶
For export issues:

- Keywords AI Docs: https://docs.keywordsai.co
- Support: support@keywordsai.co
- Status Page: https://status.keywordsai.co
Last Updated: 2025-01-14