Mini-App Framework Testing Guide¶
Testing Strategy¶
We'll test at 4 levels:
1. Unit Tests - Individual components (reducers, detectors)
2. Integration Tests - Components working together (EventStore + StateCoordinator)
3. End-to-End Tests - Full message flow via API
4. Manual Testing - Real conversations via Telegram/iMessage
1. Unit Tests¶
Test the MiniAppDetector¶
# test_miniapp_detector.py
import pytest
from app.orchestrator.miniapp_detector import MiniAppDetector
def test_bill_split_trigger():
    """Bill-related phrases select ``bill_split``; unrelated chatter selects nothing."""
    matcher = MiniAppDetector()
    apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    # Phrases that must route to the bill-split mini-app.
    for phrase in ("let's split this bill", "who owes what?"):
        assert matcher.detect_trigger(phrase, apps) == "bill_split"

    # Small talk must not match any mini-app.
    outcome = matcher.detect_trigger("how's the weather?", apps)
    assert outcome is None
def test_todo_list_trigger():
    """List and reminder phrasing selects the ``todo_list`` mini-app."""
    matcher = MiniAppDetector()
    apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    for phrase in ("add milk to my list", "remind me to call mom"):
        chosen = matcher.detect_trigger(phrase, apps)
        assert chosen == "todo_list"
def test_trip_planner_trigger():
    """Travel-planning phrasing selects the ``trip_planner`` mini-app."""
    matcher = MiniAppDetector()
    apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    for phrase in ("plan a trip to Paris", "vacation planning"):
        chosen = matcher.detect_trigger(phrase, apps)
        assert chosen == "trip_planner"
def test_confidence_scoring():
    """Check the three confidence tiers reported for the bill-split app."""
    scorer = MiniAppDetector()
    app_id = "bill_split"

    # Exact phrase: expected to land in the high-confidence tier.
    exact_phrase = scorer.get_trigger_confidence("split this bill", app_id)
    assert exact_phrase >= 0.9

    # Regex-style match: expected to be at least medium confidence.
    regex_match = scorer.get_trigger_confidence("who owes me money", app_id)
    assert regex_match >= 0.7

    # A lone keyword must stay below the medium threshold.
    lone_keyword = scorer.get_trigger_confidence("bill", app_id)
    assert lone_keyword < 0.7
Test the Reducers¶
# test_bill_split_reducer.py
from app.miniapps.apps.bill_split.reducer import bill_split_reducer, get_initial_state
from app.miniapps.apps.bill_split.schema import BillEventType
def test_initial_state():
    """A fresh bill state carries its id, version 0, and empty items/splits."""
    fresh = get_initial_state("bill_123")
    expected = {"bill_id": "bill_123", "version": 0, "items": [], "splits": {}}
    for key, value in expected.items():
        assert fresh[key] == value
def test_bill_created_event():
    """Applying a BILL_CREATED event records merchant and total and bumps the version to 1."""
    payload = {
        "merchant_name": "Pizza Place",
        "total_amount": 45.50,
        "currency": "USD",
    }
    created = {
        "event_type": BillEventType.BILL_CREATED.value,
        "event_data": payload,
        "actor_user_id": "user_1",
        "timestamp": "2025-11-17T12:00:00Z",
    }

    result = bill_split_reducer(get_initial_state("bill_123"), created)

    assert (result["merchant_name"], result["total_amount"]) == ("Pizza Place", 45.50)
    assert result["version"] == 1
def test_item_added_and_split_calculation():
    """A $20 item shared by two participants leaves each of them owing $10."""
    people = {"user_1": ("Alice", "+1234"), "user_2": ("Bob", "+5678")}

    # Seed the state with both participants and a zeroed split entry for each.
    state = get_initial_state("bill_123")
    state["participants"] = {
        uid: {"name": name, "phone": phone} for uid, (name, phone) in people.items()
    }
    state["splits"] = {
        uid: {
            "user_id": uid,
            "user_name": name,
            "total_owed": 0,
            "items_share": [],
            "paid": False,
        }
        for uid, (name, _phone) in people.items()
    }

    # One item, shared by both participants.
    pizza_added = {
        "event_type": BillEventType.ITEM_ADDED.value,
        "event_data": {
            "item_id": "item_1",
            "description": "Pizza",
            "amount": 20.0,
            "shared_by": ["user_1", "user_2"],
        },
        "actor_user_id": "user_1",
        "timestamp": "2025-11-17T12:00:00Z",
    }
    result = bill_split_reducer(state, pizza_added)

    for uid in ("user_1", "user_2"):
        assert result["splits"][uid]["total_owed"] == 10.0
def test_split_adjustment():
    """Cover the "I didn't have alcohol" case: the user is dropped from alcohol items."""
    beer = {
        "id": "item_1",
        "description": "Beer",
        "amount": 6.0,
        "category": "alcohol",
        "shared_by": ["user_1", "user_2"],
    }
    state = get_initial_state("bill_123")
    state["participants"] = {
        uid: {"name": name} for uid, name in (("user_1", "Alice"), ("user_2", "Bob"))
    }
    state["items"] = [beer]

    # Bob opts out of everything in the "alcohol" category.
    opt_out = {
        "event_type": BillEventType.SPLIT_ADJUSTED.value,
        "event_data": {
            "adjustment_type": "exclude_category",
            "category": "alcohol",
            "user_id": "user_2",
        },
        "actor_user_id": "user_2",
        "timestamp": "2025-11-17T12:00:00Z",
    }
    updated = bill_split_reducer(state, opt_out)

    # Alice is the only remaining sharer of the beer.
    first_item = updated["items"][0]
    assert first_item["shared_by"] == ["user_1"]
# test_todo_list_reducer.py
from app.miniapps.apps.todo_list.reducer import todo_list_reducer, get_initial_state
from app.miniapps.apps.todo_list.schema import TodoEventType
def test_task_lifecycle():
    """Walk one task through being added (pending) and then completed."""
    actor = "user_1"

    def make_event(kind, data, when):
        # Every event shares the same envelope; only type, payload and time vary.
        return {
            "event_type": kind,
            "event_data": data,
            "actor_user_id": actor,
            "timestamp": when,
        }

    # Step 1: add the task.
    current = todo_list_reducer(
        get_initial_state("list_123"),
        make_event(
            TodoEventType.TASK_ADDED.value,
            {"task_id": "task_1", "description": "Buy milk", "priority": "normal"},
            "2025-11-17T12:00:00Z",
        ),
    )
    assert len(current["tasks"]) == 1
    added = current["tasks"][0]
    assert added["description"] == "Buy milk"
    assert added["status"] == "pending"

    # Step 2: complete it an hour later.
    current = todo_list_reducer(
        current,
        make_event(
            TodoEventType.TASK_COMPLETED.value,
            {"task_id": "task_1"},
            "2025-11-17T13:00:00Z",
        ),
    )
    finished = current["tasks"][0]
    assert finished["status"] == "completed"
    assert finished["completed_by"] == "user_1"
Run Unit Tests¶
# Run from the repository root (replace the path with your local checkout).
cd /path/to/archety
pytest tests/test_miniapp_detector.py -v
pytest tests/test_bill_split_reducer.py -v
pytest tests/test_todo_list_reducer.py -v
2. Integration Tests¶
Test EventStore + StateCoordinator¶
# test_miniapp_integration.py
import pytest
from uuid import uuid4
from sqlalchemy.orm import Session
from app.miniapps.event_store import EventStore
from app.miniapps.state_coordinator import StateCoordinator
from app.miniapps.apps.bill_split.schema import BillEventType
@pytest.fixture
def db_session():
    """Yield a database session, then roll back and close it after the test.

    The session is taken from the application's ``get_db`` provider.

    NOTE(review): nothing in this fixture selects a test database; it uses
    whatever ``get_db`` is configured for. Confirm the configuration points
    at a disposable database before running these tests.
    """
    from app.database.db import get_db

    db = next(get_db())
    try:
        yield db
    finally:
        # Discard uncommitted changes, then release the connection so
        # sessions do not accumulate across tests.
        db.rollback()
        db.close()
def test_bill_split_event_replay(db_session: Session):
    """Append two bill-split events and check the state rebuilt from them."""
    room = uuid4()

    store = EventStore(db_session)
    coordinator = StateCoordinator(db_session)

    def record(kind, payload):
        # Each event gets a fresh random actor id, as both are recorded for the same room/app.
        store.append_event(
            room_id=room,
            event_type=kind,
            event_data=payload,
            actor_user_id=uuid4(),
            mini_app_id="bill_split",
        )

    record("bill_created", {"merchant_name": "Restaurant", "total_amount": 50.0})
    record(
        "item_added",
        {
            "item_id": "item_1",
            "description": "Pizza",
            "amount": 30.0,
            "shared_by": ["user_1", "user_2"],
        },
    )

    # Rebuild the current state from the stored events.
    rebuilt = coordinator.get_current_state("bill_split", room)

    assert rebuilt["merchant_name"] == "Restaurant"
    assert len(rebuilt["items"]) == 1
    assert rebuilt["version"] == 2  # one version bump per applied event
def test_concurrent_event_writes(db_session: Session):
    """Test that concurrent appends to one room get unique, ordered sequence numbers.

    Two writer threads each append ten events to the same room. The test
    fails if any writer raised, if events are missing, or if sequence
    numbers are duplicated or out of order.

    NOTE(review): both threads share one SQLAlchemy session (and therefore
    one connection). Sessions are not designed for concurrent use, and a
    database-level lock may not block a second acquisition on the same
    connection. Consider giving each writer its own session so the lock is
    really contended.
    """
    import threading

    room_id = uuid4()
    event_store = EventStore(db_session)
    writer_count = 2
    events_per_writer = 10
    failures = []  # exceptions raised inside writer threads

    def append_events():
        # An exception in a thread does not propagate to the test, so
        # record it here and assert on it after join().
        try:
            for i in range(events_per_writer):
                event_store.append_event(
                    room_id=room_id,
                    event_type="task_added",
                    event_data={"task_id": f"task_{i}"},
                    mini_app_id="todo_list",
                )
        except Exception as exc:  # thread boundary: surface everything
            failures.append(exc)

    # Run the writers concurrently.
    threads = [threading.Thread(target=append_events) for _ in range(writer_count)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    assert not failures, f"writer thread(s) raised: {failures!r}"

    # Every append must be stored, with unique and ordered sequence numbers.
    events = event_store.get_events(room_id)
    assert len(events) == writer_count * events_per_writer
    sequences = [e.sequence_number for e in events]
    assert len(sequences) == len(set(sequences))  # All unique
    assert sequences == sorted(sequences)  # Sequential
3. End-to-End API Tests¶
Test Full Message Flow¶
# test_miniapp_e2e.py
import pytest
from fastapi.testclient import TestClient
from app.main import app
# Module-level FastAPI test client shared by every E2E test in this file.
client = TestClient(app)
def test_bill_split_auto_trigger():
    """A group message about splitting a bill gets a bill/receipt-related reply."""
    message = {
        "chat_guid": "test_chat_123",
        "mode": "group",
        "sender": "+15551234567",
        "text": "let's split this bill",
        "timestamp": 1700000000,
        "participants": ["+15551234567", "+15559876543"],
    }
    response = client.post("/orchestrator/message", json=message)
    assert response.status_code == 200

    # The confirmation should mention either the bill or a receipt.
    reply = response.json()["response_text"].lower()
    assert "bill" in reply or "receipt" in reply
def test_todo_list_auto_trigger():
    """A direct "add ... to my list" message gets a reply confirming the addition."""
    message = {
        "chat_guid": "test_chat_456",
        "mode": "direct",
        "sender": "+15551234567",
        "text": "add milk to my list",
        "timestamp": 1700000000,
        "participants": ["+15551234567"],
    }
    response = client.post("/orchestrator/message", json=message)
    assert response.status_code == 200

    # The confirmation should mention the addition or the list itself.
    reply = response.json()["response_text"].lower()
    assert "added" in reply or "list" in reply
def test_no_trigger_for_normal_conversation():
    """Ordinary small talk is still answered successfully by the orchestrator."""
    message = {
        "chat_guid": "test_chat_789",
        "mode": "direct",
        "sender": "+15551234567",
        "text": "how's the weather?",
        "timestamp": 1700000000,
        "participants": ["+15551234567"],
    }
    response = client.post("/orchestrator/message", json=message)

    # Only the status is checked here; the reply is expected to be a normal
    # Sage response rather than a mini-app trigger.
    assert response.status_code == 200
Run E2E Tests¶
pytest tests/test_miniapp_e2e.py -v
4. Manual Testing¶
Option A: Via Telegram Bot (Easiest)¶
Setup:
Test Script:
1. Test Bill Split:
You: "let's split this $45 dinner bill"
Bot: "got it! I'll help you split this bill. upload a receipt or tell me the total?"
You: "the total is $45"
Bot: [Creates bill split room, shows breakdown]
2. Test Todo List:
3. Test Trip Planner:
Option B: Direct API Testing (Postman/curl)¶
Test Auto-Trigger:
curl -X POST http://localhost:8000/orchestrator/message \
-H "Content-Type: application/json" \
-d '{
"chat_guid": "test_123",
"mode": "direct",
"sender": "+15551234567",
"text": "split this bill",
"timestamp": 1700000000,
"participants": ["+15551234567"]
}'
Expected Response:
{
"response_text": "got it! I'll help you split this bill. upload a receipt or tell me the total?",
"chat_guid": "test_123",
"mode": "direct"
}
Option C: Python Test Script¶
# manual_test.py
import requests
# Root URL of the backend that the manual checks below send requests to.
BASE_URL = "http://localhost:8000"
def test_bill_split():
    """Send a bill-split message to the running backend and print the reply.

    This is a manual smoke check: it prints the HTTP status and JSON body
    and asserts nothing.
    """
    print("\n=== Testing Bill Split ===")
    response = requests.post(
        f"{BASE_URL}/orchestrator/message",
        json={
            "chat_guid": "manual_test_1",
            "mode": "group",
            "sender": "+15551234567",
            "text": "let's split this bill",
            "timestamp": 1700000000,
            "participants": ["+15551234567", "+15559999999"],
        },
        # requests has no default timeout; without one an unresponsive
        # server would hang this script indefinitely.
        timeout=30,
    )
    print(f"Status: {response.status_code}")
    print(f"Response: {response.json()}")
def test_todo_list():
    """Send a todo-list message to the running backend and print the reply.

    This is a manual smoke check: it prints the HTTP status and JSON body
    and asserts nothing.
    """
    print("\n=== Testing Todo List ===")
    response = requests.post(
        f"{BASE_URL}/orchestrator/message",
        json={
            "chat_guid": "manual_test_2",
            "mode": "direct",
            "sender": "+15551234567",
            "text": "add milk to my list",
            "timestamp": 1700000000,
            "participants": ["+15551234567"],
        },
        # requests has no default timeout; without one an unresponsive
        # server would hang this script indefinitely.
        timeout=30,
    )
    print(f"Status: {response.status_code}")
    print(f"Response: {response.json()}")
def test_normal_conversation():
    """Send an ordinary chat message to the running backend and print the reply.

    This is a manual smoke check for the "no mini-app trigger" path: it
    prints the HTTP status and JSON body and asserts nothing.
    """
    print("\n=== Testing Normal Conversation (No Trigger) ===")
    response = requests.post(
        f"{BASE_URL}/orchestrator/message",
        json={
            "chat_guid": "manual_test_3",
            "mode": "direct",
            "sender": "+15551234567",
            "text": "how are you doing today?",
            "timestamp": 1700000000,
            "participants": ["+15551234567"],
        },
        # requests has no default timeout; without one an unresponsive
        # server would hang this script indefinitely.
        timeout=30,
    )
    print(f"Status: {response.status_code}")
    print(f"Response: {response.json()}")
if __name__ == "__main__":
    # Run every manual check in order when executed as a script.
    for check in (test_bill_split, test_todo_list, test_normal_conversation):
        check()
Run (with the backend already running):
python manual_test.py
5. Receipt Analyzer Testing¶
Test Vision API Integration¶
# test_receipt_analyzer.py
from app.miniapps.apps.bill_split.receipt_analyzer import get_receipt_analyzer
import base64
def test_receipt_analysis():
    """Analyze a sample receipt image and check that the result has the expected fields."""
    analyzer = get_receipt_analyzer()

    # The sample image is not provided; supply your own at this path.
    with open("test_fixtures/sample_receipt.jpg", "rb") as receipt_file:
        raw_image = receipt_file.read()

    result = analyzer.analyze_receipt(raw_image)

    for field in ("items", "total_amount", "merchant_name"):
        assert field in result
    item_count = len(result["items"])
    assert item_count > 0

    print(f"Merchant: {result['merchant_name']}")
    print(f"Total: ${result['total_amount']}")
    print(f"Items: {len(result['items'])}")
Quick Start Testing¶
Fastest Way to Test Everything:¶
1. Start Backend:
2. Run Unit Tests:
3. Test via Telegram:
   - Message bot: "split this bill"
   - Message bot: "add milk to my list"
   - Message bot: "plan a trip to Paris"
4. Check Logs:
Expected Results¶
✅ Success Indicators:¶
Unit Tests:
- All detector tests pass
- All reducer tests pass
- State reconstruction works

Integration Tests:
- Events stored correctly
- State rebuilds from events
- Concurrent writes are safe

E2E Tests:
- Auto-trigger works
- Correct mini-app selected
- Normal conversation unaffected

Manual Testing:
- Natural language triggers work
- User gets appropriate response
- Multi-user rooms work
❌ Common Issues:¶
1. Trigger Not Detected:
   - Check MiniAppDetector patterns
   - Verify user has app enabled
   - Check logs for detection attempts
2. State Not Updating:
   - Verify reducer is registered
   - Check EventStore for events
   - Verify sequence numbers
3. Vision API Fails:
   - Check OpenAI API key
   - Verify image size < 20MB
   - Check model name (should be gpt-5)
Continuous Testing¶
CI/CD Integration:¶
# .github/workflows/test-miniapps.yml
name: Test Mini-Apps
on: [push, pull_request]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # Use current major versions; the v2 releases of these actions are outdated.
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - run: pip install -r requirements.txt
      - run: pytest tests/test_miniapp*.py -v
Performance Testing¶
Load Test:¶
# test_miniapp_performance.py
import asyncio
import time
from app.orchestrator.miniapp_detector import get_detector
async def benchmark_detection():
    """Time 5,000 trigger detections and assert the average stays under 1 ms.

    Prints the total elapsed time and the per-detection average.

    Raises:
        AssertionError: if the average detection time is 1 ms or more.
    """
    detector = get_detector()
    enabled_apps = ["bill_split", "trip_planner", "todo_list", "poll"]
    messages = [
        "split this bill",
        "add to list",
        "plan a trip",
        "create a poll",
        "normal conversation",
    ] * 1000

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # with wall-clock adjustments and is coarse on some platforms.
    start = time.perf_counter()
    for msg in messages:
        detector.detect_trigger(msg, enabled_apps)
    total_time = time.perf_counter() - start

    avg_time = total_time / len(messages)
    print(f"Total: {total_time:.2f}s")
    print(f"Average: {avg_time*1000:.2f}ms per detection")
    assert avg_time < 0.001  # Should be < 1ms
# Run the benchmark only when this file is executed as a script. The file
# name matches the tests/test_miniapp*.py glob, so an unguarded call would
# run on import during pytest collection.
if __name__ == "__main__":
    asyncio.run(benchmark_detection())
Expected: <1ms per detection ✅
Summary¶
Testing Priorities:
1. ✅ Unit tests - Quick feedback, test logic
2. ✅ Manual Telegram - Real UX validation
3. ✅ E2E API - Integration verification
4. ⏩ Performance - Optional optimization
Quick Validation:
# 1. Run tests
pytest tests/test_miniapp*.py
# 2. Start server
./run.sh
# 3. Test via Telegram
# Send: "split this bill"
# Should get: Bill split confirmation
# 4. Check it worked
curl http://localhost:8000/health
That's it! The framework is ready to test. 🚀