Mini-App Framework Testing Guide¶

Testing Strategy¶

We'll test at 4 levels:

1. Unit Tests - Individual components (reducers, detectors)
2. Integration Tests - Components working together (EventStore + StateCoordinator)
3. End-to-End Tests - Full message flow via API
4. Manual Testing - Real conversations via Telegram/iMessage

1. Unit Tests¶

Test the MiniAppDetector¶

# test_miniapp_detector.py
import pytest
from app.orchestrator.miniapp_detector import MiniAppDetector

def test_bill_split_trigger():
    """Bill-split phrases select ``bill_split``; unrelated chatter selects nothing."""
    matcher = MiniAppDetector()
    apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    # Phrases that are expected to select the bill-split mini-app.
    for phrase in ("let's split this bill", "who owes what?"):
        assert matcher.detect_trigger(phrase, apps) == "bill_split"

    # Small talk must not select any mini-app.
    assert matcher.detect_trigger("how's the weather?", apps) is None

def test_todo_list_trigger():
    """List-style and reminder-style phrases both select ``todo_list``."""
    matcher = MiniAppDetector()
    apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    for phrase in ("add milk to my list", "remind me to call mom"):
        assert matcher.detect_trigger(phrase, apps) == "todo_list"

def test_trip_planner_trigger():
    """Trip and vacation phrases both select ``trip_planner``."""
    matcher = MiniAppDetector()
    apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    for phrase in ("plan a trip to Paris", "vacation planning"):
        assert matcher.detect_trigger(phrase, apps) == "trip_planner"

def test_confidence_scoring():
    """Confidence for ``bill_split`` falls into the expected band for each kind of match."""
    matcher = MiniAppDetector()

    # Exact phrase: expected in the high band.
    exact_score = matcher.get_trigger_confidence("split this bill", "bill_split")
    assert exact_score >= 0.9

    # Regex-style match: expected in the medium band.
    regex_score = matcher.get_trigger_confidence("who owes me money", "bill_split")
    assert regex_score >= 0.7

    # A lone keyword: expected to stay below the medium band.
    keyword_score = matcher.get_trigger_confidence("bill", "bill_split")
    assert keyword_score < 0.7

Test the Reducers¶

# test_bill_split_reducer.py
from app.miniapps.apps.bill_split.reducer import bill_split_reducer, get_initial_state
from app.miniapps.apps.bill_split.schema import BillEventType

def test_initial_state():
    """A fresh bill state carries its id, version 0, and empty items/splits."""
    fresh = get_initial_state("bill_123")

    expected_fields = (
        ("bill_id", "bill_123"),
        ("version", 0),
        ("items", []),
        ("splits", {}),
    )
    for field, expected in expected_fields:
        assert fresh[field] == expected

def test_bill_created_event():
    """Applying BILL_CREATED records the merchant and total and moves the version to 1."""
    initial = get_initial_state("bill_123")

    bill_details = {
        "merchant_name": "Pizza Place",
        "total_amount": 45.50,
        "currency": "USD"
    }
    created = {
        "event_type": BillEventType.BILL_CREATED.value,
        "event_data": bill_details,
        "actor_user_id": "user_1",
        "timestamp": "2025-11-17T12:00:00Z"
    }

    updated = bill_split_reducer(initial, created)

    assert updated["merchant_name"] == "Pizza Place"
    assert updated["total_amount"] == 45.50
    assert updated["version"] == 1

def test_item_added_and_split_calculation():
    """A $20 item shared by two participants leaves each of them owing $10."""
    bill = get_initial_state("bill_123")

    # Seed two participants, each with an empty, unpaid split entry.
    people = {"user_1": ("Alice", "+1234"), "user_2": ("Bob", "+5678")}
    bill["participants"] = {
        uid: {"name": name, "phone": phone}
        for uid, (name, phone) in people.items()
    }
    bill["splits"] = {
        uid: {"user_id": uid, "user_name": name, "total_owed": 0, "items_share": [], "paid": False}
        for uid, (name, _phone) in people.items()
    }

    # One item shared by both participants.
    pizza = {
        "item_id": "item_1",
        "description": "Pizza",
        "amount": 20.0,
        "shared_by": ["user_1", "user_2"]
    }
    item_added = {
        "event_type": BillEventType.ITEM_ADDED.value,
        "event_data": pizza,
        "actor_user_id": "user_1",
        "timestamp": "2025-11-17T12:00:00Z"
    }

    result = bill_split_reducer(bill, item_added)

    # The 20.0 item is split evenly: 10.0 each.
    for uid in ("user_1", "user_2"):
        assert result["splits"][uid]["total_owed"] == 10.0

def test_split_adjustment():
    """Excluding a user from a category removes them from that item's ``shared_by``."""
    # Scenario: "I didn't have alcohol".
    bill = get_initial_state("bill_123")
    bill["participants"] = {
        "user_1": {"name": "Alice"},
        "user_2": {"name": "Bob"}
    }
    beer = {"id": "item_1", "description": "Beer", "amount": 6.0, "category": "alcohol", "shared_by": ["user_1", "user_2"]}
    bill["items"] = [beer]

    # Bob (user_2) opts out of everything in the "alcohol" category.
    exclusion = {
        "adjustment_type": "exclude_category",
        "category": "alcohol",
        "user_id": "user_2"
    }
    adjusted_event = {
        "event_type": BillEventType.SPLIT_ADJUSTED.value,
        "event_data": exclusion,
        "actor_user_id": "user_2",
        "timestamp": "2025-11-17T12:00:00Z"
    }

    result = bill_split_reducer(bill, adjusted_event)

    # Alice is the only one left sharing the beer.
    first_item = result["items"][0]
    assert first_item["shared_by"] == ["user_1"]

# test_todo_list_reducer.py
from app.miniapps.apps.todo_list.reducer import todo_list_reducer, get_initial_state
from app.miniapps.apps.todo_list.schema import TodoEventType

def test_task_lifecycle():
    """A task is added as pending and then marked completed by the acting user."""
    todo = get_initial_state("list_123")

    # Step 1: add the task.
    new_task = {
        "task_id": "task_1",
        "description": "Buy milk",
        "priority": "normal"
    }
    todo = todo_list_reducer(todo, {
        "event_type": TodoEventType.TASK_ADDED.value,
        "event_data": new_task,
        "actor_user_id": "user_1",
        "timestamp": "2025-11-17T12:00:00Z"
    })
    assert len(todo["tasks"]) == 1
    added = todo["tasks"][0]
    assert added["description"] == "Buy milk"
    assert added["status"] == "pending"

    # Step 2: complete the same task an hour later.
    todo = todo_list_reducer(todo, {
        "event_type": TodoEventType.TASK_COMPLETED.value,
        "event_data": {"task_id": "task_1"},
        "actor_user_id": "user_1",
        "timestamp": "2025-11-17T13:00:00Z"
    })
    finished = todo["tasks"][0]
    assert finished["status"] == "completed"
    assert finished["completed_by"] == "user_1"

Run Unit Tests¶

cd /path/to/archety  # your local checkout of the project
pytest tests/test_miniapp_detector.py -v
pytest tests/test_bill_split_reducer.py -v
pytest tests/test_todo_list_reducer.py -v

2. Integration Tests¶

Test EventStore + StateCoordinator¶

# test_miniapp_integration.py
import pytest
from uuid import uuid4
from sqlalchemy.orm import Session
from app.miniapps.event_store import EventStore
from app.miniapps.state_coordinator import StateCoordinator
from app.miniapps.apps.bill_split.schema import BillEventType

@pytest.fixture
def db_session():
    """Yield a database session for a test and roll it back afterwards.

    The session comes from the application's ``get_db`` iterator. The iterator
    is kept and closed on teardown so that any cleanup ``get_db`` performs after
    its own ``yield`` runs deterministically, rather than whenever the abandoned
    generator happens to be garbage-collected.
    """
    # Use test database
    from app.database.db import get_db
    db_source = get_db()
    db = next(db_source)
    try:
        yield db
    finally:
        # Discard whatever the test wrote, then let get_db finish its cleanup.
        try:
            db.rollback()
        finally:
            # Only generators have close(); tolerate a plain iterator.
            closer = getattr(db_source, "close", None)
            if closer is not None:
                closer()

def test_bill_split_event_replay(db_session: Session):
    """Test that replaying stored events rebuilds the bill-split state"""
    room_id = uuid4()

    # Both helpers operate on the same session.
    store = EventStore(db_session)
    coordinator = StateCoordinator(db_session)

    # Events to record, in order: bill creation, then one shared item.
    recorded = [
        ("bill_created", {"merchant_name": "Restaurant", "total_amount": 50.0}),
        ("item_added", {
            "item_id": "item_1",
            "description": "Pizza",
            "amount": 30.0,
            "shared_by": ["user_1", "user_2"]
        }),
    ]
    for kind, payload in recorded:
        store.append_event(
            room_id=room_id,
            event_type=kind,
            event_data=payload,
            actor_user_id=uuid4(),
            mini_app_id="bill_split"
        )

    # Rebuild the state from what was stored.
    rebuilt = coordinator.get_current_state("bill_split", room_id)

    assert rebuilt["merchant_name"] == "Restaurant"
    assert len(rebuilt["items"]) == 1
    assert rebuilt["version"] == 2  # Two events applied

def test_concurrent_event_writes(db_session: Session):
    """Test that advisory locks prevent race conditions

    Two threads each append ten events to the same room. The test fails if a
    writer thread raises, if any event is missing, or if sequence numbers are
    duplicated or returned out of order.
    """
    import threading
    room_id = uuid4()
    event_store = EventStore(db_session)
    # NOTE(review): both threads share one session through this EventStore.
    # SQLAlchemy sessions are not meant for concurrent use, so this may not
    # exercise the lock the way separate connections would. Confirm.

    writer_count = 2
    events_per_writer = 10
    failures = []  # exceptions raised inside writer threads

    def append_events():
        # An exception in a thread does not fail the test on its own, so
        # capture it here and assert on it from the main thread.
        try:
            for i in range(events_per_writer):
                event_store.append_event(
                    room_id=room_id,
                    event_type="task_added",
                    event_data={"task_id": f"task_{i}"},
                    mini_app_id="todo_list"
                )
        except Exception as exc:
            failures.append(exc)

    # Run the writers concurrently
    threads = [threading.Thread(target=append_events) for _ in range(writer_count)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    assert not failures, f"writer thread(s) raised: {failures!r}"

    # Without the count check, an empty result would pass the checks below.
    events = event_store.get_events(room_id)
    assert len(events) == writer_count * events_per_writer

    # Verify all events have unique sequence numbers
    sequences = [e.sequence_number for e in events]
    assert len(sequences) == len(set(sequences))  # All unique
    assert sequences == sorted(sequences)  # Sequential

3. End-to-End API Tests¶

Test Full Message Flow¶

# test_miniapp_e2e.py
import pytest
from fastapi.testclient import TestClient
from app.main import app

# Module-level test client wrapping the FastAPI app; shared by the tests below.
client = TestClient(app)

def test_bill_split_auto_trigger():
    """Test that a group message asking to split the bill gets a bill/receipt reply"""
    payload = {
        "chat_guid": "test_chat_123",
        "mode": "group",
        "sender": "+15551234567",
        "text": "let's split this bill",
        "timestamp": 1700000000,
        "participants": ["+15551234567", "+15559876543"]
    }

    response = client.post("/orchestrator/message", json=payload)
    assert response.status_code == 200

    # The reply should mention either the bill or a receipt.
    reply = response.json()["response_text"].lower()
    assert any(word in reply for word in ("bill", "receipt"))

def test_todo_list_auto_trigger():
    """Test that a direct 'add ... to my list' message gets an added/list reply"""
    payload = {
        "chat_guid": "test_chat_456",
        "mode": "direct",
        "sender": "+15551234567",
        "text": "add milk to my list",
        "timestamp": 1700000000,
        "participants": ["+15551234567"]
    }

    response = client.post("/orchestrator/message", json=payload)
    assert response.status_code == 200

    # The reply should confirm the addition or mention the list.
    reply = response.json()["response_text"].lower()
    assert any(word in reply for word in ("added", "list"))

def test_no_trigger_for_normal_conversation():
    """Test that an ordinary message is still answered successfully"""
    payload = {
        "chat_guid": "test_chat_789",
        "mode": "direct",
        "sender": "+15551234567",
        "text": "how's the weather?",
        "timestamp": 1700000000,
        "participants": ["+15551234567"]
    }

    response = client.post("/orchestrator/message", json=payload)

    # Only the status is checked here; the reply should be the normal Sage
    # response rather than a mini-app trigger, but that is not asserted.
    assert response.status_code == 200

Run E2E Tests¶

pytest tests/test_miniapp_e2e.py -v

4. Manual Testing¶

Option A: Via Telegram Bot (Easiest)¶

Setup:

# Make sure Telegram bot is configured
# Backend should already be running on Railway or locally

Test Script:

1. Test Bill Split:

You: "let's split this $45 dinner bill"
Bot: "got it! I'll help you split this bill. upload a receipt or tell me the total?"

You: "the total is $45"
Bot: [Creates bill split room, shows breakdown]

2. Test Todo List:

You: "add eggs to my shopping list"
Bot: "✅ added to your list! anything else?"

You: "also add bread"
Bot: "✅ added to your list! anything else?"

You: "show my list"
Bot: [Shows all tasks]

3. Test Trip Planner:

You: "plan a trip to Tokyo"
Bot: "awesome! where are we planning to go?"

You: "Tokyo, Japan"
Bot: "nice choice! when were you thinking?"

Option B: Direct API Testing (Postman/curl)¶

Test Auto-Trigger:

curl -X POST http://localhost:8000/orchestrator/message \
  -H "Content-Type: application/json" \
  -d '{
    "chat_guid": "test_123",
    "mode": "direct",
    "sender": "+15551234567",
    "text": "split this bill",
    "timestamp": 1700000000,
    "participants": ["+15551234567"]
  }'

Expected Response:

{
  "response_text": "got it! I'll help you split this bill. upload a receipt or tell me the total?",
  "chat_guid": "test_123",
  "mode": "direct"
}

Option C: Python Test Script¶

# manual_test.py
import requests

BASE_URL = "http://localhost:8000"

def test_bill_split():
    """Send a group 'split this bill' message and print the status and JSON reply."""
    print("\n=== Testing Bill Split ===")

    response = requests.post(
        f"{BASE_URL}/orchestrator/message",
        json={
            "chat_guid": "manual_test_1",
            "mode": "group",
            "sender": "+15551234567",
            "text": "let's split this bill",
            "timestamp": 1700000000,
            "participants": ["+15551234567", "+15559999999"]
        },
        # requests waits forever by default; fail instead of hanging when
        # the server is down or stuck.
        timeout=30,
    )

    print(f"Status: {response.status_code}")
    print(f"Response: {response.json()}")

def test_todo_list():
    """Send a direct 'add milk to my list' message and print the status and JSON reply."""
    print("\n=== Testing Todo List ===")

    response = requests.post(
        f"{BASE_URL}/orchestrator/message",
        json={
            "chat_guid": "manual_test_2",
            "mode": "direct",
            "sender": "+15551234567",
            "text": "add milk to my list",
            "timestamp": 1700000000,
            "participants": ["+15551234567"]
        },
        # requests waits forever by default; fail instead of hanging when
        # the server is down or stuck.
        timeout=30,
    )

    print(f"Status: {response.status_code}")
    print(f"Response: {response.json()}")

def test_normal_conversation():
    """Send an ordinary chat message and print the status and JSON reply."""
    print("\n=== Testing Normal Conversation (No Trigger) ===")

    response = requests.post(
        f"{BASE_URL}/orchestrator/message",
        json={
            "chat_guid": "manual_test_3",
            "mode": "direct",
            "sender": "+15551234567",
            "text": "how are you doing today?",
            "timestamp": 1700000000,
            "participants": ["+15551234567"]
        },
        # requests waits forever by default; fail instead of hanging when
        # the server is down or stuck.
        timeout=30,
    )

    print(f"Status: {response.status_code}")
    print(f"Response: {response.json()}")

if __name__ == "__main__":
    # Run every manual scenario in order when executed as a script.
    for scenario in (test_bill_split, test_todo_list, test_normal_conversation):
        scenario()

Run:

python manual_test.py

5. Receipt Analyzer Testing¶

Test Vision API Integration¶

# test_receipt_analyzer.py
from app.miniapps.apps.bill_split.receipt_analyzer import get_receipt_analyzer
import base64

def test_receipt_analysis():
    """Analyze a sample receipt image and check the result has items, a total and a merchant."""
    receipt_analyzer = get_receipt_analyzer()

    # The sample image is not shipped with this guide; supply your own.
    with open("test_fixtures/sample_receipt.jpg", "rb") as fh:
        receipt_bytes = fh.read()

    parsed = receipt_analyzer.analyze_receipt(receipt_bytes)

    for required_key in ("items", "total_amount", "merchant_name"):
        assert required_key in parsed
    assert len(parsed["items"]) > 0

    print(f"Merchant: {parsed['merchant_name']}")
    print(f"Total: ${parsed['total_amount']}")
    print(f"Items: {len(parsed['items'])}")

Quick Start Testing¶

Fastest Way to Test Everything:¶

Start Backend:

./run.sh
# or
uvicorn app.main:app --reload

Run Unit Tests:
```
pytest tests/ -v -k miniapp
```
Test via Telegram:
Message bot: "split this bill"
Message bot: "add milk to my list"
Message bot: "plan a trip to Paris"

Check Logs:

tail -f logs/app.log | grep "Mini-app trigger"

Expected Results¶

✅ Success Indicators:¶

Unit Tests:

- All detector tests pass
- All reducer tests pass
- State reconstruction works

Integration Tests:

- Events stored correctly
- State rebuilds from events
- Concurrent writes are safe

E2E Tests:

- Auto-trigger works
- Correct mini-app selected
- Normal conversation unaffected

Manual Testing:

- Natural language triggers work
- User gets appropriate response
- Multi-user rooms work

❌ Common Issues:¶

Trigger Not Detected:

- Check MiniAppDetector patterns
- Verify user has app enabled
- Check logs for detection attempts

State Not Updating:

- Verify reducer is registered
- Check EventStore for events
- Verify sequence numbers

Vision API Fails:

- Check OpenAI API key
- Verify image size < 20MB
- Check model name (should be gpt-5)

Continuous Testing¶

CI/CD Integration:¶

# .github/workflows/test-miniapps.yml
# Runs the mini-app test files on every push and pull request.
name: Test Mini-Apps

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # v2 of these actions is deprecated; use the current major versions.
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - run: pip install -r requirements.txt
      - run: pytest tests/test_miniapp*.py -v

Performance Testing¶

Load Test:¶

# test_miniapp_performance.py
import asyncio
import time
from app.orchestrator.miniapp_detector import get_detector

async def benchmark_detection():
    """Time ``detect_trigger`` over 5,000 messages and assert the mean is under 1 ms.

    Prints the total and per-detection time. The coroutine awaits nothing; it
    stays ``async`` so the ``asyncio.run`` call below keeps working.
    """
    detector = get_detector()
    enabled_apps = ["bill_split", "trip_planner", "todo_list", "poll"]

    messages = [
        "split this bill",
        "add to list",
        "plan a trip",
        "create a poll",
        "normal conversation"
    ] * 1000

    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    for msg in messages:
        detector.detect_trigger(msg, enabled_apps)
    total_time = time.perf_counter() - start

    avg_time = total_time / len(messages)

    print(f"Total: {total_time:.2f}s")
    print(f"Average: {avg_time*1000:.2f}ms per detection")
    assert avg_time < 0.001  # Should be < 1ms

asyncio.run(benchmark_detection())

Expected: <1ms per detection ✅

Summary¶

Testing Priorities:

1. ✅ Unit tests - Quick feedback, test logic
2. ✅ Manual Telegram - Real UX validation
3. ✅ E2E API - Integration verification
4. ⏩ Performance - Optional optimization

Quick Validation:

# 1. Run tests
pytest tests/test_miniapp*.py

# 2. Start server
./run.sh

# 3. Test via Telegram
# Send: "split this bill"
# Should get: Bill split confirmation

# 4. Check it worked
curl http://localhost:8000/health

That's it! The framework is ready to test. 🚀