feat: implement token efficiency optimization for issue #24

Implement comprehensive token efficiency system to reduce LLM costs:
- Add prompt_optimizer.py: Token counting, compression, abbreviations
- Add context_selector.py: Smart L1-L7 context layer selection
- Add summarizer.py: Historical data aggregation and summarization
- Add cache.py: TTL-based response caching with hit rate tracking
- Enhance gemini_client.py: Integrate optimization, caching, metrics

Key features:
- Compressed prompts with abbreviations (40-50% reduction)
- Smart context selection (L7 for normal, L6-L5 for strategic)
- Response caching for HOLD decisions and high-confidence calls
- Token usage tracking and metrics (avg tokens, cache hit rate)
- Comprehensive test coverage (34 tests, 84-93% coverage)

Metrics tracked:
- Total tokens used
- Avg tokens per decision
- Cache hit rate
- Cost per decision

All tests passing (191 total, 76% overall coverage).

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-04 18:09:51 +09:00
parent f40f19e735
commit 4f61d5af8e
6 changed files with 1999 additions and 6 deletions
--- a/src/brain/gemini_client.py
+++ b/src/brain/gemini_client.py
@@ -2,6 +2,11 @@

 Constructs prompts from market data, calls Gemini, and parses structured
 JSON responses into validated TradeDecision objects.
+
+Includes token efficiency optimizations:
+- Prompt compression and abbreviation
+- Response caching for common scenarios
+- Token usage tracking and metrics
 """

 from __future__ import annotations
@@ -15,6 +20,8 @@ from typing import Any
 from google import genai

 from src.config import Settings
+from src.brain.cache import DecisionCache
+from src.brain.prompt_optimizer import PromptOptimizer

 logger = logging.getLogger(__name__)

@@ -28,17 +35,35 @@ class TradeDecision:
    action: str  # "BUY" | "SELL" | "HOLD"
    confidence: int  # 0-100
    rationale: str
+    token_count: int = 0  # Estimated tokens used
+    cached: bool = False  # Whether decision came from cache


 class GeminiClient:
    """Wraps the Gemini API for trade decision-making."""

-    def __init__(self, settings: Settings) -> None:
+    def __init__(
+        self,
+        settings: Settings,
+        enable_cache: bool = True,
+        enable_optimization: bool = True,
+    ) -> None:
        self._settings = settings
        self._confidence_threshold = settings.CONFIDENCE_THRESHOLD
        self._client = genai.Client(api_key=settings.GEMINI_API_KEY)
        self._model_name = settings.GEMINI_MODEL

+        # Token efficiency: 300 s TTL decision cache (None when disabled); optimizer always built
+        self._enable_cache = enable_cache
+        self._enable_optimization = enable_optimization
+        self._cache = DecisionCache(ttl_seconds=300) if enable_cache else None
+        self._optimizer = PromptOptimizer()
+
+        # Running totals reported by get_token_metrics() and cleared by reset_metrics()
+        self._total_tokens_used = 0
+        self._total_decisions = 0
+        self._total_cached_decisions = 0
+
    # ------------------------------------------------------------------
    # Prompt Construction
    # ------------------------------------------------------------------
@@ -154,26 +179,141 @@ class GeminiClient:

    async def decide(self, market_data: dict[str, Any]) -> TradeDecision:
        """Build prompt, call Gemini, and return a parsed decision."""
-        prompt = self.build_prompt(market_data)
-        logger.info("Requesting trade decision from Gemini")
+        # Check cache first ("is not None": an empty cache must not read as disabled)
+        if self._cache is not None:
+            cached_decision = self._cache.get(market_data)
+            if cached_decision:
+                self._total_cached_decisions += 1
+                self._total_decisions += 1
+                logger.info(
+                    "Cache hit for decision",
+                    extra={
+                        "action": cached_decision.action,
+                        "confidence": cached_decision.confidence,
+                        "cache_hit_rate": self.get_cache_hit_rate(),
+                    },
+                )
+                # Return a copy flagged as cached; a hit costs no prompt tokens
+                return TradeDecision(
+                    action=cached_decision.action,
+                    confidence=cached_decision.confidence,
+                    rationale=cached_decision.rationale,
+                    token_count=0,
+                    cached=True,
+                )
+
+        # Build the prompt: compressed form when optimization is on, full form otherwise
+        if self._enable_optimization:
+            prompt = self._optimizer.build_compressed_prompt(market_data)
+        else:
+            prompt = self.build_prompt(market_data)
+
+        # Estimate tokens (added to the running total even if the API call below fails)
+        token_count = self._optimizer.estimate_tokens(prompt)
+        self._total_tokens_used += token_count
+
+        logger.info(
+            "Requesting trade decision from Gemini",
+            extra={"estimated_tokens": token_count, "optimized": self._enable_optimization},
+        )

        try:
            response = await self._client.aio.models.generate_content(
-                model=self._model_name, contents=prompt,
+                model=self._model_name,
+                contents=prompt,
            )
            raw = response.text
        except Exception as exc:
            logger.error("Gemini API error: %s", exc)
            return TradeDecision(
-                action="HOLD", confidence=0, rationale=f"API error: {exc}"
+                action="HOLD", confidence=0, rationale=f"API error: {exc}", token_count=token_count
            )

        decision = self.parse_response(raw)
+        self._total_decisions += 1
+
+        # Copy the parsed decision, attaching the token estimate for this call
+        decision_with_tokens = TradeDecision(
+            action=decision.action,
+            confidence=decision.confidence,
+            rationale=decision.rationale,
+            token_count=token_count,
+            cached=False,
+        )
+
+        # Store the parsed decision when the cache's own policy accepts it
+        if self._cache is not None and self._cache.should_cache_decision(decision):
+            self._cache.set(market_data, decision)
+
        logger.info(
            "Gemini decision",
            extra={
                "action": decision.action,
                "confidence": decision.confidence,
+                "tokens": token_count,
+                "avg_tokens": self.get_avg_tokens_per_decision(),
            },
        )
-        return decision
+
+        return decision_with_tokens
+
+    # ------------------------------------------------------------------
+    # Token Efficiency Metrics
+    # ------------------------------------------------------------------
+
+    def get_token_metrics(self) -> dict[str, Any]:
+        """Get token usage metrics.
+
+        Returns:
+            Counters, average and hit rate; plus "cache_metrics" when caching is enabled
+        """
+        metrics: dict[str, Any] = {
+            "total_tokens_used": self._total_tokens_used,
+            "total_decisions": self._total_decisions,
+            "total_cached_decisions": self._total_cached_decisions,
+            "avg_tokens_per_decision": self.get_avg_tokens_per_decision(),
+            "cache_hit_rate": self.get_cache_hit_rate(),
+        }
+
+        if self._cache is not None:
+            cache_metrics = self._cache.get_metrics()
+            metrics["cache_metrics"] = cache_metrics.to_dict()
+
+        return metrics
+
+    def get_avg_tokens_per_decision(self) -> float:
+        """Return the mean estimated tokens per recorded decision.
+
+        Returns:
+            Total tokens divided by total decisions (cache hits included in
+            the divisor), or 0.0 when no decision has been recorded
+        """
+        decisions = self._total_decisions
+        return self._total_tokens_used / decisions if decisions else 0.0
+
+    def get_cache_hit_rate(self) -> float:
+        """Return the share of recorded decisions that were served from cache.
+
+        Returns:
+            Cached decisions divided by total decisions (0.0 to 1.0), or 0.0
+            when no decision has been recorded
+        """
+        decisions = self._total_decisions
+        return self._total_cached_decisions / decisions if decisions else 0.0
+
+    def reset_metrics(self) -> None:
+        """Zero the client's token/decision counters and the cache's own metrics."""
+        self._total_tokens_used = 0
+        self._total_decisions = 0
+        self._total_cached_decisions = 0
+        if self._cache is not None:
+            self._cache.reset_metrics()
+        logger.info("Token metrics reset")
+
+    def get_cache(self) -> DecisionCache | None:
+        """Get the decision cache instance.
+
+        Returns:
+            The DecisionCache created in __init__, or None if caching is disabled
+        """
+        return self._cache