---
# LiteLLM proxy configuration.
#
# Declares four Gemini deployments across two API keys (GEMINI_API_KEY and
# GEMINI_API_KEY_FALLBACK_1) and one fallback chain for `gemini-2.5-flash`.
# `model_name` is the alias that clients and the `fallbacks` list refer to;
# `litellm_params.model` is the provider model id the request is sent to, so
# it must be a real Gemini model name. Deployments that share the same
# upstream model are told apart by their `model_name`, not by a suffix on
# the model id.

model_list:
  # Primary: flash on the Tier 1 project.
  - model_name: gemini-2.5-flash
    litellm_params:
      model: gemini/gemini-2.5-flash
      api_key: os.environ/GEMINI_API_KEY  # Tier 1 key

  # Fallback: the same flash model on the Free Tier project. The distinct
  # model_name keeps it a separate deployment from the primary.
  - model_name: gemini-2.5-flash-fallback
    litellm_params:
      model: gemini/gemini-2.5-flash
      api_key: os.environ/GEMINI_API_KEY_FALLBACK_1  # Free Tier key

  # Last resort: flash-lite on the Tier 1 project.
  - model_name: gemini-2.5-flash-lite
    litellm_params:
      model: gemini/gemini-2.5-flash-lite
      api_key: os.environ/GEMINI_API_KEY  # Tier 1 key

  # Optional: flash-lite on the Free Tier project (extra cushion).
  - model_name: gemini-2.5-flash-lite-fallback
    litellm_params:
      model: gemini/gemini-2.5-flash-lite
      api_key: os.environ/GEMINI_API_KEY_FALLBACK_1  # Free Tier key

litellm_settings:
  drop_params: true
  # NOTE(review): the documented switch for pre-call checks appears to be
  # `router_settings.enable_pre_call_checks`, and whether it checks quota is
  # unverified - confirm this key is actually honoured here.
  pre_call_checks: true
  num_retries: 3  # retry a call up to 3 times per model before moving on
  request_timeout: 10  # raise an error if a request takes longer than 10s
  allowed_fails: 3  # cool a model down after more than 3 failures per minute
  cooldown_time: 30  # cooldown duration in seconds
  # Models to try, in order, when calls to `gemini-2.5-flash` keep failing.
  fallbacks:
    - gemini-2.5-flash:
        - gemini-2.5-flash-fallback
        - gemini-2.5-flash-lite
        - gemini-2.5-flash-lite-fallback