-
Notifications
You must be signed in to change notification settings - Fork 29
UN-3400 add bedrock llm test case #339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f0950d5
5d041df
c3d20f4
fc6225a
272f10d
e39508a
2b8860c
e3a9d02
70eaf3c
66f7787
2151887
e858805
6da8c36
d896f00
c6bacef
5bd211f
07cba30
beb30ce
91f9ea9
152255f
202d289
2ed1ae4
c6f75b9
5705ab8
3bea3d1
c3d097e
d372bd7
1918b4a
be9479a
b17e693
3c96a3e
df6b951
b1ba3e5
a33e694
db63745
5a2ea81
8e7b421
268d307
133d5e7
26e5411
9a7e5fe
a891ac6
d335bef
3603456
a093cc3
e6551d6
bd8d534
38a72e5
8b8146a
18e51a2
6b74e96
5818376
f066f91
58b8b86
a189a04
5e3e487
d071dce
9d23d9c
cccf521
b7b6378
d2516e4
8e8cf49
d3fe8f1
242be26
d7f9d78
af25a24
15c887f
4477b6b
abe8b46
c4d60fc
8848e2a
5f0f7d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,13 @@ on: | |
| jobs: | ||
| smoke-test: | ||
| runs-on: ubuntu-latest | ||
|
|
||
| # ─── Job‑level ENV ───────────────────────────────────────── | ||
| # every step below will have these in its shell environment | ||
| env: | ||
| AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
| AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
| AWS_REGION: ${{ vars.AWS_REGION }} | ||
|
|
||
| # 🚀 Ollama Service runs alongside main container | ||
| services: | ||
|
|
@@ -47,6 +54,11 @@ jobs: | |
| shell: bash | ||
| run: | | ||
| build_scripts/set_ollama_model.sh | ||
|
|
||
| - name: Check access to Bedrock models | ||
| shell: bash | ||
| run: | | ||
| build_scripts/check_list_bedrock_models_profile_single.sh | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. triggered a check of access llm list in yml file |
||
|
|
||
| - name: Show installed packages | ||
| run: pip freeze | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
#!/bin/bash

# Quick connectivity check: installs the AWS CLI v2 and then uses it to list
# the Bedrock foundation models available to the configured AWS credentials.

# Abort on the first failing command, unset variable or failed pipeline stage,
# so a broken download or install is reported where it happens instead of
# surfacing later as a confusing "command not found".
set -euo pipefail

# Install AWS CLI prerequisites
apt-get install -y unzip zip

# Install AWS CLI v2 (which includes the Bedrock commands)
# -f makes curl fail on HTTP errors rather than saving an error page as the zip;
# -sS hides the progress meter but still prints errors; -L follows redirects.
curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o awscliv2.zip
# -o overwrites without prompting so the script never blocks waiting for input.
unzip -q -o awscliv2.zip
# --update lets the installer succeed when an AWS CLI v2 is already present.
./aws/install --update
aws --version

# List Bedrock foundation models for single profile only
aws bedrock list-foundation-models
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Install aws cli and then check access to bedrock model list script |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| from typing import Any | ||
| from typing import Dict | ||
| from typing import Union | ||
|
|
||
| from neuro_san.interfaces.coded_tool import CodedTool | ||
|
|
||
|
|
||
class Accountant(CodedTool):
    """
    A tool that updates a running cost each time it is called.
    """

    # Amount added to the running cost on every call.
    # Deliberately a value other than 1, which makes it a little harder
    # for the LLM to guess the next total instead of calling the tool.
    COST_INCREMENT: float = 3.0

    def invoke(self, args: Dict[str, Any], sly_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Updates the passed running cost each time it's called.

        :param args: A dictionary with the following keys:
                    "running_cost": the running cost to update.
                        A missing key or a None value is treated as 0.0.

        :param sly_data: A dictionary containing parameters that should be kept out of the chat stream.
                    Keys expected for this implementation are:
                        None

        :return: A dictionary containing:
                    "running_cost": the updated running cost.

        :raises ValueError: if "running_cost" is a string that cannot be parsed as a float.
        :raises TypeError: if "running_cost" is of a type float() cannot convert.
        """
        tool_name = self.__class__.__name__
        print(f"========== Calling {tool_name} ==========")
        # Parse the arguments
        print(f"args: {args}")

        # float(None) raises TypeError, so fall back to a zero starting cost
        # when the caller supplied no running cost at all.
        raw_cost: Any = args.get("running_cost")
        running_cost: float = 0.0 if raw_cost is None else float(raw_cost)

        updated_running_cost: float = running_cost + self.COST_INCREMENT

        tool_response = {
            "running_cost": updated_running_cost
        }
        print("-----------------------")
        print(f"{tool_name} response: ", tool_response)
        print(f"========== Done with {tool_name} ==========")
        return tool_response

    async def async_invoke(self, args: Dict[str, Any], sly_data: Dict[str, Any]) -> Union[Dict[str, Any], str]:
        """
        Delegates to the synchronous invoke method because it's quick, non-blocking.

        :param args: Same dictionary that invoke() accepts.
        :param sly_data: Same dictionary that invoke() accepts.
        :return: Whatever invoke() returns for the same arguments.
        """
        return self.invoke(args, sly_data)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| "music_nerd_pro_llm_azure.hocon": true, | ||
| "music_nerd_pro_llm_ollama.hocon": true, | ||
| "music_nerd_pro_multi_agents.hocon": true, | ||
| "music_nerd_pro_llm_bedrock_claude.hocon": true, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added new agent hocon to the registry. |
||
|
|
||
| # This one is an example of agents calling tools that has no parameters. | ||
| "date_time.hocon": true, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
|
|
||
| # Copyright (C) 2023-2025 Cognizant Digital Business, Evolutionary AI. | ||
| # All Rights Reserved. | ||
| # Issued under the Academic Public License. | ||
| # | ||
| # You can be released from the terms, and requirements of the Academic Public | ||
| # License by purchasing a commercial license. | ||
| # Purchase of a commercial license is mandatory for any use of the | ||
| # neuro-san SDK Software in commercial settings. | ||
| # | ||
| # END COPYRIGHT | ||
|
|
||
| # The schema specifications for this file are documented here: | ||
| # https://github.com/cognizant-ai-lab/neuro-san/blob/main/docs/agent_hocon_reference.md | ||
|
|
||
| { | ||
| "llm_config": { | ||
| "model_name": "bedrock-us-claude-3-7-sonnet", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added hocon using what is listed in the User's guide for this Agent. |
||
| }, | ||
| "tools": [ | ||
| # These tool definitions do not have to be in any particular order | ||
| # How they are linked and call each other is defined within their | ||
| # own specs. This could be a graph, potentially even with cycles. | ||
|
|
||
| # This first agent definition is regarded as the "Front Man", which | ||
| # does all the talking to the outside world/client. | ||
| # It is identified as such because it is either: | ||
| # A) The only one with no parameters in his function definition, | ||
| # and therefore he needs to talk to the outside world to get things rolling. | ||
| # B) The first agent listed, regardless of function parameters. | ||
| # | ||
| # Some disqualifications from being a front man: | ||
| # 1) Cannot use a CodedTool "class" definition | ||
| # 2) Cannot use a Tool "toolbox" definition | ||
| { | ||
| "name": "MusicNerdPro", | ||
|
|
||
| # Note that there are no parameters defined for this guy's "function" key. | ||
| # This is the primary way to identify this tool as a front-man, | ||
| # distinguishing it from the rest of the tools. | ||
|
|
||
| "function": { | ||
|
|
||
| # The description acts as an initial prompt. | ||
| "description": """ | ||
| I can help with music-related inquiries. | ||
| """ | ||
| }, | ||
|
|
||
| "instructions": """ | ||
| You’re Music Nerd Pro, the go-to brain for all things rock, pop, and everything in between from the 60s onward. You live for liner notes, B-sides, lost demos, and legendary live sets. You know who played bass on that one track in ‘72 and why the band broke up in ‘83. People come to you with questions like: | ||
| • “What’s the story behind this song?” | ||
| • “Which album should I start with?” | ||
| • “Who influenced this band’s sound?” | ||
| • “Is there a deeper meaning in these lyrics?” | ||
| • “What’s a hidden gem I probably missed?” | ||
| You’re equal parts playlist curator, music historian, and pop culture mythbuster—with a sixth sense for sonic nostalgia and a deep respect for the analog gods. | ||
|
|
||
| - You must call the Accountant tool exactly once per user question — no more, no less. | ||
| - You must not estimate, guess, or invent the cost under any circumstance. | ||
| - You must not skip calling the Accountant tool — it is required once for every question. | ||
|
|
||
| For each question you receive, call your Accountant agent to calculate the running cost. Otherwise you won't get paid! | ||
| Then answer with a JSON message that has two keys: | ||
| 1. An "answer" key whose value has the answer to the question | ||
| 2. A "running_cost" key whose value has the running cost computed by the Accountant agent. | ||
| """, | ||
| "tools": ["Accountant"] | ||
|
|
||
| # Parse any JSON in responses into the structure field of the ChatMessage. | ||
| "structure_formats": "json" | ||
| }, | ||
| { | ||
| "name": "Accountant", | ||
| "function": { | ||
| "description": """ | ||
| You are an API that keeps track of the running cost of the MusicNerdPro service. Pass the current running cost | ||
| to the API to get the updated running cost. If no running cost is known, pass 0.00. | ||
| """, | ||
| "parameters": { | ||
| "type": "object", | ||
| "properties": { | ||
| "running_cost": { | ||
| "type": "float", | ||
| "description": "The current running total of the service cost." | ||
| }, | ||
| }, | ||
| "required": ["running_cost"] | ||
| } | ||
| }, | ||
| "class": "accountant.Accountant" | ||
| }, | ||
| ] | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,6 @@ | |
|
|
||
| { | ||
| "llm_config": { | ||
| "class": "gemini", | ||
| "model_name": "gemini-2.5-flash", | ||
| }, | ||
| "tools": [ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,10 +7,12 @@ markers = | |
| non_default_llm_provider: Tests that exercise llm providers other than our default Open AI | ||
| anthropic: Tests that specifically use anthropic as the llm provider | ||
| gemini: Tests that specifically use gemini as the llm provider | ||
| azure: Tests that specifically use gemini as the llm provider | ||
| ollama: Tests that specifically use gemini as the llm provider | ||
| azure: Tests that specifically use azure as the llm provider | ||
| ollama: Tests that specifically use ollama as the llm provider | ||
| bedrock_claude: Tests that specifically use bedrock claude as the llm provider | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ignored some of the non-important annoying warning messages. |
||
| needs_server: Tests that need a server running in order to complete successfully | ||
|
|
||
| # silence specific warnings | ||
| filterwarnings = | ||
| # Ignore warnings about protobuf 4 | ||
| ignore:Type google._upb._message.* uses PyType_Spec with a metaclass that has custom tp_new:DeprecationWarning | ||
| # Ignore warnings about protobuf 4's PyType_Spec metaclass usage | ||
| ignore:Type google\._upb\._message.* uses PyType_Spec with a metaclass that has custom tp_new:DeprecationWarning | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| # Copyright (C) 2023-2025 Cognizant Digital Business, Evolutionary AI. | ||
| # All Rights Reserved. | ||
| # Issued under the Academic Public License. | ||
| # | ||
| # You can be released from the terms, and requirements of the Academic Public | ||
| # License by purchasing a commercial license. | ||
| # Purchase of a commercial license is mandatory for any use of the | ||
| # neuro-san SDK Software in commercial settings. | ||
| # | ||
| # END COPYRIGHT | ||
|
|
||
| # This file defines everything necessary for a data-driven test. | ||
| { | ||
| # Describes what agent to test against. | ||
| "agent": "music_nerd_pro_llm_bedrock_claude", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added test hocon for this agent model |
||
|
|
||
| # Connect to the agent via a server | ||
| "connections": ["direct"], | ||
|
|
||
| # Interactions are a series of dictionaries with request elements paired with | ||
| # descriptions of response checks. | ||
| "interactions": [ | ||
| { | ||
| # This is what we send as input to streaming_chat() | ||
| "text": "Who did Yellow Submarine?", | ||
|
|
||
| # The response block treats how we are going to test what comes back | ||
| "response": { | ||
| # Structure block says how we are going to examine the structure | ||
| # (dictionary) returned as part of the response. | ||
| "structure": { | ||
| # "answer" is a key that is supposed to be in the dictionary. | ||
| "answer": { | ||
| # Keywords says we are going to look for exact matches for each | ||
| # element in a list of strings. All elements need to show up | ||
| # in order to pass. | ||
| "keywords": "Beatles" | ||
| }, | ||
| "running_cost": { | ||
| "value": 3.0 | ||
| } | ||
| } | ||
| } | ||
| }, | ||
| { | ||
| # This next interaction block tests the ability for an agent | ||
| # to continue its conversation with the context from the previous | ||
| # interaction. | ||
| "text": "Where were they from?", | ||
| "response": { | ||
| "structure": { | ||
| "answer": { | ||
| "keywords": "Liverpool" | ||
| }, | ||
| "running_cost": { | ||
| "value": 6.0 | ||
| } | ||
| } | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,15 +87,17 @@ def test_hocon_with_server(self, test_name: str, test_hocon: str): | |
| "music_nerd_pro_llm_anthropic/combination_responses_with_history_direct.hocon", | ||
| "music_nerd_pro_llm_gemini/combination_responses_with_history_direct.hocon", | ||
| "music_nerd_pro_llm_azure/combination_responses_with_history_direct.hocon", | ||
| "music_nerd_pro_llm_bedrock_claude/combination_responses_with_history_direct.hocon", | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added this test case as part of Smoke test marker |
||
|
|
||
| # List more hocon files as they become available here. | ||
| ])) | ||
| @pytest.mark.timeout(30) # in seconds for this test | ||
| @pytest.mark.timeout(60) # in seconds for this test | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The timeout was bumped up as this test case took slightly longer than others. |
||
| @pytest.mark.smoke | ||
| @pytest.mark.non_default_llm_provider | ||
| @pytest.mark.anthropic | ||
| @pytest.mark.gemini | ||
| @pytest.mark.azure | ||
| @pytest.mark.bedrock_claude | ||
| def test_hocon_with_non_default_llm(self, test_name: str, test_hocon: str): | ||
| """ | ||
| Test method for a single parameterized test case specified by a hocon file. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are the environment variables needed for a single profile [ AWS CLI].