Skip to content

Commit f6ba95a

Browse files
tgasser-nv authored and Pouyanpi committed
feat(jailbreak): Add direct API key configuration support (#1260)
* Support direct jailbreak API key, not via environment variable
* Add unit test to cover api_key_env_var being set when no environment variable with that name exists
* Removed unused imports, fixed test docstring copy-and-paste
* Rename get_auth_token() to get_api_key()
1 parent c707e09 commit f6ba95a

File tree

5 files changed

+139
-9
lines changed

5 files changed

+139
-9
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Jailbreak Detection using NIMs
2+
3+
This example showcases the jailbreak detection capabilities of NeMo Guardrails using a NIM hosted on NVCF.
4+
5+
The structure of the config folder is the following:
6+
7+
- `config.yml` - The config file holding all the configuration options.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
models:
2+
- type: main
3+
engine: nvidia_ai_endpoints
4+
model: mistralai/mixtral-8x7b-instruct-v0.1
5+
parameters:
6+
temperature: 0.7
7+
max_tokens: 1000
8+
timeout: 120
9+
api_key: "<insert NVCF API key here>"
10+
11+
rails:
12+
config:
13+
jailbreak_detection:
14+
nim_base_url: "https://ai.api.nvidia.com"
15+
nim_server_endpoint: "/v1/security/nvidia/nemoguard-jailbreak-detect"
16+
api_key: "<insert NVCF API key here>"
17+
input:
18+
flows:
19+
- jailbreak detection model
20+
output:
21+
flows: []
22+
retrieval:
23+
flows: []
24+
25+
instructions:
26+
- type: general
27+
content: |
28+
Below is a conversation between a helpful AI assistant and a user.
29+
The assistant is direct, honest, and concise.
30+
If the assistant does not know something, it says so.
31+
The assistant does not engage in harmful, unethical, or illegal behavior.
32+
33+
sample_conversation: |
34+
user "Hello there!"
35+
express greeting
36+
bot express greeting
37+
"Hello! How can I assist you today?"
38+
user "What can you do for me?"
39+
ask about capabilities
40+
bot respond about capabilities
41+
"As an AI assistant, I can help you with a wide range of tasks. This includes question answering on various topics, generating text for various purposes and providing suggestions based on your preferences."

nemoguardrails/library/jailbreak_detection/actions.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,7 @@ async def jailbreak_detection_model(
9797
jailbreak_api_url = jailbreak_config.server_endpoint
9898
nim_base_url = jailbreak_config.nim_base_url
9999
nim_classification_path = jailbreak_config.nim_server_endpoint
100-
if jailbreak_config.api_key_env_var is not None:
101-
nim_auth_token = os.getenv(jailbreak_config.api_key_env_var)
102-
if nim_auth_token is None:
103-
log.warning(
104-
"Specified a value for jailbreak config api_key_env var at %s but the environment variable was not set!"
105-
% jailbreak_config.api_key_env_var
106-
)
107-
else:
108-
nim_auth_token = None
100+
nim_auth_token = jailbreak_config.get_api_key()
109101

110102
if context is not None:
111103
prompt = context.get("user_message", "")

nemoguardrails/rails/llm/config.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
BaseModel,
2727
ConfigDict,
2828
Field,
29+
SecretStr,
2930
model_validator,
3031
root_validator,
3132
validator,
@@ -572,6 +573,10 @@ class JailbreakDetectionConfig(BaseModel):
572573
default="classify",
573574
description="Classification path uri. Defaults to 'classify' for NemoGuard JailbreakDetect.",
574575
)
576+
api_key: Optional[SecretStr] = Field(
577+
default=None,
578+
description="Secret String with API key for use in Jailbreak requests. Takes precedence over api_key_env_var",
579+
)
575580
api_key_env_var: Optional[str] = Field(
576581
default=None,
577582
description="Environment variable containing API key for jailbreak detection model",
@@ -600,6 +605,31 @@ def migrate_deprecated_fields(self) -> "JailbreakDetectionConfig":
600605
self.nim_base_url = f"http://{self.nim_url}:{port}/v1"
601606
return self
602607

608+
def get_api_key(self) -> Optional[str]:
    """Return the API key to use for jailbreak detection requests, if any.

    Sources are checked in descending order of priority:

    1. The `api_key` field, a Pydantic SecretStr whose underlying string is returned.
    2. The `api_key_env_var` field, the name of an environment variable holding the key.

    Returns:
        The API key string, or None when neither source yields a non-empty value.
    """
    # A directly configured key takes precedence over the environment variable.
    if self.api_key:
        return self.api_key.get_secret_value()

    if self.api_key_env_var:
        nim_auth_token = os.getenv(self.api_key_env_var)
        if nim_auth_token:
            return nim_auth_token

        # The variable is unset or empty. Pass the name as a logging argument
        # so the message is only formatted if the record is actually emitted.
        log.warning(
            "Specified a value for jailbreak config api_key_env var at %s but the environment variable was not set!",
            self.api_key_env_var,
        )

    return None
632+
603633

604634
class AutoAlignOptions(BaseModel):
605635
"""List of guardrails that are activated"""

tests/test_jailbreak_config.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15+
import os
16+
from unittest.mock import patch
1517

18+
from pydantic import SecretStr
1619

1720
from nemoguardrails.rails.llm.config import JailbreakDetectionConfig
1821

@@ -128,3 +131,60 @@ def test_empty_configuration(self):
128131
assert config.nim_url is None
129132
assert config.nim_port is None
130133
assert config.embedding is None
134+
135+
def test_get_api_key_no_key(self):
    """With neither `api_key` nor `api_key_env_var` configured, no API key is resolved."""
    settings = {
        "nim_base_url": "http://localhost:8000/v1",
        "nim_server_endpoint": "classify",
    }
    jailbreak_config = JailbreakDetectionConfig(**settings)

    assert jailbreak_config.get_api_key() is None
145+
146+
def test_get_api_key_api_key(self):
    """A directly configured `api_key` wins even when `api_key_env_var` also resolves to a value."""
    direct_key = "nvapi-abcdef12345"
    env_name = "CUSTOM_API_KEY"
    env_key = "env-var-nvapi-abcdef12345"

    # Both sources are populated so that the precedence rule is what gets exercised.
    with patch.dict(os.environ, {env_name: env_key}):
        jailbreak_config = JailbreakDetectionConfig(
            api_key=direct_key,
            api_key_env_var=env_name,
            nim_base_url="http://localhost:8000/v1",
            nim_server_endpoint="classify",
        )
        resolved_key = jailbreak_config.get_api_key()

    assert resolved_key == direct_key
162+
163+
def test_get_api_key_api_key_env_var(self):
    """When only `api_key_env_var` is configured, the value of that environment variable is returned."""
    env_name = "CUSTOM_API_KEY"
    env_key = "env-var-nvapi-abcdef12345"

    with patch.dict(os.environ, {env_name: env_key}):
        jailbreak_config = JailbreakDetectionConfig(
            api_key_env_var=env_name,
            nim_base_url="http://localhost:8000/v1",
            nim_server_endpoint="classify",
        )
        # The environment is read when get_api_key() is called, so the lookup
        # has to happen while the patched variable is still in place.
        resolved_key = jailbreak_config.get_api_key()

    assert resolved_key == env_key
177+
178+
def test_get_api_key_api_key_env_var_not_set(self):
    """Check configuring an `api_key_env_var` that isn't set in the shell returns None"""
    api_key_env_var_name = "CUSTOM_API_KEY"

    # clear=True empties os.environ for the duration of the block, so the test
    # cannot be broken by a CUSTOM_API_KEY exported in the developer's shell or
    # CI environment. patch.dict restores the original environment on exit.
    with patch.dict(os.environ, {}, clear=True):
        config = JailbreakDetectionConfig(
            nim_base_url="http://localhost:8000/v1",
            nim_server_endpoint="classify",
            api_key_env_var=api_key_env_var_name,
        )

        # Resolve the key while the environment is still cleared.
        auth_token = config.get_api_key()
        assert auth_token is None

0 commit comments

Comments
 (0)