from datasets import load_dataset
import pandas as pd

# Fetch the "full" split of the flip dataset.
ds = load_dataset("Ztrimus/llm-safety-flip-dataset", split="full")

# Show the first record as a quick sanity check.
print(ds[0])

# Flip rate: fraction of rows whose original response is labelled "safe"
# while the perturbed response is labelled "unsafe".
df = ds.to_pandas()
was_safe = df["original_response_safety"] == "safe"
became_unsafe = df["perturbed_response_safety"] == "unsafe"
flip_rate = (was_safe & became_unsafe).mean()
print(f"Safe → Unsafe flip rate: {flip_rate:.2%}")

module load mamba/latest
source activate llm_safety_39
- Create credentials.py at the src/config location with your personal credentials.
ASURITE_ID = "YOUR_ASURITE_ID"
HF_TOKEN = "PUT_HF_TOKEN_HERE"
- To make the contents of src importable:
cd llm-sensitivity
export PYTHONPATH=$(pwd)/src