283 changes: 283 additions & 0 deletions week7/community_contributions/Odinachi/week7_exercise_qwen.ipynb
{
"cells": [
{
"cell_type": "markdown",
"id": "46044157",
"metadata": {},
"source": [
"# The Price Is Right\n",
"\n",
"### And now, to evaluate our fine-tuned open source model\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0608318f",
"metadata": {},
"outputs": [],
"source": [
"!pip install -q --upgrade bitsandbytes trl\n",
"!wget -q https://raw.githubusercontent.com/ed-donner/llm_engineering/main/week7/util.py -O util.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7235feab",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import re\n",
"import math\n",
"from tqdm import tqdm\n",
"from google.colab import userdata\n",
"from huggingface_hub import login\n",
"import torch\n",
"import transformers\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n",
"from datasets import load_dataset, Dataset, DatasetDict\n",
"from datetime import datetime\n",
"from peft import PeftModel\n",
"from util import evaluate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "254becd5",
"metadata": {},
"outputs": [],
"source": [
"# Constants\n",
"\n",
"BASE_MODEL = \"Qwen/Qwen3-4B-Base\"\n",
"PROJECT_NAME = \"price\"\n",
"HF_USER = \"odinachidavid\" # your HF name here!\n",
"\n",
"LITE_MODE = True\n",
"\n",
"DATA_USER = \"ed-donner\"\n",
"DATASET_NAME = f\"{DATA_USER}/items_prompts_lite\" if LITE_MODE else f\"{DATA_USER}/items_prompts_full\"\n",
"\n",
"if LITE_MODE:\n",
" RUN_NAME = \"2026-03-05_06.41.35-lite\"\n",
" REVISION = \"c9400d9a136ee5d79a85ba34e10f82ca583b6a26\"\n",
"else:\n",
" RUN_NAME = \"2025-11-28_18.47.07\"\n",
" REVISION = \"b19c8bfea3b6ff62237fbb0a8da9779fc12cefbd\"\n",
"\n",
"PROJECT_RUN_NAME = f\"{PROJECT_NAME}-{RUN_NAME}\"\n",
"HUB_MODEL_NAME = f\"{HF_USER}/{PROJECT_RUN_NAME}\"\n",
"\n",
"\n",
"# Hyper-parameters - QLoRA\n",
"\n",
"QUANT_4_BIT = True\n",
"capability = torch.cuda.get_device_capability() if torch.cuda.is_available() else (0, 0)\n",
"use_bf16 = capability[0] >= 8 # bf16 needs compute capability 8.0+ (Ampere or newer)"
]
},
{
"cell_type": "markdown",
"id": "beb76364",
"metadata": {},
"source": [
"### Log in to HuggingFace\n",
"\n",
"If you don't already have a HuggingFace account, visit https://huggingface.co to sign up and create a token.\n",
"\n",
"Then open the Secrets for this notebook by clicking the key icon in the left sidebar, and add a new secret called `HF_TOKEN` with your token as the value.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba9b4abf",
"metadata": {},
"outputs": [],
"source": [
"# Log in to HuggingFace\n",
"\n",
"hf_token = userdata.get('HF_TOKEN')\n",
"login(hf_token, add_to_git_credential=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a23a40a1",
"metadata": {},
"outputs": [],
"source": [
"dataset = load_dataset(DATASET_NAME)\n",
"test = dataset['test']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b705d5c",
"metadata": {},
"outputs": [],
"source": [
"test[0]"
]
},
{
"cell_type": "markdown",
"id": "93262413",
"metadata": {},
"source": [
"## Now load the Tokenizer and Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4208c73a",
"metadata": {},
"outputs": [],
"source": [
"# pick the right quantization\n",
"\n",
"if QUANT_4_BIT:\n",
" quant_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_use_double_quant=True,\n",
" bnb_4bit_compute_dtype=torch.bfloat16 if use_bf16 else torch.float16,\n",
" bnb_4bit_quant_type=\"nf4\"\n",
" )\n",
"else:\n",
" # Note: BitsAndBytesConfig has no bnb_8bit_compute_dtype option; 8-bit needs only this flag\n",
" quant_config = BitsAndBytesConfig(load_in_8bit=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cc2ff60",
"metadata": {},
"outputs": [],
"source": [
"# Load the Tokenizer and the Model\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"tokenizer.padding_side = \"right\"\n",
"\n",
"base_model = AutoModelForCausalLM.from_pretrained(\n",
" BASE_MODEL,\n",
" quantization_config=quant_config,\n",
" device_map=\"auto\",\n",
")\n",
"base_model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
"\n",
"# Load the fine-tuned model with PEFT\n",
"if REVISION:\n",
" fine_tuned_model = PeftModel.from_pretrained(base_model, HUB_MODEL_NAME, revision=REVISION)\n",
"else:\n",
" fine_tuned_model = PeftModel.from_pretrained(base_model, HUB_MODEL_NAME)\n",
"\n",
"\n",
"print(f\"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB\")"
]
},
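{
"cell_type": "markdown",
"id": "a1f20c01",
"metadata": {},
"source": [
"As a hedged sanity check (not part of the original notebook), we can see how small the LoRA adapter is relative to the quantized base model by counting parameters whose names contain `lora`:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1f20c02",
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: count LoRA adapter parameters vs. the full model\n",
"lora_params = sum(p.numel() for n, p in fine_tuned_model.named_parameters() if \"lora\" in n)\n",
"total_params = sum(p.numel() for p in fine_tuned_model.parameters())\n",
"print(f\"LoRA adapter: {lora_params:,} of {total_params:,} total parameters\")"
]
},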
{
"cell_type": "code",
"execution_count": null,
"id": "609835b2",
"metadata": {},
"outputs": [],
"source": [
"fine_tuned_model"
]
},
{
"cell_type": "markdown",
"id": "2a66d2d4",
"metadata": {},
"source": [
"# THE MOMENT OF TRUTH!\n",
"\n",
"## Use the model in inference mode\n",
"\n",
"We're trying to beat \"human\" performance: an average error of $87.62.\n",
"\n",
"Or, better still, to get close to gpt-4.1-nano at $62.51.\n",
"\n",
"## Caveat\n",
"\n",
"Keep in mind that prices of goods vary considerably; the model can't predict things like sale prices that it doesn't have any information about."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "582fad13",
"metadata": {},
"outputs": [],
"source": [
"def model_predict(item):\n",
" inputs = tokenizer(item[\"prompt\"], return_tensors=\"pt\").to(fine_tuned_model.device)\n",
" with torch.no_grad():\n",
" output_ids = fine_tuned_model.generate(**inputs, max_new_tokens=8)\n",
" prompt_len = inputs[\"input_ids\"].shape[1]\n",
" generated_ids = output_ids[0, prompt_len:]\n",
" return tokenizer.decode(generated_ids, skip_special_tokens=True)"
]
},
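{
"cell_type": "markdown",
"id": "a1f20c03",
"metadata": {},
"source": [
"As a hedged, standalone sketch (not part of the course's `util.py`): the decoded text can be reduced to a single number with a small regex helper. `extract_price` is a hypothetical name; it relies on the `re` module imported earlier.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1f20c04",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical helper (assumption): pull the first number out of the model's generation\n",
"def extract_price(text):\n",
" match = re.search(r\"[-+]?[\\d,]*\\.?\\d+\", text.replace(\"$\", \"\"))\n",
" return float(match.group().replace(\",\", \"\")) if match else 0.0\n",
"\n",
"extract_price(\"Price is $1,299.99\")"
]
},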
{
"cell_type": "code",
"execution_count": null,
"id": "7e50a24f",
"metadata": {},
"outputs": [],
"source": [
"set_seed(42)\n",
"evaluate(model_predict, test)"
]
},
{
"cell_type": "markdown",
"id": "f01998a7",
"metadata": {},
"source": [
"## Performance Evaluation\n",
"\n",
"The performance of the fine-tuned **Qwen3-4B model** for price prediction was lower than expected, especially when compared to the results obtained with **Llama 3.2**.\n",
"\n",
"\n",
"The evaluation produced the following results:\n",
"\n",
"- Average prediction error: $64.97\n",
"- MSE: 11,984\n",
"- $R^2$: 45.5%\n",
"\n",
"Further improvements may require:\n",
"- Adjusting hyperparameters during fine-tuning.\n",
"- Improving preprocessing and feature engineering for the price data."
]
},
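{
"cell_type": "markdown",
"id": "a1f20c05",
"metadata": {},
"source": [
"As a hedged sketch of how these headline numbers are defined: `truths` and `preds` below are hypothetical lists of true and predicted prices (not values returned by `evaluate`), and the function computes average absolute error, MSE, and $R^2$ from them.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1f20c06",
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: headline metrics from paired truth/prediction lists\n",
"def summarize(truths, preds):\n",
" n = len(truths)\n",
" errors = [p - t for p, t in zip(preds, truths)]\n",
" avg_abs_error = sum(abs(e) for e in errors) / n\n",
" mse = sum(e * e for e in errors) / n\n",
" mean_t = sum(truths) / n\n",
" ss_tot = sum((t - mean_t) ** 2 for t in truths)\n",
" r2 = 1 - (mse * n) / ss_tot\n",
" return avg_abs_error, mse, r2\n",
"\n",
"summarize([100.0, 50.0, 200.0], [110.0, 45.0, 190.0])"
]
},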
{
"cell_type": "markdown",
"id": "597066c4",
"metadata": {},
"source": []
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}