HTTPArchive · max-ostapenko · Jan 15, 2026 · Jan 12, 2026 · Jan 13, 2026 · Jan 14, 2026
diff --git a/src/tools/scripts/chart-adjustments.ipynb → sql/util/chart-adjustments.ipynb b/src/tools/scripts/chart-adjustments.ipynb → sql/util/chart-adjustments.ipynb
@@ -12,15 +12,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Authenticate the user\n",
+    "import sys\n",
+    "\n",
+    "if 'google.colab' in sys.modules:\n",
+    "    from google.colab import auth\n",
+    "\n",
+    "    auth.authenticate_user()\n",
+    "    credentials = auth.get_user_credentials()\n",
+    "else:\n",
+    "    import google.auth\n",
+    "\n",
+    "    SCOPES = [\n",
+    "        'https://www.googleapis.com/auth/spreadsheets'\n",
+    "    ]\n",
+    "    credentials, project = google.auth.default(scopes=SCOPES)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
    "metadata": {
     "id": "cOcbpC6qRou_"
    },
    "outputs": [],
    "source": [
-    "from google.colab import auth\n",
     "from googleapiclient.discovery import build\n",
     "\n",
+    "sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)\n",
+    "\n",
     "def update_chart_size(spreadsheet_id, is_dry_run=False, target_width=600, target_height=371):\n",
     "    response = sheets_service.spreadsheets().get(spreadsheetId=spreadsheet_id, includeGridData=False).execute()\n",
     "    sheets = response.get('sheets', [])\n",
@@ -54,26 +78,22 @@
     "                print(f\"\"\"sheet: {sheet['properties']['title']},\n",
     "chart: {chart['spec']['title']},\n",
     "dimensions: {chart['position']['overlayPosition']['widthPixels']} x {chart['position']['overlayPosition'].get('heightPixels', 'N/A')}\n",
-    "                \"\"\")\n",
-    "\n",
-    "# Authenticate the user\n",
-    "auth.authenticate_user()\n",
-    "sheets_service = build('sheets', 'v4', cache_discovery=False)"
+    "                \"\"\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {
     "id": "vp1izUBSLxp9"
    },
    "outputs": [],
    "source": [
     "# Replace this with the ID of your Google Sheets file\n",
-    "SPREADSHEET_ID = '18r8cT6x9lPdM-rXvXjsqx84W7ZDdTDYGD59xr0UGOwg'\n",
+    "SPREADSHEET_ID = '1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4'\n",
     "\n",
     "# Call the function to update the chart width\n",
-    "update_chart_size(SPREADSHEET_ID, target_height=None, is_dry_run=True)"
+    "update_chart_size(SPREADSHEET_ID, target_height=None, is_dry_run=False)"
    ]
   }
  ],
@@ -85,11 +105,21 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": ".venv",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.14.2"
   }
  },
  "nbformat": 4,

diff --git a/sql/util/generate_figure_markup.py b/sql/util/generate_figure_markup.py
@@ -0,0 +1,80 @@
+import re
+import os
+from googleapiclient.discovery import build  # pylint: disable=import-error
+import google.auth  # pylint: disable=import-error
+
+# Configuration
+# ID of the Google Sheets file that generate_figure_markup() scans for charts.
+SPREADSHEET_ID = '1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4'
+# Identifier interpolated into every chart_url ("/spreadsheets/d/e/<id>/pubchart").
+# NOTE(review): presumably the spreadsheet's "publish to web" ID — confirm.
+PUBCHART_ID = '2PACX-1vRC5wrzy5NEsWNHn9w38RLsMURRScnP4jgjO1mDiVhsfFCY55tujlTUZhUaEWzmPtJza0QA7w8S4uK5'
+SQL_DIR = '../2025/privacy'  # Relative to this script's location
+
+# OAuth scopes passed to google.auth.default().
+# NOTE(review): this script only calls spreadsheets().get(); a read-only
+# scope may be sufficient — confirm before narrowing.
+SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
+
+
+def get_sql_to_sheet_map(sql_dir):
+    """Map derived sheet titles to the ``.sql`` files found in *sql_dir*.
+
+    A title is derived from a filename by replacing ``.sql`` and every run
+    of non-alphanumeric characters with a space, stripping the result and
+    title-casing it (``foo_bar.sql`` -> ``Foo Bar``).
+
+    Args:
+        sql_dir: Path of the directory to scan (not recursive).
+
+    Returns:
+        Dict of ``{sheet_title: filename}``. Empty when *sql_dir* is not an
+        existing directory; a message is printed in that case.
+    """
+    mapping = {}
+    # isdir rather than exists: a path pointing at a regular file would pass
+    # an existence check and then make os.listdir raise NotADirectoryError.
+    if not os.path.isdir(sql_dir):
+        print(f"Directory not found: {sql_dir}")
+        return mapping
+    # os.listdir order is arbitrary; sort so that when two filenames collapse
+    # to the same title the surviving entry is the same on every run.
+    for filename in sorted(os.listdir(sql_dir)):
+        if filename.endswith(".sql"):
+            sheet_name = re.sub(r'(\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()
+            mapping[sheet_name] = filename
+    return mapping
+
+
+def generate_figure_markup(spreadsheet_id, sql_dir):
+    """Print a ``figure_markup`` shortcode for every chart in a spreadsheet.
+
+    Args:
+        spreadsheet_id: ID of the Google Sheets file to scan.
+        sql_dir: Directory holding the ``.sql`` files; used to fill in each
+            shortcode's ``sql_file`` attribute via get_sql_to_sheet_map().
+
+    Returns:
+        None. Shortcodes are printed to stdout. A warning is printed to
+        stderr for each sheet with charts but no matching SQL file, and
+        ``TODO.sql`` is emitted as a placeholder for it. If credentials
+        cannot be obtained, a message is printed and nothing else happens.
+    """
+    import sys  # local import: only needed for the stderr warning below
+
+    try:
+        credentials, _ = google.auth.default(scopes=SCOPES)
+        sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)
+    except Exception as e:
+        print(f"Authentication failed: {e}")
+        print("Please ensure you have application default credentials set up.")
+        return
+
+    sql_map = get_sql_to_sheet_map(sql_dir)
+    # Case-folded index backing the case-insensitive fallback lookup.
+    sql_map_folded = {name.casefold(): filename for name, filename in sql_map.items()}
+    response = sheets_service.spreadsheets().get(spreadsheetId=spreadsheet_id, includeGridData=False).execute()
+    sheets = response.get('sheets', [])
+
+    for sheet in sheets:
+        charts = sheet.get('charts', [])
+        if not charts:
+            # Nothing to emit for this sheet, so don't warn about it either.
+            continue
+
+        sheet_name = sheet['properties']['title']
+        sheet_id = sheet['properties']['sheetId']
+
+        # Exact title first, then a case-insensitive match.
+        sql_file = sql_map.get(sheet_name) or sql_map_folded.get(sheet_name.strip().casefold())
+        if not sql_file:
+            # Warn on stderr so the stdout markup stays clean for copy/paste.
+            print(f"Warning: no SQL file matches sheet {sheet_name!r}; using TODO.sql", file=sys.stderr)
+            sql_file = "TODO.sql"
+
+        for chart in charts:
+            title = chart['spec'].get('title', 'Untitled Chart')
+            chart_id = chart['chartId']
+
+            # Slugify for image name
+            image_name = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-') + ".png"
+
+            # Construct markup
+            markup = f"""{{{{ figure_markup(
+image="{image_name}",
+caption="{title}",
+description="",
+chart_url="https://docs.google.com/spreadsheets/d/e/{PUBCHART_ID}/pubchart?oid={chart_id}&format=interactive",
+sheets_gid="{sheet_id}",
+sql_file="{sql_file}"
+)
+}}}}"""
+            print(markup)
+            print()
+
+
+if __name__ == "__main__":
+    # SQL_DIR is written relative to this file, so anchor it on the script's
+    # own directory rather than on the caller's working directory.
+    here = os.path.dirname(os.path.abspath(__file__))
+    resolved_sql_dir = os.path.normpath(os.path.join(here, SQL_DIR))
+
+    # Echo the inputs before doing any work.
+    for banner in (
+        f"Processing Spreadsheet: {SPREADSHEET_ID}",
+        f"SQL Directory: {resolved_sql_dir}\n",
+    ):
+        print(banner)
+
+    generate_figure_markup(SPREADSHEET_ID, resolved_sql_dir)
diff --git a/sql/util/requirements.txt b/sql/util/requirements.txt
@@ -1,5 +1,7 @@
 pandas==2.3.3
 google-cloud-bigquery==3.40.0
+google-api-python-client==2.188.0
+google-auth==2.47.0
 requests==2.32.5
 tabulate==0.9.0
 gspread==6.2.1

diff --git a/src/.gitignore b/src/.gitignore
@@ -9,3 +9,4 @@ templates/sitemap.xml
 static/html/
 static/js/web-vitals.js
 .coverage
+eng.traineddata
diff --git a/src/README.md b/src/README.md
@@ -117,58 +117,70 @@ There is also a file watcher, which monitors the `content` directory and automat
 npm run watch
 ```
 
-## Generating chapter images
 
-We can automate the generation of chapter images from the command line to save this onerous task.
+## Figure preparation
 
-This requires the figure markup to exist in the chapter's markdown file, including the `image` and `chart_url` attributes:
+The following tools help automate figure positioning and metadata:
 
-```py
-{{ figure_markup(
-  image="pwa-timeseries-of-service-worker-installations.png",
-  ...
-  chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vRRpTSA4fsHwUap-ByQ08j95uo7Zm1kY6lTSvA-DZT54g2QZ0guV7db3QyQwQgMPzsKsJ43gbzqfJst/pubchart?oid=1883263914&format=interactive",
-  ...
-  )
-}}
-```
+1. **Running queries and exporting to sheets**: [bq_to_sheets.ipynb](../sql/util/bq_to_sheets.ipynb) runs queries and exports the results to Google Sheets.
+2. **Normalization**: [chart-adjustments.ipynb](../sql/util/chart-adjustments.ipynb) normalizes chart sizes in Google Sheets to ensure consistent dimensions for image generation.
+3. **Figure markup generation**: [generate_figure_markup.py](../sql/util/generate_figure_markup.py) scans Google Sheets for charts and generates the corresponding `figure_markup` shortcodes with mapped SQL files and sheet IDs.
+4. **Generating chapter images**: We can automate the generation of chapter images from the command line to save this onerous task.
 
-It can be run like below, by passing a chapter markdown (with or without the `.md` extension):
+    This requires the figure markup to exist in the chapter's markdown file, including the `image` and `chart_url` attributes:
 
-```bash
-npm run figure-images en/2021/pwa
-```
+    ```py
+    {{ figure_markup(
+      image="pwa-timeseries-of-service-worker-installations.png",
+      ...
+      chart_url="https://docs.google.com/spreadsheets/d/e/2PACX-1vRRpTSA4fsHwUap-ByQ08j95uo7Zm1kY6lTSvA-DZT54g2QZ0guV7db3QyQwQgMPzsKsJ43gbzqfJst/pubchart?oid=1883263914&format=interactive",
+      ...
+      )
+    }}
+    ```
 
-Which will then generate any missing figures based on the chapter markup, skipping images that already exist:
+    It can be run like below, by passing a chapter markdown (with or without the `.md` extension):
 
-```log
-> almanac.httparchive.org@0.0.1 figure-images
-> node ./tools/generate/generate_figure_images "en/2021/pwa"
-
-Generating for chapter: pwa for year 2021
-  Skipping: pwa-service-worker-controlled-pages-by-rank.png as image already exists
-  Skipping: pwa-most-used-service-worker-events.png as image already exists
-  Skipping: pwa-service-worker-and-manifest-usage.png as image already exists
-  Skipping: pwa-top-pwa-manifest-properties.png as image already exists
-  Skipping: pwa-top-pwa-manifest-icon-sizes.png as image already exists
-  Skipping: pwa-manifest-display-values.png as image already exists
-  Skipping: pwa-manifests-preferring-native-app.png as image already exists
-  Skipping: pwa-industry-categories.png as image already exists
-  Skipping: pwa-lighthouse-pwa-audits.png as image already exists
-  Skipping: pwa-lighthouse-pwa-scores.png as image already exists
-  Skipping: pwa-libraries-and-scripts.png as image already exists
-  Skipping: pwa-top-workbox-versions.png as image already exists
-  Skipping: pwa-top-workbox-packages.png as image already exists
-  Generating image pwa-workbox-runtime-caching-strategies.png...
-  Generating image pwa-notification-acceptance-rates.png...
-  Generating image pwa-install-events.png...
-```
+    ```bash
+    npm run figure-images en/2021/pwa
+    ```
+
+    Which will then generate any missing figures based on the chapter markup, skipping images that already exist:
+
+    ```log
+    > almanac.httparchive.org@0.0.1 figure-images
+    > node ./tools/generate/generate_figure_images "en/2021/pwa"
+
+    Generating for chapter: pwa for year 2021
+      Skipping: pwa-service-worker-controlled-pages-by-rank.png as image already exists
+      Skipping: pwa-most-used-service-worker-events.png as image already exists
+      Skipping: pwa-service-worker-and-manifest-usage.png as image already exists
+      Skipping: pwa-top-pwa-manifest-properties.png as image already exists
+      Skipping: pwa-top-pwa-manifest-icon-sizes.png as image already exists
+      Skipping: pwa-manifest-display-values.png as image already exists
+      Skipping: pwa-manifests-preferring-native-app.png as image already exists
+      Skipping: pwa-industry-categories.png as image already exists
+      Skipping: pwa-lighthouse-pwa-audits.png as image already exists
+      Skipping: pwa-lighthouse-pwa-scores.png as image already exists
+      Skipping: pwa-libraries-and-scripts.png as image already exists
+      Skipping: pwa-top-workbox-versions.png as image already exists
+      Skipping: pwa-top-workbox-packages.png as image already exists
+      Generating image pwa-workbox-runtime-caching-strategies.png...
+      Generating image pwa-notification-acceptance-rates.png...
+      Generating image pwa-install-events.png...
+    ```
+
+    To regenerate images, authors can delete the existing ones and rerun the command.
 
-Authors can delete images and rerun if they want to, to regenerate images.
+    Images will automatically be compressed by our Calibre GitHub Action when uploaded to GitHub, but you can get a lot more compression (about 44% more!) by running them through <https://tinypng.com> instead (at which point the Calibre Action will usually not find any further compression gains). It's quite simple to upload them and download the results, so we would encourage analysts/authors to take this step.
 
-Images will automatically be compressed by our Calibre GitHub Action when uploaded to GitHub, but you can get a lot more compression (about 44% more!) by running them through <https://tinypng.com> instead (at which point the Calibre Action will usually not find any further compression gains). It's quite simple to drag them up, and download them, so would encourage analysts/authors to take this step.
+    Running them through <https://tinypng.com> also has the added advantage of the compression being repeatable each time. So if you are not sure which images you have changed, you can delete them all, regenerate them all, run them through TinyPNG, and then a `git diff` will only show differences on the images that have changed. This will not be the case if you use the Calibre GitHub Action and it will look like all images have changed.
 
-Running them through <https://tinypng.com> also has the added advantage of the compression being repeatable each time. So if you are not sure which images you have changed, you can delete them all, regenerate them all, run them through TinyPNG, and then a `git diff` will only show differences on the images that have changed. This will not be the case if you use the Calibre GitHub Action and it will look like all images have changed.
+5. **Automated descriptions**: [generate_figure_descriptions.js](tools/generate/generate_figure_descriptions.js) uses OCR (via Tesseract.js) to automatically populate `description=""` fields in markdown chapters based on the generated images.
+
+    ```bash
+    node ./tools/generate/generate_figure_descriptions en/2025/privacy
+    ```
 
 ## Linting files
 

diff --git a/src/config/2025.json b/src/config/2025.json
@@ -67,8 +67,7 @@
           "chapter_number": "8",
           "title": "Privacy",
           "slug": "privacy",
-          "hero_dir": "2020",
-          "todo": true
+          "hero_dir": "2020"
         },
         {
           "part": "II",

diff --git a/src/config/contributors.json b/src/config/contributors.json
@@ -3229,6 +3229,10 @@
         "analysts",
         "developers",
         "authors"
+      ],
+      "2025": [
+        "analysts",
+        "editors"
       ]
     },
     "website": "https://maxostapenko.com"
@@ -3772,6 +3776,7 @@
       "2025": [
         "committee",
         "leads",
+        "authors",
         "reviewers"
       ]
     },
@@ -5154,6 +5159,7 @@
     "name": "Vinod Tiwari",
     "teams": {
       "2025": [
+        "authors",
         "reviewers"
       ]
     },
@@ -5421,5 +5427,18 @@
       ]
     },
     "twitter": "_cybai"
+  },
+  "RumaisaHabib": {
+    "avatar_url": "66083065",
+    "github": "RumaisaHabib",
+    "name": "Rumaisa Habib",
+    "teams": {
+      "2025": [
+        "authors"
+      ]
+    },
+    "website": "https://rumaisahabib.com/",
+    "linkedin": "rumaisahabib"
   }
+
 }