Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
05c2be5
init
max-ostapenko Jan 12, 2026
73efcee
update chart formatting script
max-ostapenko Jan 13, 2026
40149d5
text
max-ostapenko Jan 14, 2026
79bfd6c
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 14, 2026
e7314a9
chapter + tools
max-ostapenko Jan 14, 2026
941752b
Optimised images with calibre/image-actions
github-actions[bot] Jan 14, 2026
44bf6f3
lint
max-ostapenko Jan 14, 2026
3f6c1e1
Merge branch 'privacy-markdown-2025' of https://github.com/HTTPArchiv…
max-ostapenko Jan 14, 2026
585fe72
lint
max-ostapenko Jan 14, 2026
8bb1809
lint
max-ostapenko Jan 14, 2026
92c584b
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 14, 2026
b6f4b85
readme for chart tools
max-ostapenko Jan 15, 2026
0f6ee30
revert changes
max-ostapenko Jan 15, 2026
eabf2c9
lint
max-ostapenko Jan 15, 2026
635cc18
fix
max-ostapenko Jan 15, 2026
aee4df9
CodeQL fix
max-ostapenko Jan 15, 2026
cfc3dbb
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
6a57098
Merge branch 'privacy-markdown-2025' of https://github.com/HTTPArchiv…
max-ostapenko Jan 15, 2026
c944424
jannis's suggestion
max-ostapenko Jan 15, 2026
59e77b8
Apply suggestion from @JannisBush
max-ostapenko Jan 15, 2026
ea1c907
nrllh as 3rd author
max-ostapenko Jan 15, 2026
160abc0
copilot review
max-ostapenko Jan 15, 2026
f06f529
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 15, 2026
b64a48a
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
abd35c4
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
1a35552
Metadata cleanup
tunetheweb Jan 15, 2026
8e8e86a
Interationalise links with no translations
tunetheweb Jan 15, 2026
6fa352f
Code formatting and smart quotes
tunetheweb Jan 15, 2026
23b356c
Headings
tunetheweb Jan 15, 2026
4cbc9b1
Big number
tunetheweb Jan 15, 2026
6d40b07
Misc edits
tunetheweb Jan 15, 2026
1aaec86
Chart title for Clients Hints
tunetheweb Jan 15, 2026
08e3bfb
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 15, 2026
011f26b
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
c696dba
featured stats
max-ostapenko Jan 15, 2026
a2154e1
new images
max-ostapenko Jan 15, 2026
abdc018
Optimised images with calibre/image-actions
github-actions[bot] Jan 15, 2026
721ba3f
capitalized client names for charts
max-ostapenko Jan 15, 2026
c76e91d
lint
max-ostapenko Jan 15, 2026
1b74a27
mention the UA reduction upgrade
max-ostapenko Jan 15, 2026
98281db
fix
max-ostapenko Jan 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,39 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Authenticate the user\n",
"import sys\n",
"\n",
"if 'google.colab' in sys.modules:\n",
" from google.colab import auth\n",
"\n",
" auth.authenticate_user()\n",
" credentials = auth.get_user_credentials()\n",
"else:\n",
" import google.auth\n",
"\n",
" SCOPES = [\n",
" 'https://www.googleapis.com/auth/spreadsheets'\n",
" ]\n",
" credentials, project = google.auth.default(scopes=SCOPES)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "cOcbpC6qRou_"
},
"outputs": [],
"source": [
"from google.colab import auth\n",
"from googleapiclient.discovery import build\n",
"\n",
"sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)\n",
"\n",
"def update_chart_size(spreadsheet_id, is_dry_run=False, target_width=600, target_height=371):\n",
" response = sheets_service.spreadsheets().get(spreadsheetId=spreadsheet_id, includeGridData=False).execute()\n",
" sheets = response.get('sheets', [])\n",
Expand Down Expand Up @@ -54,26 +78,22 @@
" print(f\"\"\"sheet: {sheet['properties']['title']},\n",
"chart: {chart['spec']['title']},\n",
"dimensions: {chart['position']['overlayPosition']['widthPixels']} x {chart['position']['overlayPosition'].get('heightPixels', 'N/A')}\n",
" \"\"\")\n",
"\n",
"# Authenticate the user\n",
"auth.authenticate_user()\n",
"sheets_service = build('sheets', 'v4', cache_discovery=False)"
" \"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"id": "vp1izUBSLxp9"
},
"outputs": [],
"source": [
"# Replace this with the ID of your Google Sheets file\n",
"SPREADSHEET_ID = '18r8cT6x9lPdM-rXvXjsqx84W7ZDdTDYGD59xr0UGOwg'\n",
"SPREADSHEET_ID = '1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4'\n",
"\n",
"# Call the function to update the chart width\n",
"update_chart_size(SPREADSHEET_ID, target_height=None, is_dry_run=True)"
"update_chart_size(SPREADSHEET_ID, target_height=None, is_dry_run=False)"
]
}
],
Expand All @@ -85,11 +105,21 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.2"
}
},
"nbformat": 4,
Expand Down
80 changes: 80 additions & 0 deletions sql/util/generate_chart_markup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import re
import os
from googleapiclient.discovery import build
import google.auth

# Configuration
# ID of the Google Sheets document whose sheets and charts are read via the Sheets API.
SPREADSHEET_ID = '1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4'
# ID placed in the ".../spreadsheets/d/e/<id>/pubchart" part of every generated chart_url.
# NOTE(review): presumably the "publish to web" ID of the same spreadsheet - confirm.
PUBCHART_ID = '2PACX-1vRC5wrzy5NEsWNHn9w38RLsMURRScnP4jgjO1mDiVhsfFCY55tujlTUZhUaEWzmPtJza0QA7w8S4uK5'
SQL_DIR = '../2025/privacy' # Relative to this script's location

# OAuth scopes requested when loading application default credentials.
# NOTE(review): this is the read/write Sheets scope, while this script only issues a
# spreadsheets().get call - a read-only scope may be sufficient; confirm before changing.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']


def get_sql_to_sheet_map(sql_dir):
    """Map derived sheet titles to the ``.sql`` files found in *sql_dir*.

    The title for a file is built by replacing every ``.sql`` occurrence and
    every run of non-alphanumeric characters in its name with a space, then
    trimming and title-casing the result, for example
    ``cookies_top_names.sql`` becomes ``Cookies Top Names``.

    Args:
        sql_dir: Path of the directory to scan (not recursive).

    Returns:
        A dict of ``{derived sheet title: SQL file name}``. It is empty when
        *sql_dir* does not exist or is not a directory; in that case a
        "Directory not found" message is printed.
    """
    # isdir (rather than exists) so that a path pointing at a regular file is
    # reported like a missing directory instead of making os.listdir raise.
    if not os.path.isdir(sql_dir):
        print(f"Directory not found: {sql_dir}")
        return {}

    mapping = {}
    # os.listdir order is arbitrary; sorting makes the result reproducible
    # when two file names happen to derive the same sheet title.
    for filename in sorted(os.listdir(sql_dir)):
        if not filename.endswith(".sql"):
            continue
        sheet_name = re.sub(r'(\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()
        mapping[sheet_name] = filename
    return mapping


def generate_figure_markup(spreadsheet_id, sql_dir):
    """Print a ``figure_markup`` template call for every chart in a spreadsheet.

    Loads application default credentials, fetches the spreadsheet metadata
    (without grid data) and, for each chart on each sheet, prints a block
    containing the image name (slug of the chart title), caption, published
    chart URL, sheet id and the SQL file associated with the sheet.

    The SQL file is looked up by sheet title in the mapping returned by
    ``get_sql_to_sheet_map``: first exactly, then ignoring letter case. When
    neither matches, the placeholder ``TODO.sql`` is emitted.

    Args:
        spreadsheet_id: ID of the Google Sheets document to inspect.
        sql_dir: Directory containing the SQL files to match against sheets.

    Returns:
        None. Output goes to stdout. If credentials cannot be loaded or the
        service cannot be built, an explanation is printed and the function
        returns without querying the API.
    """
    try:
        # Only the credentials are used; the detected project is ignored.
        credentials, _ = google.auth.default(scopes=SCOPES)
        sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)
    except Exception as e:
        # Deliberately broad: this is the script's user-facing boundary and
        # any failure here means the same thing to the user.
        print(f"Authentication failed: {e}")
        print("Please ensure you have application default credentials set up.")
        return

    sql_map = get_sql_to_sheet_map(sql_dir)
    # Secondary index used when a sheet title differs from the derived name
    # only by letter case.
    sql_map_folded = {name.casefold(): filename for name, filename in sql_map.items()}

    response = sheets_service.spreadsheets().get(
        spreadsheetId=spreadsheet_id, includeGridData=False
    ).execute()

    for sheet in response.get('sheets', []):
        properties = sheet['properties']
        sheet_name = properties['title']
        sheet_id = properties['sheetId']

        # Exact match first, then case-insensitive, then a visible placeholder.
        sql_file = (
            sql_map.get(sheet_name)
            or sql_map_folded.get(sheet_name.casefold())
            or "TODO.sql"
        )

        for chart in sheet.get('charts', []):
            title = chart['spec'].get('title', 'Untitled Chart')
            chart_id = chart['chartId']

            # Slugify for image name
            image_name = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-') + ".png"

            # Construct markup. The doubled braces render as literal "{{" / "}}".
            # NOTE(review): the title is inserted verbatim, so a title that
            # contains a double quote will need manual escaping in the output.
            markup = f"""{{{{ figure_markup(
  image="{image_name}",
  caption="{title}",
  description="",
  chart_url="https://docs.google.com/spreadsheets/d/e/{PUBCHART_ID}/pubchart?oid={chart_id}&format=interactive",
  sheets_gid="{sheet_id}",
  sql_file="{sql_file}"
  )
}}}}"""
            print(markup)
            print()


if __name__ == "__main__":
    # SQL_DIR is anchored to the folder containing this file, so the script
    # behaves the same regardless of the caller's working directory.
    here = os.path.dirname(os.path.abspath(__file__))
    resolved_sql_dir = os.path.normpath(os.path.join(here, SQL_DIR))

    # Echo the inputs before the generated markup.
    print(f"Processing Spreadsheet: {SPREADSHEET_ID}")
    print(f"SQL Directory: {resolved_sql_dir}\n")

    generate_figure_markup(SPREADSHEET_ID, resolved_sql_dir)
2 changes: 2 additions & 0 deletions sql/util/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
pandas==2.3.3
google-cloud-bigquery==3.40.0
google-api-python-client==2.188.0
google-auth==2.47.0
requests==2.32.5
tabulate==0.9.0
gspread==6.2.1
Expand Down
3 changes: 1 addition & 2 deletions src/config/2025.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@
"chapter_number": "8",
"title": "Privacy",
"slug": "privacy",
"hero_dir": "2020",
"todo": true
"hero_dir": "2020"
},
{
"part": "II",
Expand Down
21 changes: 20 additions & 1 deletion src/config/contributors.json
Original file line number Diff line number Diff line change
Expand Up @@ -3240,6 +3240,10 @@
"analysts",
"developers",
"authors"
],
"2025": [
"analysts",
"editors"
]
},
"website": "https://maxostapenko.com"
Expand Down Expand Up @@ -3769,7 +3773,8 @@
],
"2025": [
"leads",
"committee"
"committee",
"authors"
]
},
"twitter": "nrllah",
Expand Down Expand Up @@ -5151,6 +5156,7 @@
"name": "Vinod Tiwari",
"teams": {
"2025": [
"authors",
"reviewers"
]
},
Expand Down Expand Up @@ -5418,5 +5424,18 @@
]
},
"twitter": "_cybai"
},
"RumaisaHabib": {
"avatar_url": "66083065",
"github": "RumaisaHabib",
"name": "Rumaisa Habib",
"teams": {
"2025": [
"authors"
]
},
"website": "https://rumaisahabib.com/",
"linkedin": "rumaisahabib"
}

}
Loading
Loading