Fix GUC generator (#4291)

philkra · web-flow · commit 842488e894ee · 2025-07-15T14:17:39.000+02:00
* fix path to tooling

* add real values

* fixes and meta

* cosmetics and fixes
diff --git a/.github/workflows/tsdb-refresh-gucs-list.yaml b/.github/workflows/tsdb-refresh-gucs-list.yaml
@@ -25,8 +25,8 @@ jobs:
       
       - name: Update list of GUCs
         run: |
-          pip install -r ./helper-scripts/timescaledb/requirements.txt
-          python ./helper-scripts/timescaledb/generate_guc_overview.py "${{ github.event.inputs.tag }}" ./_partials/_timescaledb-gucs.md
+          pip install -r .helper-scripts/timescaledb/requirements.txt
+          python .helper-scripts/timescaledb/generate_guc_overview.py "${{ github.event.inputs.tag }}" ./_partials/_timescaledb-gucs.md
 
       - name: Create Pull Request
         uses: peter-evans/create-pull-request@v7
diff --git a/.helper-scripts/timescaledb/generate_guc_overview.py b/.helper-scripts/timescaledb/generate_guc_overview.py
@@ -20,10 +20,17 @@
 args = parser.parse_args()
 
 TYPES = {
-    "DefineCustomBoolVariable": "BOOLEAN",
-    "DefineCustomIntVariable": "INTEGER",
-    "DefineCustomEnumVariable": "ENUM",
+    "DefineCustomBoolVariable"  : "BOOLEAN",
+    "DefineCustomIntVariable"   : "INTEGER",
+    "DefineCustomEnumVariable"  : "ENUM",
     "DefineCustomStringVariable": "STRING",
+    "DefineCustomRealVariable"  : "REAL",
+}
+
+SCOPES = {  # scope identifier -> human-readable label; NOTE(review): not referenced yet, see "TODO: find scopes" in prepare()
+    "PG16_GE"      : "Postgres 16 or greater",
+    "TS_DEBUG"     : "Debug mode",
+    "USE_TELEMETRY": "Telemetry enabled", 
 }
 
 # List of GUCs to exclude from the docs
@@ -62,8 +69,22 @@ def unwrap(gucs: list, guc_type: str) -> dict:
     map = {}
 
     for guc in gucs:
-        # sanitize data
-        it = [re.sub(r"[\n\t]*", "", v).strip() for v in guc.split(",")]
+        # unwrap element 
+        # first split on new line, then join on ,
+        lines = [re.sub(r"[\n\t]*", "", v).strip() for v in guc.split("\n")]
+        it = []
+        lst = []
+        for line in lines:
+            # ends with "," --> take all preceding values from the list
+            # concatenate them with this element minus the trailing ","
+            # and reset the list again
+            if line[-1:] == ",":
+                val = "".join(lst) + line[:-1]
+                lst = []
+                it.append(val)
+            else:
+                # add the line to the list to concatenate later
+                lst.append(line)
 
         # sanitize elements
         name = re.sub(r"[\"\(\)]*", "", it[0])
@@ -73,20 +94,19 @@ def unwrap(gucs: list, guc_type: str) -> dict:
         # Exclude GUCs (if specified)
         if name not in EXCLUDE:
             map[name] = {
-                "name": name,
+                "name"      : name,
                 "short_desc": short_desc,
-                "long_desc": long_desc,
-                "value": get_value(guc_type, it),
-                "type": guc_type,
-                "scopes": [], # assigned later during scope discovery
+                "long_desc" : long_desc,
+                "value"     : get_value(guc_type, it),
+                "meta"      : get_meta_data(guc_type, it),
+                "type"      : guc_type,
+                "scopes"    : [], # assigned later during scope discovery
             }
-
-    logging.info("registered %d GUCs of type: %s" % (len(map), guc_type))
     return map
 
 def sanitize_description(text) -> str:
     # Remove all quotes and normalize whitespace to single line
-    return ' '.join(text.replace('"', '').split()).strip()
+    return strip_comment_pattern(' '.join(text.replace('"', '').split()).strip())  # presumably also drops `/* name= */` argument markers -- confirm in strip_comment_pattern
 
 def strip_comment_pattern(text) -> str:
     pattern = r'/\*\s*[a-zA-Z0-9_]*=\s*\*/'
@@ -101,12 +121,21 @@ def get_value(type: str, parts: list) -> str:
     """
     Get the value of the GUC based on the type
     """
+    # ENUM needs different handling, finding the struct and the strings
+    # identifying the options
+
+    # Every other type
+    return strip_comment_pattern(parts[4]).strip()
+
+def get_meta_data(type: str, parts: list) -> str:
+    """
+    Build the meta data string for a GUC: "min: `..`, max: `..`" (from parts[5] and parts[6]) for INTEGER and REAL, an empty string for every other type
+    """
     if type == "BOOLEAN":
-        if parts[5].upper()[0:4] == "PGC_":
-            return strip_comment_pattern(parts[4]).strip()
-        else:
-            return strip_comment_pattern(parts[5]).strip()
-    return strip_comment_pattern(parts[5]).strip()
+        return ""
+    if type in ["INTEGER", "REAL"]:  # only the numeric types get a min/max range
+        return "min: `%s`, max: `%s`" % (strip_comment_pattern(parts[5]).strip(), strip_comment_pattern(parts[6]).strip())
+    return ""
 
 """
 Parse GUCs and prepare them for rendering
@@ -118,26 +147,43 @@ def prepare(content: str) -> dict:
 
     # Find all GUCs based on patterns and prepare them in a dict
     for pattern, val in TYPES.items():
+        # Search twice to catch both call variants listed below. A single regex could cover both,
+        # but this script is neither time-sensitive nor expensive, so two passes are fine:
+        # - DefineCustomStringVariable(MAKE_EXTOPTION(
+        # - DefineCustomStringVariable(/* name= */ MAKE_EXTOPTION(
         map.update(unwrap(re.findall(r"%s\(MAKE_EXTOPTION(.*?)\);" % pattern, content, re.DOTALL), val))
+        map.update(unwrap(re.findall(r"%s\(\/\* name= \*\/ MAKE_EXTOPTION(.*?)\);" % pattern, content, re.DOTALL), val))
 
     # TODO: find scopes
     # https://github.com/timescale/timescaledb/blob/2.19.x/src/guc.c#L797
+    # SCOPES
 
+    # print summary
+    summary = {}
+    for v in map.values():
+        if v["type"] not in summary.keys():
+            summary[v["type"]] = 0
+        summary[v["type"]] += 1
+    for k, v in summary.items():
+        logging.info("registered %d GUCs of type: %s" % (v, k))
 
     # Return dict with alphabetically sorted keys
     return {i: map[i] for i in sorted(map.keys())}
 
 """
 Render the GUCs to file
 """
-def render(gucs: dict, filename: str):
+def render(gucs: dict, filename: str, version: str):  # version: release tag used for the footer link
     with open(filename, "w") as f:
-        f.write("| Name | Type | Default | Long Description |\n")
-        f.write("| -- | -- | -- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n")
+        f.write("| Name | Type | Default | Description |\n")
+        f.write("| -- | -- | -- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n")  # delimiter row must have exactly as many cells as the header (4), else the table is not rendered
         for guc in gucs.values():
-            f.write("| `%s` | `%s` | `%s` | %s |\n" % (
-                guc["name"], guc["type"], guc["value"], guc["long_desc"]
-            ))
+            desc = guc["long_desc"]
+            if guc["meta"] != "":  # show the meta data on its own line inside the description cell
+                desc += "<br />" + guc["meta"]
+            f.write("| `%s` | `%s` | `%s` | %s |\n" % (guc["name"], guc["type"], guc["value"], desc))
+        f.write("\n")  # blank line ends the table before the version footer
+        f.write("Version: [%s](https://github.com/timescale/timescaledb/releases/tag/%s)" % (version, version))
     logging.info("rendering completed to %s" % filename)
 
 """
@@ -147,6 +193,4 @@ def render(gucs: dict, filename: str):
     content = get_content("https://raw.githubusercontent.com/timescale/timescaledb/refs/tags/%s/src/guc.c" % args.tag)
     logging.info("fetched guc.c file for version: %s" % args.tag)
     gucs = prepare(content)
-    render(gucs, args.destination)
-
-#    print(gucs)
+    render(gucs, args.destination, args.tag)
diff --git a/_partials/_timescaledb-gucs.md b/_partials/_timescaledb-gucs.md