Skip to content

Commit 5f228f8

Browse files
Merge pull request #11 from rabix/feature/image_support
Feature/image support
2 parents a491233 + fa500fc commit 5f228f8

File tree

5 files changed

+437
-152
lines changed

5 files changed

+437
-152
lines changed

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
PyYAML<7.0.0
2-
packaging
2+
packaging
3+
pillow >= 11.0.0
4+
beautifulsoup4 >= 4.11.2

wrabbit/parser/constants.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ class EXTENSIONS:
1919
# ############################ CWL Standard Bits ############################ #
2020
# A generic SB input array of files that should be available on the
2121
# instance but are not explicitly provided to the execution as wdl params.
22+
23+
# This looks best on the platform
24+
MAX_APP_DESCRIPTION_IMAGE_WIDTH = 1000
25+
2226
SAMPLE_SHEET_FUNCTION = read_js_template("sample_sheet_generator.js")
2327
SAMPLE_SHEET_SWITCH = read_js_template("sample_sheet_switch.js")
2428
SB_SAMPLES_SCHEMA_DEFAULT_NAME = "samplesheet_schema.yaml"
@@ -136,6 +140,9 @@ def sample_sheet(
136140
# ############################## Nextflow Bits ############################## #
137141
# Keys that should be skipped when parsing nextflow tower yaml file
138142

143+
REGEX_NF_VERSION_PIL = r"\[Nextflow]\([^(]+(%E2%89%A5|%E2%89%A4|=|>|<)(\d{2}\.\d+\.\d+)[^)]+\)"
144+
REGEX_NF_VERSION_NUM = r"((?:[!><=]+|))(\d{2}\.\d+\.\d+)((?:\+|))"
145+
139146
NF_CONFIG_DEFAULT_NAME = 'nextflow.config'
140147
NF_SCHEMA_DEFAULT_NAME = 'nextflow_schema.json'
141148
SB_SCHEMA_DEFAULT_NAME = 'sb_nextflow_schema'
@@ -176,3 +183,25 @@ class ExecMode(Enum):
176183

177184
def __str__(self):
178185
return self.value
186+
187+
# ############################ Image generation ############################# #
188+
# This part contains constants related to image generation for Markdown
189+
190+
# Find Markdown images
191+
REGEX_MD_IMAGE = r'((?:!|)\[([^\[\]]+)]\(([^\[\]\(\)]+\.((?:jpe?g|png)))({opt}#gh-{image_mode}-mode-only|)((?:#sbg_.*|))\))'
192+
# Find <p><img> in HTML
193+
REGEX_HTML_IMAGE = r'(?:<p(?:[^>]+)>.*?)<img[^>]+>(?:.*?</p>)'
194+
# Find <h><picture> in HTML
195+
REGEX_HTML_PICTURE = r'(?:<h\d>).*?<picture>.*?</picture>.*?(?:</h\d>)'
196+
197+
198+
class ImageMode(Enum):
    """
    Display mode (light or dark) used when selecting images.

    Each member's value is the plain lowercase mode name, and ``str(member)``
    returns that value so a member can be interpolated directly into text.
    """
    # Member values are the bare mode names.
    light = 'light'
    dark = 'dark'

    @property
    def opposite(self):
        """
        Return the other mode: ``dark`` for ``light`` and ``light`` for
        ``dark``.

        :return: the ImageMode member that is not ``self``.
        """
        # Compare members by identity. Comparing ``self.value`` (a str) with
        # a member is always False for a plain Enum, which would make this
        # property return ``dark`` for every member.
        if self is ImageMode.dark:
            return ImageMode.light
        return ImageMode.dark

    def __str__(self):
        """Return the member's value ('light' or 'dark')."""
        return self.value

wrabbit/parser/nextflow.py

Lines changed: 35 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import (
22
Union,
33
Optional,
4-
TextIO,
54
)
65

76
from wrabbit.parser.utils import (
@@ -13,16 +12,18 @@
1312
create_profile_enum,
1413
nf_to_sb_input_mapper,
1514
get_tower_yml,
16-
get_dict_depth,
15+
parse_output_yml,
1716
get_entrypoint,
1817
get_docs_file,
1918
get_sample_sheet_schema,
2019
find_publish_params,
20+
convert_images_to_md,
2121
)
2222

2323
from wrabbit.parser.constants import (
2424
sample_sheet,
2525
ExecMode,
26+
ImageMode,
2627
SB_SCHEMA_DEFAULT_NAME,
2728
EXTENSIONS,
2829
NF_TO_CWL_CATEGORY_MAP,
@@ -34,19 +35,13 @@
3435
SAMPLE_SHEET_FILE_ARRAY_INPUT,
3536
SAMPLE_SHEET_SWITCH,
3637
LOAD_LISTING_REQUIREMENT,
37-
SKIP_NEXTFLOW_TOWER_KEYS,
3838
NFCORE_OUTPUT_DIRECTORY_ID,
3939
)
4040

41-
from wrabbit.specification.node import (
42-
RecordType,
43-
FileType,
44-
DirectoryType,
45-
ArrayType,
46-
)
4741
from wrabbit.specification.hints import (
4842
NextflowExecutionMode,
4943
)
44+
5045
from wrabbit.specification.sbg import (
5146
ExecutorVersion
5247
)
@@ -57,7 +52,6 @@
5752
)
5853

5954
import yaml
60-
import re
6155
import os
6256
import json
6357

@@ -77,18 +71,33 @@ def __init__(
7771
entrypoint: Optional[str] = None,
7872
executor_version: Optional[str] = None,
7973
sb_package_id: Optional[str] = None,
74+
readme_path: Optional[str] = None,
75+
search_subfolders: Optional[bool] = False,
76+
image_mode: Optional[ImageMode] = ImageMode.light
8077
):
78+
self.search_subfolders = search_subfolders
8179
self.sb_wrapper = SbWrapper(label)
8280
self.workflow_path = workflow_path
8381

8482
# Locate nextflow files in the package if possible
8583
self.init_config_files()
86-
self.nf_schema_path = get_nf_schema(self.workflow_path)
87-
self.readme_path = get_docs_file(self.workflow_path)
84+
self.nf_schema_path = get_nf_schema(
85+
self.workflow_path, search_subfolders=self.search_subfolders
86+
)
87+
88+
if readme_path:
89+
self.readme_path = readme_path
90+
else:
91+
self.readme_path = get_docs_file(
92+
self.workflow_path, search_subfolders=self.search_subfolders
93+
)
94+
8895
self.sb_samplesheet_schema = get_sample_sheet_schema(
89-
self.workflow_path)
96+
self.workflow_path, search_subfolders=self.search_subfolders
97+
)
9098

9199
self.sb_doc = sb_doc
100+
self.image_mode = image_mode
92101

93102
# app contents
94103
self.entrypoint = entrypoint
@@ -100,7 +109,9 @@ def init_config_files(self):
100109
Config may be initialized multiple times while working with a code
101110
package in case a new config file is generated with nf-core lib.
102111
"""
103-
self.nf_config_files = get_config_files(self.workflow_path) or []
112+
self.nf_config_files = get_config_files(
113+
self.workflow_path, search_subfolders=self.search_subfolders
114+
) or []
104115

105116
def generate_sb_inputs(self, execution_mode=None):
106117
"""
@@ -230,8 +241,10 @@ def generate_sb_outputs(self):
230241
Generate SB output schema
231242
"""
232243
if get_tower_yml(self.workflow_path):
233-
for output in self.parse_output_yml(
234-
open(get_tower_yml(self.workflow_path))
244+
for output in parse_output_yml(
245+
open(get_tower_yml(
246+
self.workflow_path, search_subfolders=True
247+
))
235248
):
236249
self.sb_wrapper.safe_add_output(output)
237250

@@ -274,7 +287,9 @@ def generate_app_data(self):
274287
break
275288

276289
if not self.entrypoint:
277-
self.entrypoint = get_entrypoint(self.workflow_path)
290+
self.entrypoint = get_entrypoint(
291+
self.workflow_path, search_subfolders=self.search_subfolders
292+
)
278293

279294
if not self.executor_version and self.sb_doc:
280295
self.executor_version = get_executor_version(self.sb_doc)
@@ -301,7 +316,7 @@ def nf_schema_build(self):
301316
pass
302317

303318
def generate_sb_app(
304-
self, sb_entrypoint='main.nf',
319+
self, sb_entrypoint: Optional[str] = None,
305320
executor_version: Optional[str] = None,
306321
sb_package_id: Optional[str] = None,
307322
execution_mode: Optional[Union[str, ExecMode]] = None,
@@ -338,8 +353,8 @@ def generate_sb_app(
338353
if self.sb_doc:
339354
self.sb_wrapper.add_docs(self.sb_doc)
340355
elif self.readme_path:
341-
with open(self.readme_path, 'r') as docs:
342-
self.sb_wrapper.add_docs(docs.read())
356+
docs = convert_images_to_md(self.readme_path, self.image_mode)
357+
self.sb_wrapper.add_docs(docs)
343358

344359
def parse_sample_sheet_schema(self, path):
345360
"""
@@ -466,111 +481,6 @@ def parse_sample_sheet_schema(self, path):
466481
self.sb_wrapper.add_requirement(INLINE_JS_REQUIREMENT)
467482
self.sb_wrapper.add_requirement(LOAD_LISTING_REQUIREMENT)
468483

469-
def make_output_type(self, key, output_dict, is_record=False) -> dict:
470-
"""
471-
This creates an output of specific type based on information provided
472-
through output_dict.
473-
474-
:param key:
475-
:param output_dict:
476-
:param is_record:
477-
:return:
478-
"""
479-
480-
converted_cwl_output = dict()
481-
482-
file_pattern = re.compile(r'.*\.(\w+)$')
483-
folder_pattern = re.compile(r'[^.]+$')
484-
id_key = 'id'
485-
486-
if is_record:
487-
id_key = 'name'
488-
489-
name = key
490-
if 'display' in output_dict:
491-
name = output_dict['display']
492-
493-
clean_id = re.sub(r'[^a-zA-Z0-9_]', "", name.replace(
494-
" ", "_")).lower()
495-
496-
# Case 1: Output is a Record-type
497-
if get_dict_depth(output_dict) > 0:
498-
# this is a record, go through the dict_ recursively
499-
fields = [self.make_output_type(key, val, is_record=True)
500-
for key, val in output_dict.items()]
501-
502-
used_field_ids = set()
503-
504-
for field in fields:
505-
base_field_id = field.get('name', 'Output')
506-
507-
# Since name fields can be the same for multiple inputs,
508-
# correct the name if it has already been used.
509-
chk_id = base_field_id
510-
i = 1
511-
if chk_id in used_field_ids:
512-
chk_id = f"{base_field_id}_{i}"
513-
i += 1
514-
used_field_ids.add(chk_id)
515-
516-
field['name'] = chk_id
517-
518-
converted_cwl_output = {
519-
id_key: clean_id,
520-
"label": name,
521-
"type": RecordType(fields=fields, name=clean_id, optional=True)
522-
}
523-
524-
# Case 2: Output is a File type
525-
elif re.fullmatch(file_pattern, key):
526-
# create a list of files output
527-
converted_cwl_output = {
528-
id_key: clean_id,
529-
"label": name,
530-
"type": ArrayType(items=[FileType()], optional=True),
531-
"outputBinding": {
532-
"glob": key
533-
}
534-
}
535-
536-
# Case 3: Output is a folder type
537-
elif re.fullmatch(folder_pattern, key):
538-
# create a list of directories output
539-
converted_cwl_output = {
540-
id_key: clean_id,
541-
"label": name,
542-
"type": DirectoryType(optional=True),
543-
"outputBinding": {
544-
"glob": key,
545-
"loadListing": "deep_listing"
546-
}
547-
}
548-
return converted_cwl_output
549-
550-
def parse_output_yml(self, yml_file: TextIO) -> list:
551-
"""
552-
Extracts output information from a YAML file, usually in tower.yml
553-
format.
554-
555-
:param yml_file: path to YAML file.
556-
:return: list of outputs in CWL format.
557-
"""
558-
outputs = list()
559-
yml_schema = yaml.safe_load(yml_file)
560-
561-
for key, value in yml_schema.items():
562-
# Tower yml file can use "tower" key in the yml file to designate
563-
# some configurations tower uses. Since these are not output
564-
# definitions, we skip these.
565-
if key in SKIP_NEXTFLOW_TOWER_KEYS:
566-
continue
567-
568-
outputs.append(
569-
self.make_output_type(key, value)
570-
)
571-
572-
return outputs
573-
574484
def dump_sb_wrapper(self, out_format=EXTENSIONS.yaml):
575485
"""
576486
Dump SB wrapper for nextflow workflow to a file

0 commit comments

Comments
 (0)