From a6b62c8cea16b49d20a34a0e6b03dd61ed2434ed Mon Sep 17 00:00:00 2001 From: David Calawa Date: Sat, 25 Mar 2023 15:52:16 -0700 Subject: [PATCH 1/5] copy files not in db to extras folder --- build_pack.py | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/build_pack.py b/build_pack.py index 8ad5d6a7..76ea3d3b 100644 --- a/build_pack.py +++ b/build_pack.py @@ -3,6 +3,7 @@ """ use a database to identify and organize files. """ +import copy import os import sys import shutil @@ -49,6 +50,11 @@ dest="output_folder", required=True, help="set output folder") + + parser.add_argument("-e", "--extra_files_folder", + dest="extras_folder", + required=False, + help="set output folder for files not found in db") parser.add_argument("-m", "--missing", dest="missing_files", @@ -240,10 +246,11 @@ def print_function(text, end, file=sys.stdout, flush=True): print(text, end=end, file=file, flush=flush) -def parse_folder(source_folder, db, output_folder): +def parse_folder(source_folder, db, output_folder, extras_folder): """ read each file, produce a hash value and place it in the directory tree. 
""" + db2 = copy.deepcopy(db) i = 0 total = len([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(source_folder)) for f in fn]) @@ -287,7 +294,34 @@ def parse_folder(source_folder, db, output_folder): original) # remove the hit from the database del db[h] - + break + if extras_folder and h not in db2: + new_path = os.path.join(extras_folder) + # create directory structure if need be + if not os.path.exists(new_path): + os.makedirs(new_path, exist_ok=True) + new_file = os.path.join(extras_folder, os.path.split(info['filename'])[-1]) + if os.path.exists(new_file): + # add a folder with part of the hash in case of non duplicate file + # todo: compare hashes + new_path = os.path.join(extras_folder, h[0:4]) + # create directory structure if need be + if not os.path.exists(new_path): + os.makedirs(new_path, exist_ok=True) + new_file = os.path.join(extras_folder, h[0:4], os.path.split(info['filename'])[-1]) + if (not ARGS.skip_existing or not + os.path.exists(new_file)): + if info['archive']: + # extract file from archive to directory + extract_file(info['filename'], + info['archive']['entry'], + info['archive']['type'], + new_file) + else: + # copy the file to the new directory + copy_file(info['filename'], + new_file, + new_file) i += 1 print_progress(i, total, END_LINE) else: @@ -354,13 +388,14 @@ def get_hashes(filename): SOURCE_FOLDER = ARGS.source_folder TARGET_DATABASE = ARGS.target_database OUTPUT_FOLDER = ARGS.output_folder + EXTRAS_FOLDER = ARGS.extras_folder MISSING_FILES = ARGS.missing_files END_LINE = "\n" if ARGS.new_line else "\r" DROP_INITIAL_DIRECTORY = ARGS.drop_initial_directory DATABASE, NUMBER_OF_ENTRIES = parse_database(TARGET_DATABASE, DROP_INITIAL_DIRECTORY) - parse_folder(SOURCE_FOLDER, DATABASE, OUTPUT_FOLDER) + parse_folder(SOURCE_FOLDER, DATABASE, OUTPUT_FOLDER, EXTRAS_FOLDER) # Observed files will have either their SHA256 or their CRC32 entry # deleted (or both) from the database. 
For missing files, both entries From 520d56f4d7a52cbefd14a1002702e2824e3e09e4 Mon Sep 17 00:00:00 2001 From: David Calawa Date: Sat, 25 Mar 2023 16:03:20 -0700 Subject: [PATCH 2/5] doc update --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index fc576438..9ac6ad03 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,9 @@ are organized `-o` (or `--output_folder`) is the folder in which to build the ROM pack +`-e` (or `--extra_files_folder`) is the folder to copy files to that are +not found in the db + `-m` (or `--missing`) is the text file that will list the ROMs missing in order to reach the 100% mark From 7f95e4bbc1366a843a0f8ddf157e1934c8812fc9 Mon Sep 17 00:00:00 2001 From: David Calawa Date: Mon, 16 Oct 2023 11:42:42 -0700 Subject: [PATCH 3/5] requested changes --- build_pack.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/build_pack.py b/build_pack.py index 76ea3d3b..78f1d8d3 100644 --- a/build_pack.py +++ b/build_pack.py @@ -298,28 +298,41 @@ def parse_folder(source_folder, db, output_folder, extras_folder): if extras_folder and h not in db2: new_path = os.path.join(extras_folder) # create directory structure if need be - if not os.path.exists(new_path): - os.makedirs(new_path, exist_ok=True) - new_file = os.path.join(extras_folder, os.path.split(info['filename'])[-1]) - if os.path.exists(new_file): - # add a folder with part of the 
hash in case of non duplicate file + # todo: compare hashes + new_path = os.path.join(extras_folder, dirpath) + # create directory structure if need be + # if not os.path.exists(new_path): + # os.makedirs(new_path, exist_ok=True) + new_file = os.path.join(extras_folder, dirpath, os.path.split(info['filename'])[-1]) if (not ARGS.skip_existing or not os.path.exists(new_file)): if info['archive']: # extract file from archive to directory - extract_file(info['filename'], + try: + extract_file(info['filename'], + info['archive']['entry'], + info['archive']['type'], + new_file) + except FileNotFoundError: + os.makedirs(new_path, exist_ok=True) + extract_file(info['filename'], info['archive']['entry'], info['archive']['type'], new_file) else: # copy the file to the new directory - copy_file(info['filename'], + try: + copy_file(info['filename'], + new_file, + new_file) + except FileNotFoundError: + os.makedirs(new_path, exist_ok=True) + copy_file(info['filename'], new_file, new_file) i += 1 From 04a6b15cced3c30b9a8d5b2300e7184e27e12367 Mon Sep 17 00:00:00 2001 From: David Calawa Date: Mon, 16 Oct 2023 11:51:17 -0700 Subject: [PATCH 4/5] forgot to save some changes --- build_pack.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/build_pack.py b/build_pack.py index 78f1d8d3..20eca34a 100644 --- a/build_pack.py +++ b/build_pack.py @@ -250,6 +250,9 @@ def parse_folder(source_folder, db, output_folder, extras_folder): """ read each file, produce a hash value and place it in the directory tree. 
""" + # create a second copy of the database because we will + # be deleting from the original and we will need this to + # look for files not in the SMDB table db2 = copy.deepcopy(db) i = 0 total = len([os.path.join(dp, f) for dp, dn, fn in @@ -296,18 +299,11 @@ def parse_folder(source_folder, db, output_folder, extras_folder): del db[h] break if extras_folder and h not in db2: - new_path = os.path.join(extras_folder) - # create directory structure if need be - # if not os.path.exists(new_path): - # os.makedirs(new_path, exist_ok=True) - # new_file = os.path.join(extras_folder, os.path.split(info['filename'])[-1]) - # if os.path.exists(new_file): - # add a folder with part of the hash in case of non duplicate file - # todo: compare hashes + # the file is not in the SMDB hash table + + # preserve the subdirectory structure of the extra + # file in the extras directory new_path = os.path.join(extras_folder, dirpath) - # create directory structure if need be - # if not os.path.exists(new_path): - # os.makedirs(new_path, exist_ok=True) new_file = os.path.join(extras_folder, dirpath, os.path.split(info['filename'])[-1]) if (not ARGS.skip_existing or not os.path.exists(new_file)): @@ -319,6 +315,7 @@ def parse_folder(source_folder, db, output_folder, extras_folder): info['archive']['type'], new_file) except FileNotFoundError: + # directory didn't exist so create it os.makedirs(new_path, exist_ok=True) extract_file(info['filename'], info['archive']['entry'], @@ -331,6 +328,7 @@ def parse_folder(source_folder, db, output_folder, extras_folder): new_file, new_file) except FileNotFoundError: + # directory didn't exist so create it os.makedirs(new_path, exist_ok=True) copy_file(info['filename'], new_file, From 1ddf7771a4193b5111e903470c33fa51b05f6f93 Mon Sep 17 00:00:00 2001 From: David Calawa Date: Thu, 25 Sep 2025 12:31:04 -0700 Subject: [PATCH 5/5] add choice to flatten extras folder or preserve original folder structure in extras folder --- build_pack.py | 26 
++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/build_pack.py b/build_pack.py index 20eca34a..10fca8e6 100644 --- a/build_pack.py +++ b/build_pack.py @@ -69,6 +69,20 @@ "folder. Smart uses copy for first instance of " "a file and hardlinks to that first one for " "successive files.")) + + # Valid uses of this flag include: -fe, -fe true, -fe yes, --flatten_extra_files=1 + parser.add_argument("-fe", "--flatten_extra_files", + dest="flatten_extras", + default=False, + # nargs and const below allow us to accept the + # zero-argument form of --flatten_extra_files + nargs="?", + const=True, + type='bool', + help=("Dump all extra files into the top level of " + "the extras folder, prefixing each name with " + "a partial hash to avoid collisions. Has no " + "effect if --extra_files_folder is not set.")) # Valid uses of this flag include: -s, -s true, -s yes, --skip_existing=1 parser.add_argument("-s", "--skip_existing", @@ -303,8 +317,16 @@ def parse_folder(source_folder, db, output_folder, extras_folder): # preserve the subdirectory structure of the extra # file in the extras directory - new_path = os.path.join(extras_folder, dirpath) - new_file = os.path.join(extras_folder, dirpath, os.path.split(info['filename'])[-1]) + # join the path relative to source_folder: os.path.join would drop extras_folder + # for an absolute dirpath, or escape into a parent directory via '..' components + if not ARGS.flatten_extras: + new_path = os.path.join(extras_folder, os.path.relpath(dirpath, start=source_folder)) + new_file = os.path.join(extras_folder, os.path.relpath(dirpath, start=source_folder), os.path.split(info['filename'])[-1]) + else: + new_path = extras_folder + new_file = os.path.join(extras_folder, f"{h[0:8]}_{os.path.split(info['filename'])[-1]}") + + if (not ARGS.skip_existing or not os.path.exists(new_file)): if info['archive']: