diff --git a/README.md b/README.md index fc576438..9ac6ad03 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,9 @@ are organized `-o` (or `--output_folder`) is the folder in which to build the ROM pack +`-e` (or `--extras_folder`) is the folder to copy files to that are +not found in the db + `-m` (or `--missing`) is the text file that will list the ROMs missing in order to reach the 100% mark diff --git a/build_pack.py b/build_pack.py index 8ad5d6a7..10fca8e6 100644 --- a/build_pack.py +++ b/build_pack.py @@ -3,6 +3,7 @@ """ use a database to identify and organize files. """ +import copy import os import sys import shutil @@ -49,6 +50,11 @@ dest="output_folder", required=True, help="set output folder") + + parser.add_argument("-e", "--extra_files_folder", + dest="extras_folder", + required=False, + help="set output folder for files not found in db") parser.add_argument("-m", "--missing", dest="missing_files", @@ -63,6 +69,20 @@ "folder. Smart uses copy for first instance of " "a file and hardlinks to that first one for " "successive files.")) + + # Valid uses of this flag include: -fe, -fe true, -fe yes, --flatten_extra_files=1 + parser.add_argument("-fe", "--flatten_extra_files", + dest="flatten_extras", + default=False, + # nargs and const below allow us to accept the + # zero-argument form of --flatten_extra_files + nargs="?", + const=True, + type='bool', + help=("Dump all extra files into the top level of " + "the extras folder, appending a partial hash " + "if the names collide. This option has no " + "effect if --extra_files_folder is not set.")) # Valid uses of this flag include: -s, -s true, -s yes, --skip_existing=1 parser.add_argument("-s", "--skip_existing", @@ -240,10 +260,14 @@ def print_function(text, end, file=sys.stdout, flush=True): print(text, end=end, file=file, flush=flush) -def parse_folder(source_folder, db, output_folder): +def parse_folder(source_folder, db, output_folder, extras_folder): """ read each file, produce a hash value and place it in the directory tree. """ + # create a second copy of the database because we will + # be deleting from the original and we will need this to + # look for files not in the SMDB table + db2 = copy.deepcopy(db) i = 0 total = len([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(source_folder)) for f in fn]) @@ -287,7 +311,50 @@ def parse_folder(source_folder, db, output_folder): original) # remove the hit from the database del db[h] - + break + if extras_folder and h not in db2: + # the file is not in the SMDB hash table + + # preserve the subdirectory structure of the extra + # file in the extras directory + # os.path.relpath is necessary to prevent weird issues with certain paths + # such as placing the extra files in a parent directory + if not ARGS.flatten_extras_folder: + new_path = os.path.join(extras_folder, os.path.relpath(dirpath, start=source_folder)) + new_file = os.path.join(extras_folder, os.path.relpath(dirpath, start=source_folder), os.path.split(info['filename'])[-1]) + else: + new_path = extras_folder + new_file = os.path.join(extras_folder, f'{h[0:8]}_{os.path.split(info['filename'][-1])}') + + + if (not ARGS.skip_existing or not + os.path.exists(new_file)): + if info['archive']: + # extract file from archive to directory + try: + extract_file(info['filename'], + info['archive']['entry'], + info['archive']['type'], + new_file) + except FileNotFoundError: + # directory didn't exist so create it + os.makedirs(new_path, exist_ok=True) + extract_file(info['filename'], + info['archive']['entry'], + info['archive']['type'], + new_file) + else: + # copy the file to the new directory + try: + copy_file(info['filename'], + new_file, + new_file) + except FileNotFoundError: + # directory didn't exist so create it + os.makedirs(new_path, exist_ok=True) + copy_file(info['filename'], + new_file, + new_file) i += 1 print_progress(i, total, END_LINE) else: @@ -354,13 +421,14 @@ def get_hashes(filename): SOURCE_FOLDER = ARGS.source_folder TARGET_DATABASE = ARGS.target_database OUTPUT_FOLDER = ARGS.output_folder + EXTRAS_FOLDER = ARGS.extras_folder MISSING_FILES = ARGS.missing_files END_LINE = "\n" if ARGS.new_line else "\r" DROP_INITIAL_DIRECTORY = ARGS.drop_initial_directory DATABASE, NUMBER_OF_ENTRIES = parse_database(TARGET_DATABASE, DROP_INITIAL_DIRECTORY) - parse_folder(SOURCE_FOLDER, DATABASE, OUTPUT_FOLDER) + parse_folder(SOURCE_FOLDER, DATABASE, OUTPUT_FOLDER, EXTRAS_FOLDER) # Observed files will have either their SHA256 or their CRC32 entry # deleted (or both) from the database. For missing files, both entries