File tree (expand / collapse): 3 files changed, +5 -10 lines changed
File tree (expand / collapse): 3 files changed, +5 -10 lines changed
Original file line number | Diff line number | Diff line change
257257STREAMING_OPEN_RETRY_INTERVAL = 5
258258
259259# Datasets repositories exploration
260- DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE = 200
261- GLOBBED_DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE = 10
262- ARCHIVED_DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE = 200
260+ ARCHIVES_MAX_NUMBER_FOR_MODULE_INFERENCE = 10
263261
264262# Async map functions
265263MAX_NUM_RUNNING_ASYNC_MAP_FUNCTIONS_IN_PARALLEL = 1000
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,7 @@ def infer_module_for_data_files_list(
218218 """
219219 extensions_counter = Counter (
220220 ("." + suffix .lower (), xbasename (filepath ) in FolderBasedBuilder .METADATA_FILENAMES )
221- for filepath in data_files_list [: config . DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE ]
221+ for filepath in data_files_list
222222 for suffix in xbasename (filepath ).split ("." )[1 :]
223223 )
224224 if extensions_counter :
@@ -255,14 +255,11 @@ def infer_module_for_data_files_list_in_archives(
255255 for filepath in data_files_list :
256256 if str (filepath ).endswith (".zip" ):
257257 archive_files_counter += 1
258- if archive_files_counter > config .GLOBBED_DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE :
258+ if archive_files_counter > config .ARCHIVES_MAX_NUMBER_FOR_MODULE_INFERENCE :
259259 break
260260 extracted = xjoin (StreamingDownloadManager ().extract (filepath ), "**" )
261261 archived_files += [
262- f .split ("::" )[0 ]
263- for f in xglob (extracted , recursive = True , download_config = download_config )[
264- : config .ARCHIVED_DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE
265- ]
262+ f .split ("::" )[0 ] for f in xglob (extracted , recursive = True , download_config = download_config )
266263 ]
267264 extensions_counter = Counter (
268265 "." + suffix .lower () for filepath in archived_files for suffix in xbasename (filepath ).split ("." )[1 :]
Original file line number | Diff line number | Diff line change
@@ -1926,7 +1926,7 @@ def test_filter(self, in_memory):
19261926 with self ._create_dummy_dataset (in_memory , tmp_dir ) as dset :
19271927 dset .set_format ("numpy" )
19281928 fingerprint = dset ._fingerprint
1929- with dset .filter (lambda x : ( int (x ["filename" ][- 1 ]) % 2 == 0 ) ) as dset_filter_even_num :
1929+ with dset .filter (lambda x : int (x ["filename" ][- 1 ]) % 2 == 0 ) as dset_filter_even_num :
19301930 self .assertEqual (len (dset_filter_even_num ), 15 )
19311931 self .assertDictEqual (dset .features , Features ({"filename" : Value ("string" )}))
19321932 self .assertDictEqual (dset_filter_even_num .features , Features ({"filename" : Value ("string" )}))
You can’t perform that action at this time.
0 commit comments