diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 0eb8b360673..9e367d9caa3 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -243,3 +243,7 @@ Hijri Nowruz charmap hijri + +TERA +GIGA +PETA diff --git a/src/uu/sort/src/buffer_hint.rs b/src/uu/sort/src/buffer_hint.rs index bb0ea754094..164fb581372 100644 --- a/src/uu/sort/src/buffer_hint.rs +++ b/src/uu/sort/src/buffer_hint.rs @@ -9,6 +9,8 @@ use std::ffi::OsString; use crate::{ FALLBACK_AUTOMATIC_BUF_SIZE, MAX_AUTOMATIC_BUF_SIZE, MIN_AUTOMATIC_BUF_SIZE, STDIN_FILE, }; +#[cfg(test)] +use uucore::parser::parse_size::MEGA; // Heuristics to size the external sort buffer without overcommit memory. pub(crate) fn automatic_buffer_size(files: &[OsString]) -> usize { @@ -135,7 +137,7 @@ mod tests { #[test] fn desired_buffer_matches_total_when_small() { - let six_mebibytes = 6 * 1024 * 1024; + let six_mebibytes = 6 * MEGA; let expected = ((six_mebibytes as u128) * 12) .clamp(six_mebibytes as u128, crate::MAX_AUTOMATIC_BUF_SIZE as u128); assert_eq!(desired_file_buffer_bytes(six_mebibytes as u128), expected); @@ -143,7 +145,7 @@ mod tests { #[test] fn desired_buffer_caps_at_max_for_large_inputs() { - let large = 256 * 1024 * 1024; // 256 MiB + let large = 256 * MEGA; // 256 MiB assert_eq!( desired_file_buffer_bytes(large as u128), crate::MAX_AUTOMATIC_BUF_SIZE as u128 diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 61dbef73ba4..1a621ed6b98 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -22,8 +22,9 @@ use uucore::error::{UResult, USimpleError}; use crate::{ GeneralBigDecimalParseResult, GlobalSettings, Line, SortMode, numeric_str_cmp::NumInfo, }; +use uucore::parser::parse_size::MEGA; -const MAX_TOKEN_BUFFER_BYTES: usize = 4 * 1024 * 1024; +const MAX_TOKEN_BUFFER_BYTES: usize = 4 * MEGA; const MAX_TOKEN_BUFFER_ELEMS: usize = 
MAX_TOKEN_BUFFER_BYTES / std::mem::size_of::>(); self_cell!( @@ -374,7 +375,7 @@ fn read_to_buffer( // We need to read more lines let len = buffer.len(); - let grow_by = (len / 2).max(1024 * 1024); + let grow_by = (len / 2).max(MEGA); buffer.resize(len + grow_by, 0); read_target = &mut buffer[len..]; } else { diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index d61f7d2008d..9f8f0749ce7 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -35,6 +35,7 @@ use crate::{ compare_by, merge, sort_by, }; use crate::{Line, print_sorted}; +use uucore::parser::parse_size::MEGA; // Note: update `test_sort::test_start_buffer` if this size is changed const START_BUFFER_SIZE: usize = 8_000; @@ -116,11 +117,11 @@ fn reader_writer< // Cap oversized buffer requests to avoid unnecessary allocations and give the automatic // heuristic room to grow when the user does not provide an explicit value. let mut buffer_size = match settings.buffer_size { - size if size <= 512 * 1024 * 1024 => size, + size if size <= 512 * MEGA => size, size => size / 2, }; if !settings.buffer_size_is_explicit { - buffer_size = buffer_size.max(8 * 1024 * 1024); + buffer_size = buffer_size.max(8 * MEGA); } let read_result: ReadResult = read_write_loop( files, diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index cbde70a3f1f..619c21f34f0 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -52,6 +52,9 @@ use uucore::i18n::collator::locale_cmp; use uucore::i18n::decimal::locale_decimal_separator; use uucore::line_ending::LineEnding; use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; +#[cfg(all(test, not(target_pointer_width = "32")))] +use uucore::parser::parse_size::{EXA, TERA}; +use uucore::parser::parse_size::{GIGA, KILO, MEGA}; use uucore::parser::parse_size::{ParseSizeError, Parser}; use uucore::parser::shortcut_value_parser::ShortcutValueParser; use uucore::posix::{MODERN, TRADITIONAL}; @@ -125,9 +128,9 @@ const POSITIVE: &u8 = &b'+'; // The 
automatic buffer heuristics clamp to this range to avoid // over-committing memory on constrained systems while still keeping // reasonably large chunks for typical workloads. -const MIN_AUTOMATIC_BUF_SIZE: usize = 512 * 1024; // 512 KiB -const FALLBACK_AUTOMATIC_BUF_SIZE: usize = 32 * 1024 * 1024; // 32 MiB -const MAX_AUTOMATIC_BUF_SIZE: usize = 1024 * 1024 * 1024; // 1 GiB +const MIN_AUTOMATIC_BUF_SIZE: usize = 512 * KILO; // 512 KiB +const FALLBACK_AUTOMATIC_BUF_SIZE: usize = 32 * MEGA; // 32 MiB +const MAX_AUTOMATIC_BUF_SIZE: usize = GIGA; // 1 GiB #[derive(Debug, Error)] pub enum SortError { @@ -1800,7 +1803,7 @@ fn emit_debug_warnings( show_error!("{}", translate!("sort-warning-simple-byte-comparison")); for (idx, selector) in settings.selectors.iter().enumerate() { - let key_index = idx + 1; + let key_index = idx.saturating_add(1); if let Some(legacy) = legacy_warnings .iter() .find(|warning| warning.key_index == Some(key_index)) @@ -3184,24 +3187,24 @@ mod tests { fn test_parse_byte_count() { let valid_input = [ ("0", 0), - ("50K", 50 * 1024), - ("50k", 50 * 1024), - ("1M", 1024 * 1024), - ("100M", 100 * 1024 * 1024), + ("50K", 50 * KILO), + ("50k", 50 * KILO), + ("1M", MEGA), + ("100M", 100 * MEGA), #[cfg(not(target_pointer_width = "32"))] - ("1000G", 1000 * 1024 * 1024 * 1024), + ("1000G", 1000 * GIGA), #[cfg(not(target_pointer_width = "32"))] - ("10T", 10 * 1024 * 1024 * 1024 * 1024), + ("10T", 10 * TERA), ("1b", 1), - ("1024b", 1024), - ("1024Mb", 1024 * 1024 * 1024), // NOTE: This might not be how GNU `sort` behaves for 'Mb' - ("1", 1024), // K is default - ("50", 50 * 1024), - ("K", 1024), - ("k", 1024), - ("m", 1024 * 1024), + ("1024b", KILO), + ("1024Mb", KILO * MEGA), // NOTE: This might not be how GNU `sort` behaves for 'Mb' + ("1", KILO), // K is default + ("50", 50 * KILO), + ("K", KILO), + ("k", KILO), + ("m", MEGA), #[cfg(not(target_pointer_width = "32"))] - ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("E", EXA), ]; for (input, 
expected_output) in &valid_input { assert_eq!( diff --git a/src/uucore/src/lib/features/parser/parse_size.rs b/src/uucore/src/lib/features/parser/parse_size.rs index 05c270e4cf8..9bb771d2ca1 100644 --- a/src/uucore/src/lib/features/parser/parse_size.rs +++ b/src/uucore/src/lib/features/parser/parse_size.rs @@ -6,6 +6,18 @@ //! Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). +// IEC binary-prefix multipliers for byte parsing (powers of 1024, not SI powers of 1000). +// TERA and larger overflow a 32-bit `usize`, so they are only defined on wider targets. +pub const KILO: usize = 1024; +pub const MEGA: usize = 1024 * 1024; +pub const GIGA: usize = 1024 * 1024 * 1024; +#[cfg(not(target_pointer_width = "32"))] +pub const TERA: usize = 1024 * 1024 * 1024 * 1024; +#[cfg(not(target_pointer_width = "32"))] +pub const PETA: usize = 1024 * 1024 * 1024 * 1024 * 1024; +#[cfg(not(target_pointer_width = "32"))] +pub const EXA: usize = 1024 * 1024 * 1024 * 1024 * 1024 * 1024; + use std::error::Error; use std::fmt; use std::num::{IntErrorKind, ParseIntError};