From 34237cbff18b46880883c827a9acaa2eccb3cee3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 1 Feb 2026 22:40:21 +0100 Subject: [PATCH 1/4] sort: use saturating_add to prevent potential overflow --- src/uu/sort/src/sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index cbde70a3f1f..cffa689f940 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1800,7 +1800,7 @@ fn emit_debug_warnings( show_error!("{}", translate!("sort-warning-simple-byte-comparison")); for (idx, selector) in settings.selectors.iter().enumerate() { - let key_index = idx + 1; + let key_index = idx.saturating_add(1); if let Some(legacy) = legacy_warnings .iter() .find(|warning| warning.key_index == Some(key_index)) From ef3a5036af07e6874f8938f4a4f1cfe66ca9442a Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 1 Feb 2026 22:41:02 +0100 Subject: [PATCH 2/4] sort: extract SI unit constants for better maintainability --- src/uu/sort/src/sort.rs | 42 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index cffa689f940..7d9a536fcd1 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -122,12 +122,20 @@ fn locale_decimal_pt() -> u8 { const NEGATIVE: &u8 = &b'-'; const POSITIVE: &u8 = &b'+'; +// SI unit constants for byte parsing +const KILO: usize = 1024; +const MEGA: usize = 1024 * 1024; +const GIGA: usize = 1024 * 1024 * 1024; +const TERA: usize = 1024 * 1024 * 1024 * 1024; +const PETA: usize = 1024 * 1024 * 1024 * 1024 * 1024; +const EXA: usize = 1024 * 1024 * 1024 * 1024 * 1024 * 1024; + // The automatic buffer heuristics clamp to this range to avoid // over-committing memory on constrained systems while still keeping // reasonably large chunks for typical workloads. 
-const MIN_AUTOMATIC_BUF_SIZE: usize = 512 * 1024; // 512 KiB -const FALLBACK_AUTOMATIC_BUF_SIZE: usize = 32 * 1024 * 1024; // 32 MiB -const MAX_AUTOMATIC_BUF_SIZE: usize = 1024 * 1024 * 1024; // 1 GiB +const MIN_AUTOMATIC_BUF_SIZE: usize = 512 * KILO; // 512 KiB +const FALLBACK_AUTOMATIC_BUF_SIZE: usize = 32 * MEGA; // 32 MiB +const MAX_AUTOMATIC_BUF_SIZE: usize = GIGA; // 1 GiB #[derive(Debug, Error)] pub enum SortError { @@ -3184,24 +3192,24 @@ mod tests { fn test_parse_byte_count() { let valid_input = [ ("0", 0), - ("50K", 50 * 1024), - ("50k", 50 * 1024), - ("1M", 1024 * 1024), - ("100M", 100 * 1024 * 1024), + ("50K", 50 * KILO), + ("50k", 50 * KILO), + ("1M", MEGA), + ("100M", 100 * MEGA), #[cfg(not(target_pointer_width = "32"))] - ("1000G", 1000 * 1024 * 1024 * 1024), + ("1000G", 1000 * GIGA), #[cfg(not(target_pointer_width = "32"))] - ("10T", 10 * 1024 * 1024 * 1024 * 1024), + ("10T", 10 * TERA), ("1b", 1), - ("1024b", 1024), - ("1024Mb", 1024 * 1024 * 1024), // NOTE: This might not be how GNU `sort` behaves for 'Mb' - ("1", 1024), // K is default - ("50", 50 * 1024), - ("K", 1024), - ("k", 1024), - ("m", 1024 * 1024), + ("1024b", KILO), + ("1024Mb", KILO * MEGA), // NOTE: This might not be how GNU `sort` behaves for 'Mb' + ("1", KILO), // K is default + ("50", 50 * KILO), + ("K", KILO), + ("k", KILO), + ("m", MEGA), #[cfg(not(target_pointer_width = "32"))] - ("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024), + ("E", EXA), ]; for (input, expected_output) in &valid_input { assert_eq!( From 1aadae3fb50281f1453f5e5126733a66e77acff3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 1 Feb 2026 22:45:35 +0100 Subject: [PATCH 3/4] sort: define UTF8_NBSP and ISO_NBSP constants --- src/uu/sort/src/sort.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 7d9a536fcd1..abde13c453f 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -122,6 +122,10 @@ fn locale_decimal_pt() -> u8 { const 
NEGATIVE: &u8 = &b'-'; const POSITIVE: &u8 = &b'+'; +// Non-breaking space constants +const UTF8_NBSP: &[u8] = &[0xc2, 0xa0]; // UTF-8 encoding of non-breaking space (U+00A0) +const ISO_NBSP: u8 = 0xa0; // ISO 8859-1 non-breaking space + // SI unit constants for byte parsing const KILO: usize = 1024; const MEGA: usize = 1024 * 1024; From d8528e91384b0598b616d676a482718b6719fc19 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 1 Feb 2026 22:57:22 +0100 Subject: [PATCH 4/4] sort: deduplicate SI unit constants to shared uucore module --- .vscode/cspell.dictionaries/jargon.wordlist.txt | 4 ++++ src/uu/sort/src/buffer_hint.rs | 6 ++++-- src/uu/sort/src/chunks.rs | 5 +++-- src/uu/sort/src/ext_sort.rs | 5 +++-- src/uu/sort/src/sort.rs | 17 ++++------------- .../src/lib/features/parser/parse_size.rs | 8 ++++++++ 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 0eb8b360673..9e367d9caa3 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -243,3 +243,7 @@ Hijri Nowruz charmap hijri + +TERA +GIGA +PETA diff --git a/src/uu/sort/src/buffer_hint.rs b/src/uu/sort/src/buffer_hint.rs index bb0ea754094..164fb581372 100644 --- a/src/uu/sort/src/buffer_hint.rs +++ b/src/uu/sort/src/buffer_hint.rs @@ -9,6 +9,8 @@ use std::ffi::OsString; use crate::{ FALLBACK_AUTOMATIC_BUF_SIZE, MAX_AUTOMATIC_BUF_SIZE, MIN_AUTOMATIC_BUF_SIZE, STDIN_FILE, }; +#[cfg(test)] +use uucore::parser::parse_size::MEGA; // Heuristics to size the external sort buffer without overcommit memory. 
pub(crate) fn automatic_buffer_size(files: &[OsString]) -> usize { @@ -135,7 +137,7 @@ mod tests { #[test] fn desired_buffer_matches_total_when_small() { - let six_mebibytes = 6 * 1024 * 1024; + let six_mebibytes = 6 * MEGA; let expected = ((six_mebibytes as u128) * 12) .clamp(six_mebibytes as u128, crate::MAX_AUTOMATIC_BUF_SIZE as u128); assert_eq!(desired_file_buffer_bytes(six_mebibytes as u128), expected); @@ -143,7 +145,7 @@ mod tests { #[test] fn desired_buffer_caps_at_max_for_large_inputs() { - let large = 256 * 1024 * 1024; // 256 MiB + let large = 256 * MEGA; // 256 MiB assert_eq!( desired_file_buffer_bytes(large as u128), crate::MAX_AUTOMATIC_BUF_SIZE as u128 diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 61dbef73ba4..1a621ed6b98 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -22,8 +22,9 @@ use uucore::error::{UResult, USimpleError}; use crate::{ GeneralBigDecimalParseResult, GlobalSettings, Line, SortMode, numeric_str_cmp::NumInfo, }; +use uucore::parser::parse_size::MEGA; -const MAX_TOKEN_BUFFER_BYTES: usize = 4 * 1024 * 1024; +const MAX_TOKEN_BUFFER_BYTES: usize = 4 * MEGA; const MAX_TOKEN_BUFFER_ELEMS: usize = MAX_TOKEN_BUFFER_BYTES / std::mem::size_of::>(); self_cell!( @@ -374,7 +375,7 @@ fn read_to_buffer( // We need to read more lines let len = buffer.len(); - let grow_by = (len / 2).max(1024 * 1024); + let grow_by = (len / 2).max(MEGA); buffer.resize(len + grow_by, 0); read_target = &mut buffer[len..]; } else { diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index d61f7d2008d..9f8f0749ce7 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -35,6 +35,7 @@ use crate::{ compare_by, merge, sort_by, }; use crate::{Line, print_sorted}; +use uucore::parser::parse_size::MEGA; // Note: update `test_sort::test_start_buffer` if this size is changed const START_BUFFER_SIZE: usize = 8_000; @@ -116,11 +117,11 @@ fn reader_writer< // Cap oversized buffer requests 
to avoid unnecessary allocations and give the automatic // heuristic room to grow when the user does not provide an explicit value. let mut buffer_size = match settings.buffer_size { - size if size <= 512 * 1024 * 1024 => size, + size if size <= 512 * MEGA => size, size => size / 2, }; if !settings.buffer_size_is_explicit { - buffer_size = buffer_size.max(8 * 1024 * 1024); + buffer_size = buffer_size.max(8 * MEGA); } let read_result: ReadResult = read_write_loop( files, diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index abde13c453f..619c21f34f0 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -52,6 +52,9 @@ use uucore::i18n::collator::locale_cmp; use uucore::i18n::decimal::locale_decimal_separator; use uucore::line_ending::LineEnding; use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; +#[cfg(test)] +use uucore::parser::parse_size::{EXA, TERA}; +use uucore::parser::parse_size::{GIGA, KILO, MEGA}; use uucore::parser::parse_size::{ParseSizeError, Parser}; use uucore::parser::shortcut_value_parser::ShortcutValueParser; use uucore::posix::{MODERN, TRADITIONAL}; @@ -122,18 +125,6 @@ fn locale_decimal_pt() -> u8 { const NEGATIVE: &u8 = &b'-'; const POSITIVE: &u8 = &b'+'; -// Non-breaking space constants -const UTF8_NBSP: &[u8] = &[0xc2, 0xa0]; // UTF-8 encoding of non-breaking space (U+00A0) -const ISO_NBSP: u8 = 0xa0; // ISO 8859-1 non-breaking space - -// SI unit constants for byte parsing -const KILO: usize = 1024; -const MEGA: usize = 1024 * 1024; -const GIGA: usize = 1024 * 1024 * 1024; -const TERA: usize = 1024 * 1024 * 1024 * 1024; -const PETA: usize = 1024 * 1024 * 1024 * 1024 * 1024; -const EXA: usize = 1024 * 1024 * 1024 * 1024 * 1024 * 1024; - // The automatic buffer heuristics clamp to this range to avoid // over-committing memory on constrained systems while still keeping // reasonably large chunks for typical workloads. 
@@ -3207,7 +3198,7 @@ mod tests { ("1b", 1), ("1024b", KILO), ("1024Mb", KILO * MEGA), // NOTE: This might not be how GNU `sort` behaves for 'Mb' - ("1", KILO), // K is default + ("1", KILO), // K is default ("50", 50 * KILO), ("K", KILO), ("k", KILO), diff --git a/src/uucore/src/lib/features/parser/parse_size.rs b/src/uucore/src/lib/features/parser/parse_size.rs index 05c270e4cf8..9bb771d2ca1 100644 --- a/src/uucore/src/lib/features/parser/parse_size.rs +++ b/src/uucore/src/lib/features/parser/parse_size.rs @@ -6,6 +6,14 @@ //! Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes). +// Binary (IEC-style, powers of 1024) unit multipliers for byte parsing; these are not SI powers of 1000. NOTE(review): TERA and above exceed a 32-bit usize - confirm 32-bit targets still build. +pub const KILO: usize = 1024; +pub const MEGA: usize = 1024 * 1024; +pub const GIGA: usize = 1024 * 1024 * 1024; +pub const TERA: usize = 1024 * 1024 * 1024 * 1024; +pub const PETA: usize = 1024 * 1024 * 1024 * 1024 * 1024; +pub const EXA: usize = 1024 * 1024 * 1024 * 1024 * 1024 * 1024; + use std::error::Error; use std::fmt; use std::num::{IntErrorKind, ParseIntError};