Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .vscode/cspell.dictionaries/jargon.wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,7 @@ Hijri
Nowruz
charmap
hijri

TERA
GIGA
PETA
6 changes: 4 additions & 2 deletions src/uu/sort/src/buffer_hint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::ffi::OsString;
use crate::{
FALLBACK_AUTOMATIC_BUF_SIZE, MAX_AUTOMATIC_BUF_SIZE, MIN_AUTOMATIC_BUF_SIZE, STDIN_FILE,
};
#[cfg(test)]
use uucore::parser::parse_size::MEGA;

// Heuristics to size the external sort buffer without overcommitting memory.
pub(crate) fn automatic_buffer_size(files: &[OsString]) -> usize {
Expand Down Expand Up @@ -135,15 +137,15 @@ mod tests {

#[test]
fn desired_buffer_matches_total_when_small() {
let six_mebibytes = 6 * 1024 * 1024;
let six_mebibytes = 6 * MEGA;
let expected = ((six_mebibytes as u128) * 12)
.clamp(six_mebibytes as u128, crate::MAX_AUTOMATIC_BUF_SIZE as u128);
assert_eq!(desired_file_buffer_bytes(six_mebibytes as u128), expected);
}

#[test]
fn desired_buffer_caps_at_max_for_large_inputs() {
let large = 256 * 1024 * 1024; // 256 MiB
let large = 256 * MEGA; // 256 MiB
assert_eq!(
desired_file_buffer_bytes(large as u128),
crate::MAX_AUTOMATIC_BUF_SIZE as u128
Expand Down
5 changes: 3 additions & 2 deletions src/uu/sort/src/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ use uucore::error::{UResult, USimpleError};
use crate::{
GeneralBigDecimalParseResult, GlobalSettings, Line, SortMode, numeric_str_cmp::NumInfo,
};
use uucore::parser::parse_size::MEGA;

const MAX_TOKEN_BUFFER_BYTES: usize = 4 * 1024 * 1024;
const MAX_TOKEN_BUFFER_BYTES: usize = 4 * MEGA;
const MAX_TOKEN_BUFFER_ELEMS: usize = MAX_TOKEN_BUFFER_BYTES / std::mem::size_of::<Range<usize>>();

self_cell!(
Expand Down Expand Up @@ -374,7 +375,7 @@ fn read_to_buffer<T: Read>(

// We need to read more lines
let len = buffer.len();
let grow_by = (len / 2).max(1024 * 1024);
let grow_by = (len / 2).max(MEGA);
buffer.resize(len + grow_by, 0);
read_target = &mut buffer[len..];
} else {
Expand Down
5 changes: 3 additions & 2 deletions src/uu/sort/src/ext_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use crate::{
compare_by, merge, sort_by,
};
use crate::{Line, print_sorted};
use uucore::parser::parse_size::MEGA;

// Note: update `test_sort::test_start_buffer` if this size is changed
const START_BUFFER_SIZE: usize = 8_000;
Expand Down Expand Up @@ -116,11 +117,11 @@ fn reader_writer<
// Cap oversized buffer requests to avoid unnecessary allocations and give the automatic
// heuristic room to grow when the user does not provide an explicit value.
let mut buffer_size = match settings.buffer_size {
size if size <= 512 * 1024 * 1024 => size,
size if size <= 512 * MEGA => size,
size => size / 2,
};
if !settings.buffer_size_is_explicit {
buffer_size = buffer_size.max(8 * 1024 * 1024);
buffer_size = buffer_size.max(8 * MEGA);
}
let read_result: ReadResult<Tmp> = read_write_loop(
files,
Expand Down
39 changes: 21 additions & 18 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ use uucore::i18n::collator::locale_cmp;
use uucore::i18n::decimal::locale_decimal_separator;
use uucore::line_ending::LineEnding;
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
#[cfg(test)]
use uucore::parser::parse_size::{EXA, TERA};
use uucore::parser::parse_size::{GIGA, KILO, MEGA};
use uucore::parser::parse_size::{ParseSizeError, Parser};
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
use uucore::posix::{MODERN, TRADITIONAL};
Expand Down Expand Up @@ -125,9 +128,9 @@ const POSITIVE: &u8 = &b'+';
// The automatic buffer heuristics clamp to this range to avoid
// over-committing memory on constrained systems while still keeping
// reasonably large chunks for typical workloads.
const MIN_AUTOMATIC_BUF_SIZE: usize = 512 * 1024; // 512 KiB
const FALLBACK_AUTOMATIC_BUF_SIZE: usize = 32 * 1024 * 1024; // 32 MiB
const MAX_AUTOMATIC_BUF_SIZE: usize = 1024 * 1024 * 1024; // 1 GiB
const MIN_AUTOMATIC_BUF_SIZE: usize = 512 * KILO; // 512 KiB
const FALLBACK_AUTOMATIC_BUF_SIZE: usize = 32 * MEGA; // 32 MiB
const MAX_AUTOMATIC_BUF_SIZE: usize = GIGA; // 1 GiB

#[derive(Debug, Error)]
pub enum SortError {
Expand Down Expand Up @@ -1800,7 +1803,7 @@ fn emit_debug_warnings(
show_error!("{}", translate!("sort-warning-simple-byte-comparison"));

for (idx, selector) in settings.selectors.iter().enumerate() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (idx, selector) in settings.selectors.iter().enumerate() {
for (key_index, selector) in (1..).zip(settings.selectors.iter()) {

let key_index = idx + 1;
let key_index = idx.saturating_add(1);
Copy link
Contributor

@xtqqczze xtqqczze Feb 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

enumerate() itself doesn’t guard against overflow, so guarding only at idx + 1 doesn’t add real safety.

In any case, I think idx.checked_add(1).unwrap() would make more sense.

Copy link
Contributor

@xtqqczze xtqqczze Feb 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hang on, there is no potential overflow, key_index has the range 1..settings.selectors.len()

They all optimise to the same anyway: https://godbolt.org/z/f4WzhP1vf

if let Some(legacy) = legacy_warnings
.iter()
.find(|warning| warning.key_index == Some(key_index))
Expand Down Expand Up @@ -3184,24 +3187,24 @@ mod tests {
fn test_parse_byte_count() {
let valid_input = [
("0", 0),
("50K", 50 * 1024),
("50k", 50 * 1024),
("1M", 1024 * 1024),
("100M", 100 * 1024 * 1024),
("50K", 50 * KILO),
("50k", 50 * KILO),
("1M", MEGA),
("100M", 100 * MEGA),
#[cfg(not(target_pointer_width = "32"))]
("1000G", 1000 * 1024 * 1024 * 1024),
("1000G", 1000 * GIGA),
#[cfg(not(target_pointer_width = "32"))]
("10T", 10 * 1024 * 1024 * 1024 * 1024),
("10T", 10 * TERA),
("1b", 1),
("1024b", 1024),
("1024Mb", 1024 * 1024 * 1024), // NOTE: This might not be how GNU `sort` behaves for 'Mb'
("1", 1024), // K is default
("50", 50 * 1024),
("K", 1024),
("k", 1024),
("m", 1024 * 1024),
("1024b", KILO),
("1024Mb", KILO * MEGA), // NOTE: This might not be how GNU `sort` behaves for 'Mb'
("1", KILO), // K is default
("50", 50 * KILO),
("K", KILO),
("k", KILO),
("m", MEGA),
#[cfg(not(target_pointer_width = "32"))]
("E", 1024 * 1024 * 1024 * 1024 * 1024 * 1024),
("E", EXA),
];
for (input, expected_output) in &valid_input {
assert_eq!(
Expand Down
8 changes: 8 additions & 0 deletions src/uucore/src/lib/features/parser/parse_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

//! Parser for sizes in SI or IEC units (multiples of 1000 or 1024 bytes).

// Binary (IEC) size multipliers used for byte parsing: successive powers of 1024,
// not the SI powers of 1000. Each constant is derived from the previous one so
// the 1024 factor is spelled out only once.

/// Number of bytes in one kibibyte (2^10).
pub const KILO: usize = 1024;
/// Number of bytes in one mebibyte (2^20).
pub const MEGA: usize = KILO * KILO;
/// Number of bytes in one gibibyte (2^30).
pub const GIGA: usize = MEGA * KILO;

// The remaining multipliers do not fit in a `usize` narrower than 64 bits.
// Overflow while evaluating a constant is a hard compile error even when the
// constant is never used, so these are only defined on 64-bit targets; code
// that uses them must be gated the same way.

/// Number of bytes in one tebibyte (2^40).
#[cfg(target_pointer_width = "64")]
pub const TERA: usize = GIGA * KILO;
/// Number of bytes in one pebibyte (2^50).
#[cfg(target_pointer_width = "64")]
pub const PETA: usize = TERA * KILO;
/// Number of bytes in one exbibyte (2^60).
#[cfg(target_pointer_width = "64")]
pub const EXA: usize = PETA * KILO;


use std::error::Error;
use std::fmt;
use std::num::{IntErrorKind, ParseIntError};
Expand Down
Loading