Skip to content

Commit 003e416

Browse files
committed
feat: Improve sample generation realism and configurability
- Replace fixed 1000.5 amounts with realistic varying amounts (1250.00, 850.50, etc.)
- Add min/max amount range support via generate_decimal_with_range()
- Implement weighted currency distribution based on real-world usage (USD 30%, EUR 15%, etc.)
- Expand BIC codes with major international banks (US, European, Asian, Canadian, Australian)
- Enhance name/address generation with realistic business names and addresses
- Add configuration support for fixed values, length preferences, and amount ranges
- Reduce special characters in generated strings for more realistic output
- Maintain backward compatibility with existing generation functions
1 parent 8db9a66 commit 003e416

File tree

1 file changed

+197
-39
lines changed

1 file changed

+197
-39
lines changed

swift-mt-message/src/sample.rs

Lines changed: 197 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,11 @@ pub fn generate_alphanumeric(length: usize) -> String {
9898
.collect()
9999
}
100100

101-
/// Generate string with any SWIFT-allowed character
101+
/// Generate string with any SWIFT-allowed character (reduced special chars for realism)
102102
pub fn generate_any_character(length: usize) -> String {
103103
let mut rng = rand::thread_rng();
104-
let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/-?:().,'+"
104+
// Reduce special characters to make output look more realistic
105+
let chars: Vec<char> = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 /-.,"
105106
.chars()
106107
.collect();
107108
(0..length)
@@ -111,35 +112,112 @@ pub fn generate_any_character(length: usize) -> String {
111112

112113
/// Generate decimal number with specified total length and decimal places
113114
pub fn generate_decimal(length: usize, decimals: usize) -> String {
115+
generate_decimal_with_range(length, decimals, None, None)
116+
}
117+
118+
/// Generate decimal number with optional min/max range
119+
pub fn generate_decimal_with_range(
120+
length: usize,
121+
decimals: usize,
122+
min: Option<f64>,
123+
max: Option<f64>,
124+
) -> String {
114125
if decimals >= length {
115126
return "0,00".to_string();
116127
}
117128

129+
let mut rng = rand::thread_rng();
130+
131+
// If min/max are provided, generate within that range
132+
if let (Some(min_val), Some(max_val)) = (min, max) {
133+
let amount = rng.gen_range(min_val..=max_val);
134+
let formatted = format!("{amount:.2}").replace('.', ",");
135+
if formatted.len() <= length {
136+
return formatted;
137+
}
138+
}
139+
140+
// Generate realistic amounts instead of completely random
141+
let realistic_amounts = [
142+
"1250,00", "850,50", "2000,75", "500,25", "10000,00", "750,80", "3500,45", "125,60",
143+
"25000,00", "1875,90", "650,15", "4200,35",
144+
];
145+
146+
// For shorter lengths, use predefined realistic amounts
147+
if length <= 10 {
148+
let amount = realistic_amounts[rng.gen_range(0..realistic_amounts.len())];
149+
if amount.len() <= length {
150+
return amount.to_string();
151+
}
152+
}
153+
154+
// For longer amounts, generate but with realistic patterns
118155
let integer_part_len = length - decimals - 1; // -1 for comma
119-
let integer_part = generate_numeric(integer_part_len);
120-
let decimal_part = generate_numeric(decimals);
121156

157+
// Generate amounts that look realistic (not starting with 0, reasonable values)
158+
let mut integer_part = String::new();
159+
if integer_part_len > 0 {
160+
// First digit should not be 0 for realistic amounts
161+
integer_part.push_str(&rng.gen_range(1..10).to_string());
162+
163+
// Fill remaining digits
164+
for _ in 1..integer_part_len {
165+
integer_part.push_str(&rng.gen_range(0..10).to_string());
166+
}
167+
}
168+
169+
let decimal_part = generate_numeric(decimals);
122170
format!("{integer_part},{decimal_part}")
123171
}
124172

125173
/// Generate a valid BIC code
126174
pub fn generate_valid_bic() -> String {
127175
let mut rng = rand::thread_rng();
128176
let bics = [
129-
"ABNANL2A", "DEUTDEFF", "CHASUS33", "BOFAUS3N", "CITIUS33", "HSBCGB2L", "BNPAFRPP",
130-
"UBSWCHZH", "SCBLSGSG", "DBSSSGSG",
177+
// Major US banks (all 8 chars)
178+
"CHASUS33", "BOFAUS3N", "CITIUS33", "WFBIUS6W", "USBKUS44", "PNCCUS33",
179+
// Major European banks (all 8 chars)
180+
"DEUTDEFF", "HSBCGB2L", "BNPAFRPP", "UBSWCHZH", "ABNANL2A", "INGBNL2A", "CRESCHZZ",
181+
"BARCGB22", "LOYDGB2L", "NWBKGB2L", "RBOSGB2L",
182+
// Major Asian banks (all 8 chars)
183+
"SCBLSGSG", "DBSSSGSG", "OCBCSGSG", "HSBCHKHH", "CITIHKAX", "BOTKJPJT", "SMFGJPJT",
184+
"MHCBJPJT", // Major Canadian/Australian banks (all 8 chars)
185+
"ROYCCAT2", "BOFACATT", "ANZBAU3M", "CTBAAU2S",
186+
// Major international banks (all 8 chars)
187+
"ICICINBB", "HDFCINBB", "SBININBB", "BBVASPBX",
131188
];
132189
bics[rng.gen_range(0..bics.len())].to_string()
133190
}
134191

135-
/// Generate a valid currency code
192+
/// Generate a valid currency code with realistic distribution
136193
pub fn generate_valid_currency() -> String {
137194
let mut rng = rand::thread_rng();
138-
let currencies = vec![
139-
"USD", "EUR", "GBP", "JPY", "CHF", "CAD", "AUD", "NZD", "SEK", "NOK", "DKK", "SGD", "HKD",
140-
"CNY", "INR", "KRW", "MXN", "BRL", "ZAR", "AED",
141-
];
142-
currencies[rng.gen_range(0..currencies.len())].to_string()
195+
196+
// Weight currencies by real-world usage in international payments
197+
let weighted_selection = rng.gen_range(1..=100);
198+
199+
match weighted_selection {
200+
1..=30 => "USD".to_string(), // 30% - Most common
201+
31..=45 => "EUR".to_string(), // 15% - Second most common
202+
46..=55 => "GBP".to_string(), // 10% - Third most common
203+
56..=60 => "JPY".to_string(), // 5%
204+
61..=64 => "CHF".to_string(), // 4%
205+
65..=67 => "CAD".to_string(), // 3%
206+
68..=70 => "AUD".to_string(), // 3%
207+
71..=73 => "SGD".to_string(), // 3%
208+
74..=76 => "HKD".to_string(), // 3%
209+
77..=79 => "CNY".to_string(), // 3%
210+
80..=82 => "SEK".to_string(), // 3%
211+
83..=85 => "NOK".to_string(), // 3%
212+
86..=87 => "DKK".to_string(), // 2%
213+
88..=89 => "NZD".to_string(), // 2%
214+
90..=91 => "INR".to_string(), // 2%
215+
92..=93 => "KRW".to_string(), // 2%
216+
94..=95 => "BRL".to_string(), // 2%
217+
96..=97 => "ZAR".to_string(), // 2%
218+
98..=99 => "AED".to_string(), // 2%
219+
_ => "MXN".to_string(), // 1%
220+
}
143221
}
144222

145223
/// Generate a valid country code
@@ -188,6 +266,19 @@ pub fn generate_time_hhmm() -> String {
188266

189267
/// Generate a value based on SWIFT format specification
190268
pub fn generate_by_format_spec(format: &str) -> String {
269+
generate_by_format_spec_with_config(format, &FieldConfig::default())
270+
}
271+
272+
/// Generate a value based on SWIFT format specification with configuration
273+
pub fn generate_by_format_spec_with_config(format: &str, config: &FieldConfig) -> String {
274+
// Check if fixed values are provided
275+
if let Some(fixed_values) = &config.fixed_values {
276+
if !fixed_values.is_empty() {
277+
let mut rng = rand::thread_rng();
278+
return fixed_values[rng.gen_range(0..fixed_values.len())].clone();
279+
}
280+
}
281+
191282
// Parse format like "3!a", "6!n", "16x", "15d"
192283
let mut chars = format.chars().peekable();
193284
let mut length_str = String::new();
@@ -216,11 +307,36 @@ pub fn generate_by_format_spec(format: &str) -> String {
216307
}
217308

218309
let max_length: usize = length_str.parse().unwrap_or(1);
219-
let length = if is_exact {
220-
max_length
221-
} else {
222-
let mut rng = rand::thread_rng();
223-
rng.gen_range(1..=max_length)
310+
311+
// Apply length preference from config
312+
let length = match &config.length_preference {
313+
Some(LengthPreference::Exact(len)) => *len.min(&max_length),
314+
Some(LengthPreference::Range(min, max)) => {
315+
let mut rng = rand::thread_rng();
316+
let actual_min = *min.min(&max_length);
317+
let actual_max = (*max).min(max_length);
318+
if actual_min <= actual_max {
319+
rng.gen_range(actual_min..=actual_max)
320+
} else {
321+
max_length
322+
}
323+
}
324+
Some(LengthPreference::Short) => {
325+
let mut rng = rand::thread_rng();
326+
rng.gen_range(1..=(max_length / 2).max(1))
327+
}
328+
Some(LengthPreference::Long) => {
329+
let mut rng = rand::thread_rng();
330+
rng.gen_range((max_length / 2).max(1)..=max_length)
331+
}
332+
None => {
333+
if is_exact {
334+
max_length
335+
} else {
336+
let mut rng = rand::thread_rng();
337+
rng.gen_range(1..=max_length)
338+
}
339+
}
224340
};
225341

226342
match char_type {
@@ -230,7 +346,12 @@ pub fn generate_by_format_spec(format: &str) -> String {
230346
'd' => {
231347
// For decimal format, assume 2 decimal places if not specified
232348
let decimals = 2;
233-
generate_decimal(length, decimals)
349+
// Check for amount range configuration
350+
if let Some(ValueRange::Amount { min, max, .. }) = &config.value_range {
351+
generate_decimal_with_range(length, decimals, Some(*min), Some(*max))
352+
} else {
353+
generate_decimal(length, decimals)
354+
}
234355
}
235356
_ => generate_any_character(length),
236357
}
@@ -280,47 +401,84 @@ pub fn generate_instruction_code() -> String {
280401
pub fn generate_name_and_address(lines: usize) -> Vec<String> {
281402
let mut rng = rand::thread_rng();
282403
let names = [
283-
"ACME CORPORATION",
284-
"GLOBAL TRADING LTD",
285-
"INTERNATIONAL FINANCE INC",
286-
"SWIFT PAYMENTS CORP",
287-
"DIGITAL SOLUTIONS AG",
404+
"GLOBAL TRADE SOLUTIONS LTD",
405+
"INTERNATIONAL EXPORT CORP",
406+
"PRIME FINANCIAL SERVICES",
407+
"METROPOLITAN TRADING CO",
408+
"CONSOLIDATED INDUSTRIES INC",
409+
"PACIFIC RIM ENTERPRISES",
410+
"EUROPEAN COMMERCE GROUP",
411+
"ATLANTIC BUSINESS PARTNERS",
412+
"CONTINENTAL HOLDINGS LLC",
413+
"WORLDWIDE LOGISTICS CORP",
414+
"STERLING INVESTMENT GROUP",
415+
"MERIDIAN COMMERCIAL LTD",
416+
"APEX TRADING COMPANY",
417+
"NEXUS FINANCIAL CORP",
418+
"HORIZON BUSINESS SOLUTIONS",
288419
];
289420

290421
let streets = [
291-
"123 MAIN STREET",
292-
"456 PARK AVENUE",
293-
"789 BROADWAY",
294-
"321 WALL STREET",
295-
"654 FIFTH AVENUE",
422+
"125 CORPORATE PLAZA",
423+
"450 BUSINESS PARK DRIVE",
424+
"789 FINANCIAL DISTRICT",
425+
"1200 COMMERCE STREET",
426+
"650 EXECUTIVE BOULEVARD",
427+
"300 TRADE CENTER WAY",
428+
"850 INTERNATIONAL AVENUE",
429+
"1500 ENTERPRISE PARKWAY",
430+
"275 INVESTMENT PLAZA",
431+
"920 BANKING SQUARE",
432+
"1750 CORPORATE CENTER",
433+
"425 PROFESSIONAL DRIVE",
434+
"680 MARKET STREET",
435+
"1100 INDUSTRIAL WAY",
436+
"550 COMMERCIAL BOULEVARD",
296437
];
297438

298439
let cities = [
299-
"NEW YORK NY 10001",
300-
"LONDON EC1A 1BB",
440+
"NEW YORK NY 10005",
441+
"LONDON EC2V 8RF",
301442
"ZURICH 8001",
302-
"SINGAPORE 018956",
303-
"TOKYO 100-0001",
443+
"SINGAPORE 048624",
444+
"TOKYO 100-6590",
445+
"FRANKFURT AM MAIN 60311",
446+
"PARIS 75001",
447+
"MILAN 20121",
448+
"GENEVA 1204",
449+
"DUBLIN 2",
450+
"AMSTERDAM 1017 XX",
451+
"BRUSSELS 1000",
452+
"MADRID 28001",
453+
"BARCELONA 08002",
454+
"VIENNA 1010",
304455
];
305456

306457
let mut result = vec![];
307458

308459
if lines > 0 {
309460
result.push(names[rng.gen_range(0..names.len())].to_string());
310461
}
311-
if lines > 1 && rng.gen_bool(0.7) {
462+
if lines > 1 {
312463
result.push(streets[rng.gen_range(0..streets.len())].to_string());
313464
}
314-
if lines > result.len() && rng.gen_bool(0.8) {
465+
if lines > 2 {
315466
result.push(cities[rng.gen_range(0..cities.len())].to_string());
316467
}
317-
if lines > result.len() {
468+
if lines > 3 {
318469
result.push(generate_valid_country_code());
319470
}
320471

321-
// Fill remaining lines if needed
472+
// Fill remaining lines with additional address details if needed
322473
while result.len() < lines {
323-
result.push(generate_any_character(rng.gen_range(10..30)));
474+
let additional_info = [
475+
"CORPORATE HEADQUARTERS",
476+
"MAIN OFFICE",
477+
"TREASURY DEPARTMENT",
478+
"INTERNATIONAL DIVISION",
479+
"FINANCIAL SERVICES",
480+
];
481+
result.push(additional_info[rng.gen_range(0..additional_info.len())].to_string());
324482
}
325483

326484
result
@@ -423,8 +581,8 @@ mod tests {
423581
let result = generate_any_character(20);
424582
assert_eq!(result.len(), 20);
425583

426-
// All characters should be SWIFT-allowed
427-
let allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789/-?:().,'+";
584+
// All characters should be SWIFT-allowed (reduced set for realism)
585+
let allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 /-.,";
428586
assert!(result.chars().all(|c| allowed_chars.contains(c)));
429587
}
430588

0 commit comments

Comments
 (0)