Skip to content

Commit 18add2c

Browse files
fix: replace shell=True with secure subprocess in TranscodePerturbation
Security: Convert amr-nb, ogg, and g711 codecs in TranscodePerturbation from shell=True to secure subprocess patterns using list arguments. Changes: - Replace subprocess.check_output with shell=True to subprocess.Popen and subprocess.run with explicit argument lists for amr-nb and ogg codecs - Replace subprocess.check_output with shell=True to subprocess.run with list arguments for g711 codec - Use subprocess.DEVNULL for stderr to suppress unnecessary output - Properly close stdout pipes and wait for encoder process completion to prevent potential resource leaks - Remove unused 'Any' import from typing module This follows the precedent set in PR #15165 which fixed similar security vulnerabilities in data_utils.py. Signed-off-by: Rudra-Tiwari-codes <rudratiwari2006@gmail.com> Signed-off-by: Rudra Tiwari <tiwarirudra2006@gmail.com>
1 parent 5d46d4a commit 18add2c

File tree

1 file changed

+46
-12
lines changed
  • nemo/collections/asr/parts/preprocessing

1 file changed

+46
-12
lines changed

nemo/collections/asr/parts/preprocessing/perturb.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import random
4040
import subprocess
4141
from tempfile import NamedTemporaryFile
42-
from typing import Any, List, Optional, Union
42+
from typing import List, Optional, Union
4343

4444
import librosa
4545
import numpy as np
@@ -1032,23 +1032,57 @@ def perturb(self, data):
10321032
transcoded_f = NamedTemporaryFile(suffix="_amr.wav")
10331033
rates = list(range(0, 4))
10341034
rate = rates[random.randint(0, len(rates) - 1)]
1035-
_ = subprocess.check_output(
1036-
f"sox {orig_f.name} -V0 -C {rate} -t amr-nb - | sox -t amr-nb - -V0 -b 16 -r 16000 {transcoded_f.name}",
1037-
shell=True,
1038-
)
1035+
with subprocess.Popen(
1036+
["sox", orig_f.name, "-V0", "-C", str(rate), "-t", "amr-nb", "-"],
1037+
stdout=subprocess.PIPE,
1038+
stderr=subprocess.DEVNULL,
1039+
) as sox_encode:
1040+
subprocess.run(
1041+
["sox", "-t", "amr-nb", "-", "-V0", "-b", "16", "-r", "16000", transcoded_f.name],
1042+
stdin=sox_encode.stdout,
1043+
stderr=subprocess.DEVNULL,
1044+
check=True,
1045+
)
1046+
sox_encode.stdout.close()
1047+
sox_encode.wait()
10391048
elif self._codecs[codec_ind] == "ogg":
10401049
transcoded_f = NamedTemporaryFile(suffix="_ogg.wav")
10411050
rates = list(range(-1, 8))
10421051
rate = rates[random.randint(0, len(rates) - 1)]
1043-
_ = subprocess.check_output(
1044-
f"sox {orig_f.name} -V0 -C {rate} -t ogg - | sox -t ogg - -V0 -b 16 -r 16000 {transcoded_f.name}",
1045-
shell=True,
1046-
)
1052+
with subprocess.Popen(
1053+
["sox", orig_f.name, "-V0", "-C", str(rate), "-t", "ogg", "-"],
1054+
stdout=subprocess.PIPE,
1055+
stderr=subprocess.DEVNULL,
1056+
) as sox_encode:
1057+
subprocess.run(
1058+
["sox", "-t", "ogg", "-", "-V0", "-b", "16", "-r", "16000", transcoded_f.name],
1059+
stdin=sox_encode.stdout,
1060+
stderr=subprocess.DEVNULL,
1061+
check=True,
1062+
)
1063+
sox_encode.stdout.close()
1064+
sox_encode.wait()
10471065
elif self._codecs[codec_ind] == "g711":
10481066
transcoded_f = NamedTemporaryFile(suffix="_g711.wav")
1049-
_ = subprocess.check_output(
1050-
f"sox {orig_f.name} -V0 -r 8000 -c 1 -e a-law {transcoded_f.name} lowpass 3400 highpass 300",
1051-
shell=True,
1067+
subprocess.run(
1068+
[
1069+
"sox",
1070+
orig_f.name,
1071+
"-V0",
1072+
"-r",
1073+
"8000",
1074+
"-c",
1075+
"1",
1076+
"-e",
1077+
"a-law",
1078+
transcoded_f.name,
1079+
"lowpass",
1080+
"3400",
1081+
"highpass",
1082+
"300",
1083+
],
1084+
stderr=subprocess.DEVNULL,
1085+
check=True,
10521086
)
10531087

10541088
new_data = AudioSegment.from_file(transcoded_f.name, target_sr=16000)

0 commit comments

Comments
 (0)