Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion montreal_forced_aligner/dictionary/multispeaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,7 @@ def save_oovs_found(self, directory: str) -> None:
Parameters
----------
directory : str
- Path to directory to save ``oovs_found.txt``
+ Path to directory to save ``oovs_found_*.txt`` files.
"""
with self.session() as session:
for dict_id, base_name in self.dictionary_base_names.items():
Expand Down
12 changes: 10 additions & 2 deletions montreal_forced_aligner/validation/corpus_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,15 @@ def analyze_oovs(self, output_directory: Path = None) -> None:
if output_directory is None:
output_directory = self.output_directory
os.makedirs(output_directory, exist_ok=True)
- oov_path = os.path.join(output_directory, "oovs_found.txt")
+ # Point users to the actual OOV files written by save_oovs_found()
+ if hasattr(self, "dictionary_base_names"):
+ if len(self.dictionary_base_names) == 1:
+ only_base = next(iter(self.dictionary_base_names.values()))
+ oov_path = os.path.join(output_directory, f"oovs_found_{only_base}.txt")
+ else:
+ oov_path = os.path.join(output_directory, "oovs_found_*.txt")
+ else:
+ oov_path = os.path.join(output_directory, "oovs_found.txt")
utterance_oov_path = os.path.join(output_directory, "utterance_oovs.txt")

total_instances = 0
Expand Down Expand Up @@ -169,7 +177,7 @@ def analyze_oovs(self, output_directory: Path = None) -> None:
if self.oovs_found:
self.save_oovs_found(output_directory)
logger.warning(f"{len(self.oovs_found)} OOV word types")
- logger.warning(f"{total_instances}total OOV tokens")
+ logger.warning(f"{total_instances} total OOV tokens")
logger.warning(
f"For a full list of the word types, please see: {oov_path}. "
f"For a by-utterance breakdown of missing words, see: {utterance_oov_path}"
Expand Down
2 changes: 2 additions & 0 deletions tests/test_commandline_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def test_validate_corpus(
print(result.exc_info)
raise result.exception
assert not result.return_value
+ # Regression test for oovs_found_*.txt path
+ assert "oovs_found_english_us" in result.stderr


def test_validate_training_corpus(
Expand Down
Loading