Skip to content

Commit

Permalink
3.0.3 (#783)
Browse files Browse the repository at this point in the history
* Bug fixes
  • Loading branch information
mmcauliffe authored Mar 22, 2024
1 parent 587ce0f commit 91fd82d
Show file tree
Hide file tree
Showing 15 changed files with 48 additions and 49 deletions.
10 changes: 10 additions & 0 deletions docs/source/changelog/changelog_3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
3.0 Changelog
*************

3.0.3
=====

- Fixed a regression where clitic words were merged even when textgrid cleanup is disabled
- Fixed an issue with copying files when symlinks are not possible on Windows
- Fixed an issue with using G2P models during training/alignment
- Changed default feature config to set :code:`use_energy=True` and :code:`dither=0.0001`
- Updated tokenization to remove the extra combining dot left behind when lowercasing the Turkish dotted capital :code:`İ`
- Fixed an issue where special disambiguation symbols were not always in the phone table

3.0.2
=====

Expand Down
7 changes: 6 additions & 1 deletion docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ General installation
2. :code:`conda install -c conda-forge mamba`
3. :code:`mamba create -n aligner -c conda-forge montreal-forced-aligner`

Updating Montreal Forced Aligner
--------------------------------

To update to the latest version, run :code:`conda update -c conda-forge montreal-forced-aligner --update-deps` or, if you have mamba installed, :code:`mamba update -c conda-forge montreal-forced-aligner --update-deps`.

Installing SpeechBrain
----------------------

Expand All @@ -52,7 +57,7 @@ If you need to use an older version of MFA, you can install it via:
More stable key versions:

* Stable 3.0 release: :code:`conda install -c conda-forge montreal-forced-aligner=3.0.2`
* Stable 3.0 release: :code:`conda update -c conda-forge montreal-forced-aligner`
* Stable 2.2 release: :code:`conda install -c conda-forge montreal-forced-aligner=2.2.17 openfst=1.8.2 kaldi=5.5.1068`
* Stable 2.1 release: :code:`conda install -c conda-forge montreal-forced-aligner=2.1.7 openfst=1.8.2 kaldi=5.5.1068`
* Stable 2.0 release: :code:`conda install -c conda-forge montreal-forced-aligner=2.0.6 openfst=1.8.2 kaldi=5.5.1068`
Expand Down
6 changes: 3 additions & 3 deletions montreal_forced_aligner/alignment/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,18 +621,18 @@ def _run(self) -> None:
).symlink_to(likes_path)
except OSError:
shutil.copyfile(
job.construct_path(self.working_directory, "ali", "ark", dict_id),
ali_path,
job.construct_path(self.working_directory, "ali", "ark", dict_id),
)
shutil.copyfile(
job.construct_path(self.working_directory, "words", "ark", dict_id),
words_path,
job.construct_path(self.working_directory, "words", "ark", dict_id),
)
shutil.copyfile(
likes_path,
job.construct_path(
self.working_directory, "likelihoods", "ark", dict_id
),
likes_path,
)


Expand Down
1 change: 0 additions & 1 deletion montreal_forced_aligner/corpus/acoustic_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,6 @@ def load_corpus(self) -> None:
all_begin = time.time()
self.initialize_database()
if self.dictionary_model is not None and not self.imported:
logger.debug(f"Using {self.phone_set_type}")
self.dictionary_setup()
logger.debug(f"Loaded dictionary in {time.time() - all_begin:.3f} seconds")

Expand Down
6 changes: 4 additions & 2 deletions montreal_forced_aligner/corpus/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ def normalize_text(self) -> None:
oovs.add(w)
if self.language is Language.unknown:
to_g2p.add((w, dict_id))
word_to_g2p_mapping[dict_id][w].add(w)
else:
to_g2p.add((pronunciation_text[i], dict_id))
word_to_g2p_mapping[dict_id][w].add(pronunciation_text[i])
Expand Down Expand Up @@ -793,8 +794,8 @@ def normalize_text(self) -> None:
if g2p_model is not None:
from montreal_forced_aligner.g2p.generator import PyniniGenerator

g2pped = {}
if isinstance(g2p_model, dict):
g2pped = {}
for dict_name, g2p_model in g2p_model.items():
dict_id = dict_name_to_id[dict_name]
gen = PyniniGenerator(
Expand All @@ -811,7 +812,8 @@ def normalize_text(self) -> None:
num_pronunciations=1,
strict_graphemes=True,
)
g2pped = gen.generate_pronunciations()
dict_id = list(dictionaries.keys())[0]
g2pped[dict_id] = gen.generate_pronunciations()
for dict_id, mapping in word_to_g2p_mapping.items():
log_file.write(f"For dictionary {dict_id}:\n")
for w, ps in mapping.items():
Expand Down
6 changes: 3 additions & 3 deletions montreal_forced_aligner/corpus/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ class FeatureConfigMixin:
def __init__(
self,
feature_type: str = "mfcc",
use_energy: bool = False,
use_energy: bool = True,
frame_shift: int = 10,
frame_length: int = 25,
snip_edges: bool = False,
Expand All @@ -602,7 +602,7 @@ def __init__(
sample_frequency: int = 16000,
allow_downsample: bool = True,
allow_upsample: bool = True,
dither: float = 0.0,
dither: float = 0.0001,
energy_floor: float = 1.0,
num_coefficients: int = 13,
num_mel_bins: int = 23,
Expand Down Expand Up @@ -800,7 +800,7 @@ def mfcc_options(self) -> MetaDict:
}
options.update(
{
"dither": 0.0,
"dither": 0.0001,
"energy_floor": 1.0,
}
)
Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/corpus/ivector_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def collect_speaker_ivectors(self) -> None:
speaker_ids.append(speaker_id)
num_utts.append(utts)
ivector_normalize_length(ivector)
ivectors.append(FloatVector(ivector))
ivectors.append(DoubleVector(ivector))
ivector_subtract_mean(ivectors)
update_mapping = []
for i in range(len(speaker_ids)):
Expand Down
3 changes: 3 additions & 0 deletions montreal_forced_aligner/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,9 @@ def phone_table(self):
if not hasattr(self, "_phone_table"):
if self.phone_symbol_table_path.exists():
self._phone_table = pywrapfst.SymbolTable.read_text(self.phone_symbol_table_path)
for k in ["#0", "#1", "#2"]:
if not self._phone_table.member(k):
self._phone_table.add_symbol(k)
else:
self.phones_directory.mkdir(parents=True, exist_ok=True)
session = sqlalchemy.orm.Session.object_session(self)
Expand Down
34 changes: 9 additions & 25 deletions montreal_forced_aligner/diarization/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@

import numpy as np
import sqlalchemy
from _kalpy.ivector import Plda, ivector_normalize_length, ivector_subtract_mean
from _kalpy.matrix import FloatVector
from _kalpy.ivector import Plda
from kalpy.data import Segment
from kalpy.ivector.data import IvectorArchive
from kalpy.ivector.plda import PldaScorer
from scipy.spatial import distance
from sklearn import cluster, manifold, metrics, neighbors, preprocessing
from sqlalchemy.orm import joinedload
Expand Down Expand Up @@ -83,7 +82,7 @@
class PldaClassificationArguments(MfaArguments):
"""Arguments for :class:`~montreal_forced_aligner.diarization.multiprocessing.PldaClassificationFunction`"""

plda: Plda
plda_path: Path
train_ivector_path: Path
num_utts_path: Path
use_xvector: bool
Expand Down Expand Up @@ -129,7 +128,7 @@ def visualize_clusters(
tsne_iterations = 500
mds_iterations = 150
if metric_type is DistanceMetric.plda:
metric = plda.log_likelihood_distance
metric = plda.log_likelihood_distance_vectorized
if manifold_algorithm is ManifoldAlgorithm.mds:
if metric_type is DistanceMetric.cosine:
to_fit = preprocessing.normalize(ivectors, norm="l2")
Expand Down Expand Up @@ -291,7 +290,7 @@ def cluster_matrix(
to_fit = ivectors
score_metric_params = None
if score_metric == "plda" and cluster_type is not ClusterType.affinity:
score_metric = plda.log_likelihood_distance
score_metric = plda.log_likelihood_distance_vectorized
if cluster_type is ClusterType.affinity:
affinity = metric
if metric is DistanceMetric.cosine:
Expand Down Expand Up @@ -486,27 +485,15 @@ class PldaClassificationFunction(KaldiFunction):

def __init__(self, args: PldaClassificationArguments):
super().__init__(args)
self.plda = args.plda
self.plda_path = args.plda_path
self.train_ivector_path = args.train_ivector_path
self.num_utts_path = args.num_utts_path
self.use_xvector = args.use_xvector

def _run(self):
"""Run the function"""

ivector_archive = IvectorArchive(
self.train_ivector_path, num_utterances_file_name=self.num_utts_path
)
speaker_ivectors = []
speaker_ids = []
num_utts = []
for speaker_id, ivector, utts in ivector_archive:
speaker_ids.append(speaker_id)
num_utts.append(utts)
ivector_normalize_length(ivector)
speaker_ivectors.append(FloatVector(ivector))
ivector_subtract_mean(speaker_ivectors)
speaker_ivectors = self.plda.transform_ivectors(speaker_ivectors, num_utts)
plda_scorer = PldaScorer(self.plda_path)
plda_scorer.load_speaker_ivectors(self.train_ivector_path, self.num_utts_path)
with self.session() as session:
job: Job = (
session.query(Job)
Expand All @@ -521,10 +508,7 @@ def _run(self):
.order_by(Utterance.kaldi_id)
)
for u_id, u_ivector in utterances:
ivector = FloatVector()
ivector.from_numpy(u_ivector)
ind, score = self.plda.classify_utterance(ivector, speaker_ivectors, num_utts)
speaker = speaker_ids[ind]
speaker, score = plda_scorer.classify_speaker(u_ivector)
self.callback((u_id, speaker, score))


Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/diarization/speaker_diarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def plda_classification_arguments(self) -> List[PldaClassificationArguments]:
j.id,
getattr(self, "session" if config.USE_THREADING else "db_string", ""),
self.working_log_directory.joinpath(f"plda_classification.{j.id}.log"),
self.plda,
self.plda_path,
self.speaker_ivector_path,
self.num_utts_path,
self.use_xvector,
Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/dictionary/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
if TYPE_CHECKING:
from montreal_forced_aligner.abc import MetaDict

DEFAULT_PUNCTUATION = list(r'、。।,?!!@<>→"”()“„–,.:;—¿?¡:)!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‹›«»~′$+=‘۔')
DEFAULT_PUNCTUATION = list(r'、。।,?!!@<>→"”()“„–,.:;—¿?¡:)!\\&%#*~【】,…‥「」『』〝〟″⟨⟩♪・‚‘‹›«»~′$+=‘۔')

DEFAULT_WORD_BREAK_MARKERS = list(r'?!!(),,.:;¡¿?“„"”&~%#—…‥、。【】$+=〝〟″‹›«»・⟨⟩「」『』')

Expand Down
4 changes: 2 additions & 2 deletions montreal_forced_aligner/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def mfcc_options(self) -> MetaDict:
"sample_frequency": self._meta["features"].get("sample_frequency", 16000),
"frame_shift": self._meta["features"].get("frame_shift", 10),
"frame_length": self._meta["features"].get("frame_length", 25),
"dither": self._meta["features"].get("dither", 0.0),
"dither": self._meta["features"].get("dither", 0.0001),
"preemphasis_coefficient": self._meta["features"].get("preemphasis_coefficient", 0.97),
"snip_edges": self._meta["features"].get("snip_edges", True),
"num_mel_bins": self._meta["features"].get("num_mel_bins", 23),
Expand Down Expand Up @@ -882,7 +882,7 @@ def mfcc_options(self) -> MetaDict:
"""Parameters to use in computing MFCC features."""
return {
"use_energy": self._meta["features"].get("use_energy", False),
"dither": self._meta["features"].get("dither", 0.0),
"dither": self._meta["features"].get("dither", 0.0001),
"energy_floor": self._meta["features"].get("energy_floor", 1.0),
"num_coefficients": self._meta["features"].get("num_coefficients", 13),
"num_mel_bins": self._meta["features"].get("num_mel_bins", 23),
Expand Down
3 changes: 2 additions & 1 deletion montreal_forced_aligner/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ def construct_output_tiers(
if include_original_text:
data[speaker_name]["utterances"] = []
if (
data[speaker_name]["words"]
cleanup_textgrids
and data[speaker_name]["words"]
and w_begin - data[speaker_name]["words"][-1].end < 0.02
and clitic_marker
and (
Expand Down
2 changes: 1 addition & 1 deletion montreal_forced_aligner/tokenization/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def __call__(self, text) -> typing.Generator[str]:
"""
if self.ignore_case:
text = text.lower()
text = text.replace("i̇", "i") # Turkish normalization
if self.bracket_regex:
for word_object in self.bracket_regex.finditer(text):
word = word_object.group(0)
Expand Down Expand Up @@ -458,7 +459,6 @@ def _compile_regexes(self) -> None:
self.final_clitic_regex = re.compile(rf"(?<=\w)({'|'.join(final_clitics)})$")

def _dictionary_sanitize(self, text):

words = self.sanitize_function(text)
normalized_text = []
normalized_character_text = []
Expand Down
9 changes: 2 additions & 7 deletions tests/test_commandline_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def test_download_error():


def test_download_acoustic():

command = ["model", "download", "acoustic", "german_mfa", "--ignore_cache"]

result = click.testing.CliRunner(mix_stderr=False).invoke(
Expand All @@ -57,7 +56,7 @@ def test_download_acoustic():
path = AcousticModel.get_pretrained_path("german_mfa")
assert path.exists()

assert AcousticModel(path).version == "2.0.0rc4.dev19+ged818cb.d20220404"
assert AcousticModel(path).version == "3.0.0"

command = ["model", "download", "acoustic", "german_mfa", "--version", "2.0.0"]

Expand All @@ -73,11 +72,10 @@ def test_download_acoustic():
path = AcousticModel.get_pretrained_path("german_mfa")
assert path.exists()

assert AcousticModel(path).version != "2.0.0rc4.dev19+ged818cb.d20220404"
assert AcousticModel(path).version != "3.0.0"


def test_download_g2p():

command = [
"model",
"download",
Expand All @@ -98,7 +96,6 @@ def test_download_g2p():


def test_download_dictionary():

command = [
"model",
"download",
Expand All @@ -120,7 +117,6 @@ def test_download_dictionary():


def test_download_list_acoustic():

command = ["model", "download", "acoustic", "--ignore_cache"]

result = click.testing.CliRunner(mix_stderr=False).invoke(
Expand All @@ -135,7 +131,6 @@ def test_download_list_acoustic():


def test_download_list_dictionary():

command = [
"model",
"download",
Expand Down

0 comments on commit 91fd82d

Please sign in to comment.