From bfaa0fbf0e6038efc0153c0b45fadb61f422feb3 Mon Sep 17 00:00:00 2001 From: antoni Date: Sat, 3 Feb 2024 20:48:54 +0200 Subject: [PATCH] Cut labels to fit the audio data Had to cut the labels at the end of each string, probably around two per string, so that they fit the audio data better. The exact reason for the mismatch is not known yet. Related to: https://github.com/anthonio9/penn/issues/7 --- penn/data/preprocess/core.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/penn/data/preprocess/core.py b/penn/data/preprocess/core.py index 3eb5f34..6a02fa3 100644 --- a/penn/data/preprocess/core.py +++ b/penn/data/preprocess/core.py @@ -274,7 +274,7 @@ def gset(): # FOR sampling rates like 11025, 22050, 44100, resampling isn't necessary if GSET_SAMPLE_RATE / penn.SAMPLE_RATE % 1 != 0: - printf("Resampling to penn.SAMPLE_RATE") + print("Resampling to penn.SAMPLE_RATE") pitch_list = np.vsplit(pitch, pitch.shape[0]) pitch_list_final = [] @@ -316,6 +316,13 @@ def gset(): if voiced.shape[0] == 1: voiced = voiced[0, :] + else: + overload = np.abs(audio.shape[-1] // penn.HOPSIZE - pitch.shape[-1]) + # this is a bad, ugly hack, but well, it is what it is, has to be enabled if resampling isn't enabled + pitch = pitch[..., :-overload] + voiced = voiced[..., :-overload] + + assert pitch.shape[-1] == audio.shape[-1] // penn.HOPSIZE # Save to cache np.save(output_directory / f'{stem}-pitch.npy', pitch)