From bfaa0fbf0e6038efc0153c0b45fadb61f422feb3 Mon Sep 17 00:00:00 2001
From: antoni <antoni.s.jankowski@gmail.com>
Date: Sat, 3 Feb 2024 20:48:54 +0200
Subject: [PATCH] Cut labels to fit the audio data

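Trim the trailing label frames (roughly two per string) so that the pitch
and voiced labels match the number of audio frames
(audio.shape[-1] // penn.HOPSIZE) when resampling is skipped. The root
cause of the length mismatch is not yet understood.

Also replace a mistyped `printf` call with `print`.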

related to: https://github.com/anthonio9/penn/issues/7
---
 penn/data/preprocess/core.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/penn/data/preprocess/core.py b/penn/data/preprocess/core.py
index 3eb5f34..6a02fa3 100644
--- a/penn/data/preprocess/core.py
+++ b/penn/data/preprocess/core.py
@@ -274,7 +274,7 @@ def gset():
 
         # FOR sampling rates like 11025, 22050, 44100, resampling isn't necessary
         if GSET_SAMPLE_RATE / penn.SAMPLE_RATE % 1 != 0:
-            printf("Resampling to penn.SAMPLE_RATE")
+            print("Resampling to penn.SAMPLE_RATE")
 
             pitch_list = np.vsplit(pitch, pitch.shape[0])
             pitch_list_final = []
@@ -316,6 +316,13 @@ def gset():
 
             if voiced.shape[0] == 1:
                 voiced = voiced[0, :]
+        else:
+            overload = np.abs(audio.shape[-1] // penn.HOPSIZE - pitch.shape[-1])
+            # HACK: no-resampling path only - drop `overload` trailing label frames to match the audio frame count (NOTE: overload == 0 makes `:-overload` empty the arrays)
+            pitch = pitch[..., :-overload]
+            voiced = voiced[..., :-overload]
+
+        assert pitch.shape[-1] == audio.shape[-1] // penn.HOPSIZE
 
         # Save to cache
         np.save(output_directory / f'{stem}-pitch.npy', pitch)