From 2cd2456d75419004784227374788a957b001f8f6 Mon Sep 17 00:00:00 2001 From: Sam Thursfield Date: Sat, 12 Jan 2019 01:36:49 +0100 Subject: [PATCH] gstdec: Avoid leaking memory when reading audio data We were reading audio data with the Gst.Buffer.extract_dup() method. This allocates new memory using g_malloc() and returns it to the caller. The memory needs to be freed with g_free(); however, the PyGObject bindings do not do this. We can avoid this problem by reading the audio data directly from the underlying Gst.Memory object. In this case the Python interpreter is responsible for copying the data and so it is able to correctly free the memory after it's no longer needed. I tested this by calling pyacoustid.fingerprint() on 34 .MP3 files in sequence, and I saw the following difference: - memory usage without the patch: 557052 KB - memory usage with the patch: 52752 KB The generated acoustid fingerprints were identical with and without the patch. --- audioread/gstdec.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/audioread/gstdec.py b/audioread/gstdec.py index 8069797..b6f1d5a 100644 --- a/audioread/gstdec.py +++ b/audioread/gstdec.py @@ -311,7 +311,20 @@ def _new_sample(self, sink): # New data is available from the pipeline! Dump it into our # queue (or possibly block if we're full). buf = sink.emit('pull-sample').get_buffer() - self.queue.put(buf.extract_dup(0, buf.get_size())) + + # We can't use Gst.Buffer.extract() to read the data as it crashes + # when called through PyGObject. We also can't use + # Gst.Buffer.extract_dup() because we have no way in Python to free + # the memory that it returns. Instead we get access to the actual + # data via Gst.Memory.map(). 
+ mem = buf.get_all_memory() + success, info = mem.map(Gst.MapFlags.READ) + if success: + data = info.data + mem.unmap(info) + self.queue.put(data) + else: + raise GStreamerError("Unable to map buffer memory while reading the file.") return Gst.FlowReturn.OK def _unkown_type(self, uridecodebin, decodebin, caps):