From 2cd2456d75419004784227374788a957b001f8f6 Mon Sep 17 00:00:00 2001
From: Sam Thursfield <sam@afuera.me.uk>
Date: Sat, 12 Jan 2019 01:36:49 +0100
Subject: [PATCH] gstdec: Avoid leaking memory when reading audio data

We were reading audio data with the Gst.Buffer.extract_dup() method.
This allocates new memory using g_malloc() and returns it to the caller.
The memory needs to be freed with g_free(); however, the PyGObject
bindings do not do this.

We can avoid this problem by reading the audio data directly from the
underlying Gst.Memory object. In this case the Python interpreter is
responsible for copying the data and so it is able to correctly free
the memory after it's no longer needed.

I tested this by calling pyacoustid.fingerprint() on 34 .MP3 files in
sequence, and I saw the following difference:

  - memory usage without the patch: 557052 KB
  - memory usage with the patch: 52752 KB

The generated acoustid fingerprints were identical with and without the
patch.
---
 audioread/gstdec.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/audioread/gstdec.py b/audioread/gstdec.py
index 8069797..b6f1d5a 100644
--- a/audioread/gstdec.py
+++ b/audioread/gstdec.py
@@ -311,7 +311,20 @@ def _new_sample(self, sink):
             # New data is available from the pipeline! Dump it into our
             # queue (or possibly block if we're full).
             buf = sink.emit('pull-sample').get_buffer()
-            self.queue.put(buf.extract_dup(0, buf.get_size()))
+
+            # We can't use Gst.Buffer.extract() to read the data as it crashes
+            # when called through PyGObject. We also can't use
+            # Gst.Buffer.extract_dup() because we have no way in Python to free
+            # the memory that it returns. Instead we get access to the actual
+            # data via Gst.Memory.map().
+            mem = buf.get_all_memory()
+            success, info = mem.map(Gst.MapFlags.READ)
+            if success:
+                data = info.data
+                mem.unmap(info)
+                self.queue.put(data)
+            else:
+                raise GStreamerError("Unable to map buffer memory while reading the file.")
         return Gst.FlowReturn.OK
 
     def _unkown_type(self, uridecodebin, decodebin, caps):