'''
Author:     Ji-Sung Kim
Project:    deepjazz
Purpose:    Parse, clean up, and process data.

Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml with
express permission.
'''
from __future__ import print_function

from music21 import *
from collections import defaultdict, OrderedDict
from itertools import groupby, izip_longest

from grammar import *

#----------------------------HELPER FUNCTIONS----------------------------------#

''' Helper function to parse a MIDI file into its measures and chords '''
def __parse_midi(data_fn):
    # Parse the MIDI data into separate melody and accompaniment parts.
    midi_data = converter.parse(data_fn)

    # Get the melody part and compress it into a single voice.
    melody_stream = midi_data[5]  # For the Metheny piece, the melody is Part #5.
    melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
    for j in melody2:
        melody1.insert(j.offset, j)
    melody_voice = melody1

    # Give zero-length elements (e.g. imported grace notes) a sixteenth-note
    # duration so they survive later processing.
    for i in melody_voice:
        if i.quarterLength == 0.0:
            i.quarterLength = 0.25

    # Change the key signature to adhere to comp_stream (1 sharp, major mode).
    # Also add an Electric Guitar instrument.
    melody_voice.insert(0, instrument.ElectricGuitar())
    melody_voice.insert(0, key.KeySignature(sharps=1, mode='major'))

    # The accompaniment parts. Take only the best subset of parts from the
    # original data. Maybe add more parts and hand-add valid instruments;
    # should at least add a string part (for sparse solos).
    # Verified good parts: 0, 1, 6, 7.
    partIndices = [0, 1, 6, 7]
    comp_stream = stream.Voice()
    comp_stream.append([j.flat for i, j in enumerate(midi_data)
                        if i in partIndices])

    # Full stream containing both the melody and the accompaniment.
    # All parts are flattened.
    full_stream = stream.Voice()
    for i in xrange(len(comp_stream)):
        full_stream.append(comp_stream[i])
    full_stream.append(melody_voice)

    # Extract the solo stream, assuming the positions are known
    # (..ByOffset(i, j)). Note that for different instruments (with
    # stream.flat) you NEED to use stream.Part(), not stream.Voice().
    # The accompanied solo lies in the offset range [476, 548] used below.
    solo_stream = stream.Voice()
    for part in full_stream:
        curr_part = stream.Part()
        curr_part.append(part.getElementsByClass(instrument.Instrument))
        curr_part.append(part.getElementsByClass(tempo.MetronomeMark))
        curr_part.append(part.getElementsByClass(key.KeySignature))
        curr_part.append(part.getElementsByClass(meter.TimeSignature))
        curr_part.append(part.getElementsByOffset(476, 548,
                                                  includeEndBoundary=True))
        cp = curr_part.flat
        solo_stream.insert(cp)

    # Group by measure so that classification can be done per measure.
    # Note that measure 0 holds the time signature, metronome mark, etc.,
    # which all have an offset of 0.0.
    melody_stream = solo_stream[-1]
    measures = OrderedDict()
    offsetTuples = [(int(n.offset / 4), n) for n in melody_stream]  # 4 beats/measure
    measureNum = 0  # for now, don't use real measure numbers (119, 120)
    for key_x, group in groupby(offsetTuples, lambda x: x[0]):
        measures[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Get the stream of chords.
    # offsetTuples_chords: group chords by measure number.
    chordStream = solo_stream[0]
    chordStream.removeByClass(note.Rest)
    chordStream.removeByClass(note.Note)
    offsetTuples_chords = [(int(n.offset / 4), n) for n in chordStream]

    # Generate the chord structure. Use just track 1 (piano), since it is
    # the only instrument that has chords.
    # Group into measures of 4 beats, just like before.
    chords = OrderedDict()
    measureNum = 0
    for key_x, group in groupby(offsetTuples_chords, lambda x: x[0]):
        chords[measureNum] = [n[1] for n in group]
        measureNum += 1

    # Fix for len(measures) != len(chords): the piece resolves at the end, but
    # the melody stops a sixteenth note before the final measure, so that
    # measure never shows up in the melody, while the accompaniment's beat 1
    # right after it does. Melody and accompaniment both start on and resolve
    # to the same key (Ab), so the last (chords-only) measure can simply be
    # cut, which also lets the piece loop.
    del chords[len(chords) - 1]
    assert len(chords) == len(measures)

    return measures, chords
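
# Illustration (not part of the original pipeline): the groupby-based
# bucketing in __parse_midi works because itertools.groupby only merges
# *consecutive* equal keys, and music21 streams iterate in offset order,
# so the (measure_index, element) tuples arrive pre-sorted. A toy sketch:
#
#     >>> from itertools import groupby
#     >>> toy = [(0, 'n1'), (0, 'n2'), (1, 'n3')]
#     >>> [(k, [x[1] for x in g]) for k, g in groupby(toy, lambda x: x[0])]
#     [(0, ['n1', 'n2']), (1, ['n3'])]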

''' Helper function to get the grammatical data from given musical data. '''
def __get_abstract_grammars(measures, chords):
    # Extract grammars: pair each measure's melody with its chords and parse.
    # Start at index 1 to skip measure 0 (time signature, metronome, etc.).
    abstract_grammars = []
    for ix in xrange(1, len(measures)):
        m = stream.Voice()
        for i in measures[ix]:
            m.insert(i.offset, i)
        c = stream.Voice()
        for j in chords[ix]:
            c.insert(j.offset, j)
        parsed = parse_melody(m, c)
        abstract_grammars.append(parsed)

    return abstract_grammars
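
# Note: parse_melody (imported from grammar.py) is assumed here to return one
# space-separated grammar string per measure; get_corpus_data below relies on
# that format when it splits each string on ' '.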

#----------------------------PUBLIC FUNCTIONS----------------------------------#

''' Get musical data from a MIDI file '''
def get_musical_data(data_fn):
    measures, chords = __parse_midi(data_fn)
    abstract_grammars = __get_abstract_grammars(measures, chords)

    return chords, abstract_grammars

''' Get corpus data from grammatical data '''
def get_corpus_data(abstract_grammars):
    corpus = [x for sublist in abstract_grammars for x in sublist.split(' ')]
    values = set(corpus)
    val_indices = dict((v, i) for i, v in enumerate(values))
    indices_val = dict((i, v) for i, v in enumerate(values))

    return corpus, values, val_indices, indices_val
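
# A minimal usage sketch, not part of the original module's public surface.
# The path below is an assumption: deepjazz's generator passes
# 'midi/' + 'original_metheny.mid'; adjust to wherever your data lives.
if __name__ == '__main__':
    data_fn = 'midi/original_metheny.mid'  # assumed example input path
    chords, abstract_grammars = get_musical_data(data_fn)
    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
    print('corpus length: %s' % len(corpus))
    print('total # of values: %s' % len(values))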