# bigram.py
# -*- coding: utf-8 -*-
"""MusicProcessing.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1hLB2_cwYOk1GB4_yldgFS8BdIOVSMR1C
"""
# Commented out IPython magic to ensure Python compatibility.
from google.colab import drive
drive.mount("/content/gdrive")
# %cd gdrive/Shareddrives/COS\ 401
# Commented out IPython magic to ensure Python compatibility.
# %cd dataset
# Commented out IPython magic to ensure Python compatibility.
import csv
import numpy
# %cd csv_train/
# Commented out IPython magic to ensure Python compatibility.
import os
import pandas as pd
# %cd ../
import math
# Map note/chord root names as they appear in the dataset to integer pitch
# classes (0-11). Rests map to math.inf and empty chord roots ("[]") to -1.
note_order = {'C0': 0, 'C#': 1, 'Db': 1, 'C2': 2, 'D0': 2, 'D#': 3,
              'Eb': 3, 'E0': 4, 'E#': 5, 'Fb': 4, 'F0': 5, 'F#': 6,
              'Gb': 6, 'G0': 7, 'G#': 8, 'Ab': 8, 'A0': 9,
              'A#': 10, 'Bb': 10, 'B0': 11, 'B#': 0, 'Cb': 11, "rest": math.inf,
              'F2': 7, 'D2': 4, 'G2': 9, 'A2': 11, 'B-2': 9, 'B2': 1,
              'E2': 6, 'A-2': 7, 'C-2': 10, 'D-2': 0, 'E-2': 2, 'F-2': 3, "[]": -1}
def get_note_from_name(note_name):
    return note_order[note_name]

def get_standardized_note(note_name, key_fifths):
    # Transpose a note into a key-independent pitch class: each sharp in the
    # key signature (key_fifths) raises the tonic by a fifth (7 semitones),
    # so subtracting 7*key_fifths mod 12 maps every key's tonic to 0.
    # Rests are encoded as -1.
    if note_name == "rest":
        return -1
    note = get_note_from_name(note_name)
    return (note - 7*int(key_fifths) % 12) % 12
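# Quick sanity check (illustrative values, not taken from the dataset): in
# D major (key_fifths = 2) the tonic D ('D0' -> 2) should standardize to 0,
# and its fifth A ('A0' -> 9) to 7.
# get_standardized_note('D0', 2)  # (2 - 14 % 12) % 12 == 0
# get_standardized_note('A0', 2)  # (9 - 14 % 12) % 12 == 7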
# standardize notes and chords in all train files
standardized_pds = []
for song in os.listdir("csv_train"):
    file_pd = pd.read_csv("csv_train/" + str(song))
    new_notes = []
    print(song)
    for i in range(len(file_pd["note_root"])):
        new_notes.append(get_standardized_note(file_pd["note_root"][i], file_pd["key_fifths"][i]))
    file_pd["standardized_note"] = new_notes
    new_chords = []
    for i in range(len(file_pd["chord_root"])):
        new_chords.append(get_standardized_note(file_pd["chord_root"][i], file_pd["key_fifths"][i]))
    file_pd["standardized_chord"] = new_chords
    standardized_pds.append(file_pd)
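# Equivalent, more idiomatic pandas for the two inner loops (a sketch assuming
# the same column names; the explicit loops above remain the canonical version):
# file_pd["standardized_note"] = file_pd.apply(
#     lambda row: get_standardized_note(row["note_root"], row["key_fifths"]), axis=1)
# file_pd["standardized_chord"] = file_pd.apply(
#     lambda row: get_standardized_note(row["chord_root"], row["key_fifths"]), axis=1)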
# n-gram model for chords
import re
from nltk.tokenize import word_tokenize
from collections import defaultdict, Counter
from nltk import ngrams
import random
class MarkovChain:
    def __init__(self):
        # Maps each state to the list of states observed to follow it.
        self.lookup_dict = defaultdict(list)

    def add_document_bigrams(self, n_list):
        # Record every observed (current, next) pair from one song.
        bigrams = self.__get_bigrams(n_list)
        for bigram in bigrams:
            self.lookup_dict[bigram[0]].append(bigram[1])

    def __get_bigrams(self, notes):
        if len(notes) < 1:
            return
        for i in range(len(notes) - 1):
            yield [notes[i], notes[i + 1]]

    def generate_text(self, first_word):
        # Pick the next state uniformly among the (up to) three most common
        # followers of first_word.
        if len(self.lookup_dict) > 0:
            next_word = random.choice(Counter(self.lookup_dict[first_word]).most_common()[:3])[0]
            print("Next word suggestion:", next_word)
            return next_word
        # else:
        #     next_word = random.choice(Counter(self.lookup_dict[(0, "major")]).most_common()[:3])[0]
        #     print("Next word suggestion:", next_word)
        #     return next_word
        return
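# Tiny illustration (made-up sequence, not from the dataset): feeding the note
# sequence [0, 4, 7, 0] yields the bigrams [0, 4], [4, 7], [7, 0], so
# lookup_dict becomes {0: [4], 4: [7], 7: [0]} and generate_text(0) returns 4.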
# for each DataFrame in standardized_pds, add its bigrams to the chain,
# then generate notes/chords
notes_markov = MarkovChain()
chords_markov = MarkovChain()
for file_pd in standardized_pds:
    notes_markov.add_document_bigrams(file_pd["standardized_note"])
for file_pd in standardized_pds:
    chords_markov.add_document_bigrams(list(zip(file_pd["standardized_chord"], file_pd["chord_type"])))
# next_note = notes_markov.generate_text(0)
# for i in range(100):
#     next_note = notes_markov.generate_text(next_note)
next_chord = chords_markov.generate_text((6, "minor"))
for i in range(8):
    next_chord = chords_markov.generate_text(next_chord)
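# A minimal way to keep the generated progression instead of only printing it
# (a sketch; assumes the seed chord (6, "minor") appears in the training data,
# otherwise generate_text fails on an empty follower list):
# progression = [(6, "minor")]
# for _ in range(8):
#     progression.append(chords_markov.generate_text(progression[-1]))
# print(progression)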