# bigram.py
# -*- coding: utf-8 -*-
"""MusicProcessing.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1hLB2_cwYOk1GB4_yldgFS8BdIOVSMR1C
"""
# Commented out IPython magic to ensure Python compatibility.
from google.colab import drive
drive.mount("/content/gdrive")
# %cd gdrive/Shareddrives/COS\ 401
# Commented out IPython magic to ensure Python compatibility.
# %cd dataset
# Commented out IPython magic to ensure Python compatibility.
import csv
import numpy
# %cd csv_train/
# Commented out IPython magic to ensure Python compatibility.
import os
import pandas as pd
# %cd ../
import math
# Map note/chord root names as they appear in the dataset to integer pitch
# classes (0-11). Rests map to math.inf and empty chord roots ("[]") to -1.
note_order = {'C0': 0, 'C#': 1, 'Db': 1, 'C2': 2, 'D0': 2, 'D#': 3,
              'Eb': 3, 'E0': 4, 'E#': 5, 'Fb': 4, 'F0': 5, 'F#': 6,
              'Gb': 6, 'G0': 7, 'G#': 8, 'Ab': 8, 'A0': 9,
              'A#': 10, 'Bb': 10, 'B0': 11, 'B#': 0, 'Cb': 11, "rest": math.inf,
              'F2': 7, 'D2': 4, 'G2': 9, 'A2': 11, 'B-2': 9, 'B2': 1,
              'E2': 6, 'A-2': 7, 'C-2': 10, 'D-2': 0, 'E-2': 2, 'F-2': 3, "[]": -1}
def get_note_from_name(note_name):
    return note_order[note_name]

def get_standardized_note(note_name, key_fifths):
    # Transpose a note into a key-independent pitch class: each sharp in the
    # key signature (key_fifths) raises the tonic by a fifth (7 semitones),
    # so subtracting 7*key_fifths mod 12 maps every key's tonic to 0.
    # Rests are encoded as -1.
    if note_name == "rest":
        return -1
    note = get_note_from_name(note_name)
    return (note - 7*int(key_fifths) % 12) % 12
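# Quick sanity check (illustrative values, not taken from the dataset): in
# D major (key_fifths = 2) the tonic D ('D0' -> 2) should standardize to 0,
# and its fifth A ('A0' -> 9) to 7.
# get_standardized_note('D0', 2)  # (2 - 14 % 12) % 12 == 0
# get_standardized_note('A0', 2)  # (9 - 14 % 12) % 12 == 7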
# standardize notes and chords in all train files
standardized_pds = []
for song in os.listdir("csv_train"):
    file_pd = pd.read_csv("csv_train/" + str(song))
    new_notes = []
    print(song)
    for i in range(len(file_pd["note_root"])):
        new_notes.append(get_standardized_note(file_pd["note_root"][i], file_pd["key_fifths"][i]))
    file_pd["standardized_note"] = new_notes
    new_chords = []
    for i in range(len(file_pd["chord_root"])):
        new_chords.append(get_standardized_note(file_pd["chord_root"][i], file_pd["key_fifths"][i]))
    file_pd["standardized_chord"] = new_chords
    standardized_pds.append(file_pd)
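# Equivalent, more idiomatic pandas for the two inner loops (a sketch assuming
# the same column names; the explicit loops above remain the canonical version):
# file_pd["standardized_note"] = file_pd.apply(
#     lambda row: get_standardized_note(row["note_root"], row["key_fifths"]), axis=1)
# file_pd["standardized_chord"] = file_pd.apply(
#     lambda row: get_standardized_note(row["chord_root"], row["key_fifths"]), axis=1)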
# n-gram model for chords
import re
from nltk.tokenize import word_tokenize
from collections import defaultdict, Counter
from nltk import ngrams
import random
class MarkovChain:
    def __init__(self):
        # Maps each state to the list of states observed to follow it.
        self.lookup_dict = defaultdict(list)

    def add_document_bigrams(self, n_list):
        # Record every observed (current, next) pair from one song.
        bigrams = self.__get_bigrams(n_list)
        for bigram in bigrams:
            self.lookup_dict[bigram[0]].append(bigram[1])

    def __get_bigrams(self, notes):
        if len(notes) < 1:
            return
        for i in range(len(notes) - 1):
            yield [notes[i], notes[i + 1]]

    def generate_text(self, first_word):
        # Pick the next state uniformly among the (up to) three most common
        # followers of first_word.
        if len(self.lookup_dict) > 0:
            next_word = random.choice(Counter(self.lookup_dict[first_word]).most_common()[:3])[0]
            print("Next word suggestion:", next_word)
            return next_word
        # else:
        #     next_word = random.choice(Counter(self.lookup_dict[(0, "major")]).most_common()[:3])[0]
        #     print("Next word suggestion:", next_word)
        #     return next_word
        return
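# Tiny illustration (made-up sequence, not from the dataset): feeding the note
# sequence [0, 4, 7, 0] yields the bigrams [0, 4], [4, 7], [7, 0], so
# lookup_dict becomes {0: [4], 4: [7], 7: [0]} and generate_text(0) returns 4.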
# for each DataFrame in standardized_pds, add its bigrams to the chain,
# then generate notes/chords
notes_markov = MarkovChain()
chords_markov = MarkovChain()
for file_pd in standardized_pds:
    notes_markov.add_document_bigrams(file_pd["standardized_note"])
for file_pd in standardized_pds:
    chords_markov.add_document_bigrams(list(zip(file_pd["standardized_chord"], file_pd["chord_type"])))
# next_note = notes_markov.generate_text(0)
# for i in range(100):
#     next_note = notes_markov.generate_text(next_note)
next_chord = chords_markov.generate_text((6, "minor"))
for i in range(8):
    next_chord = chords_markov.generate_text(next_chord)
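# A minimal way to keep the generated progression instead of only printing it
# (a sketch; assumes the seed chord (6, "minor") appears in the training data,
# otherwise generate_text fails on an empty follower list):
# progression = [(6, "minor")]
# for _ in range(8):
#     progression.append(chords_markov.generate_text(progression[-1]))
# print(progression)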