360 lines
11 KiB
Ruby
360 lines
11 KiB
Ruby
|
|
module JamRuby
|
||
|
|
|
||
|
|
# this is probably a one-off class used to map Tency-named stems into JamKazam-named stems
|
||
|
|
class TencyStemMapping
|
||
|
|
|
||
|
|
# Logger shared by the class and all of its instances; the methods below send
# their debug/info/error output through it.
@@log = Logging.logger[TencyStemMapping]
|
||
|
|
|
||
|
|
# Returns the memoized S3Manager, constructing it on first use from the
# 'jamkazam-tency' name (presumably the bucket -- confirm against S3Manager)
# and the AWS credentials held in APP_CONFIG.
def s3_manager
  return @s3_manager if @s3_manager

  @s3_manager = S3Manager.new('jamkazam-tency', APP_CONFIG.aws_access_key_id, APP_CONFIG.aws_secret_access_key)
end
|
||
|
|
|
||
|
|
# Sets up an empty mapper.
#
# originals_folder - directory holding the original (Tency-named) song folders.
# mapping_folder   - directory holding the hand-renamed (JamKazam-named) song folders.
#
# Both folders default to the locations this one-off tool was originally run
# against, so calling `new` with no arguments behaves exactly as before.
def initialize(originals_folder = "/Volumes/sethcall/Dropbox/seth@jamkazam.com/JamTracks - Tency Music - Original Folder for Normalization Map",
               mapping_folder = "/Volumes/sethcall/Dropbox/seth@jamkazam.com/JamTracks - Tency Music")
  @originals_folder = originals_folder
  @mapping_folder = mapping_folder
  # cache_id => song hash, filled by tency_originals / tency_maps or hydrate
  @original_songs = {}
  @mapping_songs = {}
  # downcased original basename => { matches: [...] }, filled by track_mapping
  @mappings = {}
end
|
||
|
|
|
||
|
|
# Builds both song caches from disk and persists them.
#
# Runs tency_originals, then tency_maps, then dump, and returns whatever
# dump returns.
def create_map
  tency_originals
  tency_maps

  dump
end
|
||
|
|
|
||
|
|
# Rebuilds only the hand-mapped song cache and persists it.
#
# Runs tency_maps followed by dump_map and returns whatever dump_map returns;
# the originals cache is left untouched.
def create_mapping_map
  tency_maps

  dump_map
end
|
||
|
|
|
||
|
|
# Reloads @original_songs and @mapping_songs from 'original_songs.yml' and
# 'mapping_songs.yml' in the current working directory (the files written by
# dump / dump_map).
def hydrate
  @original_songs = load_song_cache('original_songs.yml')
  @mapping_songs = load_song_cache('mapping_songs.yml')
end

# Loads one YAML song cache.
#
# The cached song hashes use Symbol keys (:md5, :filename, :id). From Psych 4
# (Ruby 3.1) YAML.load_file is a safe load that rejects Symbols, so where the
# safe file loader exists Symbol is permitted explicitly; older Psych versions
# keep using plain load_file, which accepts them.
def load_song_cache(path)
  if YAML.respond_to?(:safe_load_file)
    YAML.safe_load_file(path, permitted_classes: [Symbol], aliases: true)
  else
    YAML.load_file(path)
  end
end
private :load_song_cache
|
||
|
|
|
||
|
|
# Extracts the instrument and part from a hand-sanitized stem filename such as
# "Some Song Stem - Vocal - Lead.wav".
#
# filename - path or basename of the mapped track.
#
# Returns [instrument, part]. part is nil when the name has only one segment
# after "Stem -"; both are nil when the text after "Stem" does not start with
# a dash (a warning is logged in that case).
# Raises RuntimeError ("no stem for ...") when the basename contains no "Stem".
def parse_sanitized_filename(filename)
  basename = File.basename(filename)
  stem = basename.index('Stem')
  raise "no stem for #{filename}" unless stem

  # Drop everything up to and including 'Stem', plus the file extension
  # (whatever its length), leaving e.g. "- Vocal - Lead".
  without_ext = File.basename(basename, File.extname(basename))
  label = without_ext[(stem + 'Stem'.length)..-1].strip

  unless label.start_with?('-')
    @@log.warn("no or misplaced dash for #{filename}")
    return [nil, nil]
  end

  # Non-bang strip: strip! returns nil when there is nothing to strip, which
  # previously blew up on names like "Stem -Vocal-Lead.wav".
  instrument, part = label[1..-1].split('-').map(&:strip)
  [instrument, part]
end
|
||
|
|
|
||
|
|
# For all the tracks that I have labeled manually as
|
||
|
|
# Instrument = Upright Bass and Part = Upright Bass,
|
||
|
|
# can you please change both the Instrument and Part to Double Bass instead?
|
||
|
|
#
|
||
|
|
# Sanity-checks the hand-mapped filenames: reloads the caches, parses every
# mapped filename, and logs how many yielded no instrument or no part, along
# with the list of files that had no part.
def check_mappings
  instrument_gaps = 0
  partless_files = []

  hydrate
  @mapping_songs.each_value do |data|
    mapped_filename = data[:filename]
    @@log.debug("parsing #{mapped_filename}")
    instrument, part = parse_sanitized_filename(mapped_filename)
    @@log.debug("parsed #{instrument} (#{part})")

    instrument_gaps += 1 unless instrument
    partless_files << mapped_filename unless part
  end

  @@log.info("SUMMARY")
  @@log.info("-------")
  @@log.info("missing instruments:#{instrument_gaps} missing parts: #{partless_files.length}")
  @@log.info("files with no parts: #{partless_files}")

  # Files with no parts, as recorded from an earlier run:
  #   "Huey Lewis And The News - Heart And Soul - 31957/Heart And Soul Stem - Synth 2.wav"
  #   "ZZ Top - Tush - 20852/Tush Stem - Clicktrack.wav"
  #   "Crosby Stills And Nash - Teach Your Children - 15440/Teach Your Children Stem - Bass Guitar.wav"
  #   "Brad Paisley - She's Everything - 19886/She's Everything Stem - Clicktrack.wav"
  #   "Toby Keith - Beer For My Horses - 7221/Beer For My Horses Stem - Lap Steel.wav"
  #   "Toby Keith - Beer For My Horses - 7221/Beer For My Horses Stem - Acoustic Guitar.wav"
end
|
||
|
|
|
||
|
|
# Records that an original track named `basename` was hand-mapped to
# `instr_part`.
#
# basename   - basename of the original track; matched case-insensitively.
#              The caller's string is not modified.
# instr_part - the instrument/part hash to record for that name.
#
# Returns the array of matches collected so far for that basename.
def track_mapping(basename, instr_part)
  # Non-destructive downcase: the bang form mutated the caller's argument and
  # raised on frozen strings.
  key = basename.downcase
  info = (@mappings[key] ||= { matches: [] })
  info[:matches] << instr_part
end
|
||
|
|
|
||
|
|
# Correlates every hand-mapped track with its original (same cache id), tallies
# which instrument/part each original basename was mapped to, prints a summary
# to stdout and writes four CSV reports into the current working directory:
# mapping.csv, determinate-single-matches.csv, determinate-multi-matches.csv
# and ambiguous-matches.csv.
def correlate
  mapped = 0
  unmapped_details = []
  no_instrument = []
  common_unknown_instruments = {}

  hydrate
  @mapping_songs.each do |cache_id, data|
    # go through each hand-mapped track and find its matching original, if any
    mapped_filename = data[:filename]
    found_original = @original_songs[cache_id]

    unless found_original
      unmapped_details << { filename: mapped_filename }
      next
    end

    mapped += 1
    original_basename = File.basename(found_original[:filename]).downcase
    instrument, part = parse_sanitized_filename(mapped_filename)
    instr_part = JamTrackImporter.determine_instrument(instrument, part)

    if instr_part[:instrument]
      # track the mapping of this one
      track_mapping(original_basename, instr_part)
    else
      mapped_basename = File.basename(mapped_filename)
      @@log.error("unable to determine instrument for #{mapped_basename}")
      no_instrument << { filename: mapped_basename, instrument: instrument, part: part }
      # the hash is used as an ordered set of distinct "instrument-(part)" labels
      common_unknown_instruments["#{instrument}-(#{part})"] = 1
    end
  end

  print_correlation_summary(mapped, unmapped_details, no_instrument, common_unknown_instruments)
  rank_mapping_matches
  write_mapping_csvs
end

# Prints the mapped/unmapped counts, each unmapped file, each file whose
# instrument could not be determined, and the distinct unknown labels.
def print_correlation_summary(mapped, unmapped_details, no_instrument, unknown_labels)
  puts("SUMMARY")
  puts("-------")
  puts("MAPPED:#{mapped} UNMAPPED:#{unmapped_details.length}")
  unmapped_details.each do |detail|
    puts "UNMAPPED FILE: #{File.basename(detail[:filename])}"
  end
  puts("UNKNOWN INSTRUMENT: #{no_instrument.length}")
  no_instrument.each do |item|
    puts("UNKNOWN INSTRUMENT: #{item[:filename]}")
  end
  unknown_labels.each_key { |label| puts(label) }
end

# For every basename in @mappings, counts identical instrument/part matches,
# orders them most-frequent first, and stores the result as mapping[:ordered]
# (array of [match, count]) and mapping[:detail] (printable summary).
def rank_mapping_matches
  @mappings.each do |basename, mapping|
    tally = mapping[:matches].each_with_object(Hash.new(0)) { |match, acc| acc[match] += 1 }
    ordered = tally.sort_by { |_match, count| -count }
    detail = ordered.map { |match, count| "#{match[:instrument]}(#{match[:part]})/#{count}, " }.join

    puts "map detail: #{basename}: #{detail}"

    mapping[:ordered] = ordered
    mapping[:detail] = detail
  end
end

# Writes the four CSV reports from the ranked @mappings.
def write_mapping_csvs
  CSV.open("mapping.csv", "wb") do |csv|
    @mappings.each do |basename, mapping|
      ranked = mapping[:ordered]
      match, count = ranked.first
      # Trustworthy when there is a single candidate, or when the top candidate
      # leads the runner-up by MORE than 4 occurrences.
      # NOTE(review): the original comment said "at least 4"; the comparison
      # has always been strict (> 4) and is kept as-is -- confirm the intent.
      trust_worthy = ranked.length == 1 || count - 4 > ranked[1][1]
      csv << [basename, match[:instrument], match[:part], count, trust_worthy]
    end
  end

  write_determinate_csv("determinate-single-matches.csv") { |count| count == 1 }
  write_determinate_csv("determinate-multi-matches.csv") { |count| count > 1 }

  CSV.open("ambiguous-matches.csv", "wb") do |csv|
    @mappings.each do |basename, mapping|
      csv << [basename, mapping[:detail]] if mapping[:ordered].length > 1
    end
  end
end

# Writes one row per basename that has exactly one candidate mapping and whose
# occurrence count satisfies the given block.
def write_determinate_csv(path)
  CSV.open(path, "wb") do |csv|
    @mappings.each do |basename, mapping|
      ranked = mapping[:ordered]
      next unless ranked.length == 1

      match, count = ranked.first
      next unless yield(count)

      csv << [basename, match[:instrument], match[:part], count]
    end
  end
end
private :print_correlation_summary, :rank_mapping_matches, :write_mapping_csvs, :write_determinate_csv
|
||
|
|
|
||
|
|
# Persists both song caches as YAML in the current working directory:
# @original_songs to 'original_songs.yml' and @mapping_songs to
# 'mapping_songs.yml'. Returns the number of bytes written to the latter.
def dump
  File.write('original_songs.yml', YAML.dump(@original_songs))
  File.write('mapping_songs.yml', YAML.dump(@mapping_songs))
end
|
||
|
|
# Persists only @mapping_songs as YAML to 'mapping_songs.yml' in the current
# working directory. Returns the number of bytes written.
def dump_map
  File.write('mapping_songs.yml', YAML.dump(@mapping_songs))
end
|
||
|
|
|
||
|
|
# Returns the hex-encoded MD5 digest of the file at `filepath`.
def md5(filepath)
  digest = Digest::MD5.file(filepath)
  digest.hexdigest
end
|
||
|
|
|
||
|
|
# Debug aid: for every song directory under @originals_folder, logs the song
# path followed by the basename of each of its immediate sub-directories.
def tency_original_check
  song_dirs = Pathname.new(@originals_folder).children.select(&:directory?)
  song_dirs.each do |song|
    sub_dirs = Pathname.new(song).children.select(&:directory?)

    @@log.debug "SONG #{song}"
    sub_dirs.each { |dir| @@log.debug dir.basename.to_s }
    @@log.debug ""
  end
end
|
||
|
|
|
||
|
|
# Scans @originals_folder and fills @original_songs.
#
# Each immediate sub-directory is treated as a song whose numeric id is parsed
# from the directory name. Every file directly inside the song directory, and
# inside its first-level sub-directories, is hashed and stored under
# cache_id(id, md5) as { md5:, filename:, id: }.
def tency_originals
  song_dirs = Pathname.new(@originals_folder).children.select(&:directory?)
  song_dirs.each do |song_dir|
    id = parse_id(song_dir.basename.to_s)
    files = Pathname.new(song_dir).children.select(&:file?)

    # also look into any 1st level folders we might find
    Pathname.new(song_dir).children.select(&:directory?).each do |sub_dir|
      files += Pathname.new(sub_dir).children.select(&:file?)
    end

    files.each do |file|
      path = file.to_s
      @@log.debug("processing original track #{path}")
      digest = md5(path)
      @original_songs[cache_id(id, digest)] = { md5: digest, filename: path, id: id }
    end
  end
end
|
||
|
|
|
||
|
|
# Scans @mapping_folder and fills @mapping_songs.
#
# Each immediate sub-directory is treated as a song whose numeric id is parsed
# from the directory name. Every file directly inside it whose *basename*
# contains "Stem" is hashed and stored under cache_id(id, md5) as
# { md5:, filename: }.
def tency_maps
  songs = Pathname.new(@mapping_folder).children.select(&:directory?)
  songs.each do |song_dir|
    id = parse_id_mapped(song_dir.basename.to_s)
    @@log.debug "processing song #{song_dir}"

    tracks = Pathname.new(song_dir).children.select(&:file?)
    tracks.each do |track|
      # Test the basename rather than the full path: a directory name that
      # happens to contain "Stem" must not pull in non-stem files, which
      # parse_sanitized_filename would later reject with "no stem for ...".
      next unless track.basename.to_s.include?('Stem')

      path = track.to_s
      @@log.debug("processing mapped track #{path}")
      digest = md5(path)
      @mapping_songs[cache_id(id, digest)] = { md5: digest, filename: path }
    end
  end
end
|
||
|
|
|
||
|
|
# Builds the key used in the song caches: the song id and the track's MD5
# joined by a dash, e.g. "11767-5d41...".
def cache_id(id, md5)
  [id, md5].join('-')
end
|
||
|
|
|
||
|
|
# Extracts the numeric song id from an original (Tency-named) folder name,
# e.g. "amy-winehouse_you-know-i-m-no-good-feat-ghostface-killah_11767" => 11767.
# The id is whatever follows the last underscore; one trailing '/' is ignored.
#
# Raises RuntimeError when there is no underscore or the id parses to 0.
def parse_id(filename)
  underscore = filename.rindex('_')
  raise "no _ in filename: #{filename}" unless underscore

  id = filename[(underscore + 1)..-1].chomp('/').to_i
  raise "no valid ID in filename: #{filename}" if id == 0

  id
end
|
||
|
|
|
||
|
|
# Extracts the numeric song id from a hand-mapped folder name,
# e.g. "Flyleaf - I'm So Sick - 15771" => 15771.
# The id is whatever follows the last dash; one trailing '/' and surrounding
# whitespace are ignored.
#
# Raises RuntimeError when there is no dash or the id parses to 0.
def parse_id_mapped(filename)
  dash = filename.rindex('-')
  raise "no - in filename: #{filename}" unless dash

  id = filename[(dash + 1)..-1].chomp('/').strip.to_i
  raise "no valid ID in filename: #{filename}" if id == 0

  id
end
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
# Unfinished S3-based variant of tency_originals: walks the directories listed
# under 'mapper', logs each folder and the id parsed from its name, and lists
# the folder's own sub-directories. Nothing is stored yet.
def tency_originals2
  song_folders = s3_manager.list_directories('mapper')
  song_folders.each do |song_folder|
    @@log.debug("searching through tency directory. song folder:'#{song_folder}'")

    id = parse_id(song_folder)
    @@log.debug("ID #{id}")

    # the listing is fetched but not used for anything yet
    s3_manager.list_directories(song_folder)
  end
end
|
||
|
|
end
|
||
|
|
end
|