# Task added to web/lib/tasks/jam_tracks.rake by the "Update manifest maker" patch.
# Rake namespaces can be reopened, so this composes with the other jam_tracks tasks.
namespace :jam_tracks do
  # Builds a manifest of every track belonging to a 'Production' JamTrack and
  # publishes it to the private S3 bucket.
  #
  # Outputs:
  #   * jam_track_manifests/manifest.csv       - one row per track
  #   * jam_track_manifests/all-tracks.tar.gz  - the local "audio" tree, which holds
  #     one "<track>.ogg.meta" JSON file per track (this task downloads no audio)
  #
  # Environment:
  #   MAX_JAMTRACKS - optional positive integer; stop after that many jam tracks.
  #                   Unset, 0 or negative means "no limit".
  #
  # Raises RuntimeError if the tar archive cannot be created.
  # The temporary directories are left on disk (their paths are printed) so the
  # generated files can be inspected after the run.
  task gen_jamtrack_manifest: :environment do |_task, _args|
    max = ENV['MAX_JAMTRACKS'].to_i
    max = nil if max <= 0
    puts "MAX JAMTRACKS #{max}"

    jam_tracks = JamTrack.includes([:jam_track_tracks,
                                    {genres_jam_tracks: :genres},
                                    {jam_track_tracks: :instrument},
                                    :genres]).where(status: 'Production').order('original_artist, name')
    private_bucket = Rails.application.config.aws_bucket
    s3_manager = S3Manager.new(private_bucket,
                               Rails.application.config.aws_access_key_id,
                               Rails.application.config.aws_secret_access_key)

    # Dir.mktmpdir already creates the directory.
    tmp_dir = Dir.mktmpdir
    puts "tmp_dir=#{tmp_dir}"

    csv_file = File.join(tmp_dir, 'manifest.csv')
    top_folder = File.join(tmp_dir, 'audio')
    FileUtils.mkdir_p(top_folder)

    header = ['JamTrackId', 'TrackId', 'Artist', 'Song', 'Instrument', 'Part', 'Type', 'Genre',
              'LocalOgg', 'LocalMeta', 's3_path_url_44', 's3_path_url_48', 'AudioExists']
    processed = 0

    CSV.open(csv_file, 'wb') do |csv|
      csv << header

      jam_tracks.each do |jam_track|
        song = jam_track.name
        artist = jam_track.original_artist
        genre = jam_track.genres.first
        genre_name = genre ? genre.description : ''

        # Bucket folder: the first a-z / 0-9 character of the song name, lower-cased.
        # Names with no such character (or a blank name) go to "_", so punctuation
        # such as "." or "/" can never become a directory name.
        initial = song.to_s[/[0-9A-Za-z]/]
        folder = File.join(top_folder, initial ? initial.downcase : '_')

        jam_track.jam_track_tracks.each do |jam_track_track|
          instrument = jam_track_track.instrument_id
          track_type = jam_track_track.track_type
          raw_part = jam_track_track.part || ''
          # Sanitized so the part is safe to embed in a file name.
          safe_part = raw_part.gsub(/[^0-9A-Za-z]/, '_')

          # Interpolation (rather than String#+) so a missing URL cannot raise TypeError.
          s3_url_44 = "s3://#{private_bucket}/#{jam_track_track.url_44}"
          s3_url_48 = "s3://#{private_bucket}/#{jam_track_track.url_48}"

          # Layout: audio/<initial>/<jamtrack>_<track>_<instrument>_<part>_<type>.ogg[.meta]
          FileUtils.mkdir_p(folder)
          base_name = File.join(folder, "#{jam_track.id}_#{jam_track_track.id}_#{instrument}_#{safe_part}_#{track_type}".downcase)
          ogg_file = "#{base_name}.ogg"
          meta_file = "#{ogg_file}.meta"

          meta = {
            jam_track_id: jam_track.id,
            track_id: jam_track_track.id,
            artist: artist,
            song: song,
            instrument: instrument,
            part: raw_part,
            type: track_type,
            genre: genre_name,
            s3_path_url_44: s3_url_44,
            s3_path_url_48: s3_url_48
          }
          File.write(meta_file, JSON.pretty_generate(meta))

          # Only the 48kHz object is probed; a track without a 48kHz key is
          # reported as missing instead of being sent to S3 as nil.
          url_48 = jam_track_track.url_48
          exists = url_48 ? s3_manager.exists?(url_48) : false

          # NOTE(review): the CSV 'Part' column carries the sanitized part while the
          # meta JSON keeps the raw value; kept as-is - confirm this is intended.
          csv << [
            jam_track.id,
            jam_track_track.id,
            artist,
            song,
            instrument,
            safe_part,
            track_type,
            genre_name,
            ogg_file.sub(tmp_dir, ''),   # path relative to tmp_dir
            meta_file.sub(tmp_dir, ''),
            s3_url_44,
            s3_url_48,
            exists
          ]
        end

        # max is nil when unlimited, so it must be checked before comparing.
        processed += 1
        if max && processed >= max
          puts "Max of jamtracks reached"
          break
        end
      end
    end

    # Dump the first 10 lines to stdout for quick verification; a shorter
    # manifest simply prints fewer lines.
    File.foreach(csv_file).first(10).each { |line| puts line }

    s3_manager.upload('jam_track_manifests/manifest.csv', csv_file, content_type: 'text/csv')

    output_tar_file = File.join(Dir.mktmpdir, 'archive.tar.gz')

    # -c create, -z gzip, -f output file; -C switches to the parent directory first
    # so the archive holds relative "audio/..." paths. The argument-list form of
    # system bypasses the shell, so no escaping is required.
    success = system('tar', '-czf', output_tar_file,
                     '-C', File.dirname(top_folder), File.basename(top_folder))

    # Never upload a missing or partial archive.
    raise 'Failed to create tar.gz file.' unless success

    puts "Successfully created tar.gz file."

    s3_manager.upload('jam_track_manifests/all-tracks.tar.gz', output_tar_file, content_type: 'application/gzip')

    puts "tar.gz output=#{output_tar_file}"

    puts "tmp_dir=#{tmp_dir}"
  end
end