diff --git a/db/manifest b/db/manifest
index 67ef13c9e..0e1256774 100755
--- a/db/manifest
+++ b/db/manifest
@@ -189,4 +189,5 @@ allow_unspecified_rsvps.sql
 music_session_cancel_flag.sql
 fix_sms_query_cancel_flag.sql
 fix_sms_query_cancel_flag2.sql
-next_session_scheduled_default.sql
\ No newline at end of file
+next_session_scheduled_default.sql
+max_mind_releases.sql
\ No newline at end of file
diff --git a/db/up/max_mind_releases.sql b/db/up/max_mind_releases.sql
new file mode 100644
index 000000000..bdd2a4cf4
--- /dev/null
+++ b/db/up/max_mind_releases.sql
@@ -0,0 +1,27 @@
+-- released_at is when maxmind released this data
+CREATE TABLE max_mind_releases (
+  id VARCHAR(64) PRIMARY KEY DEFAULT uuid_generate_v4(),
+  released_at DATE,
+  geo_ip_124_url VARCHAR(2000),
+  geo_ip_124_md5 VARCHAR(255),
+  geo_ip_124_size INTEGER,
+  geo_ip_134_url VARCHAR(2000),
+  geo_ip_134_md5 VARCHAR(255),
+  geo_ip_134_size INTEGER,
+  geo_ip_139_url VARCHAR(2000),
+  geo_ip_139_md5 VARCHAR(255),
+  geo_ip_139_size INTEGER,
+  geo_ip_142_url VARCHAR(2000),
+  geo_ip_142_md5 VARCHAR(255),
+  geo_ip_142_size INTEGER,
+  created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+
+-- the 1st available release
+INSERT INTO max_mind_releases VALUES (DEFAULT, DATE '2014-07-01',
+'maxmind/2014-07-01/GeoIP-124_20140701.zip', '93430c4b34b366030054a97c1b595f6f', 1997587,
+'maxmind/2014-07-01/GeoIP-134_20140701.zip', '893c8674656271dac4964d5a56325203', 48198205,
+'maxmind/2014-07-01/GeoIP-139_20140701.zip', '8487b681cc14ea9f603b52db5763a77a', 62399148,
+'maxmind/2014-07-01/GeoIP-142_20140701.zip', '2fb4288fa3004ad68a06388f716e4ee5', 2265920,
+DEFAULT, DEFAULT);
\ No newline at end of file
diff --git a/ruby/Gemfile b/ruby/Gemfile
index c4acf6d44..204201bab 100644
--- a/ruby/Gemfile
+++ b/ruby/Gemfile
@@ -46,6 +46,7 @@ gem 'builder'
 gem 'fog'
 gem 'rest-client'
 gem 'iso-639'
+gem 'rubyzip'
 
 group :test do
   gem 'simplecov', '~> 0.7.1'
diff --git a/ruby/lib/jam_ruby.rb b/ruby/lib/jam_ruby.rb
index 89e4fc634..cf17de2a0 100755
--- a/ruby/lib/jam_ruby.rb
+++ b/ruby/lib/jam_ruby.rb
@@ -18,6 +18,7 @@ require 'builder'
 require 'cgi'
 require 'resque_mailer'
 require 'rest-client'
+require 'zip'
 
 require "jam_ruby/constants/limits"
 require "jam_ruby/constants/notification_types"
@@ -64,6 +65,7 @@ require "jam_ruby/app/uploaders/perf_data_uploader"
 require "jam_ruby/app/uploaders/recorded_track_uploader"
 require "jam_ruby/app/uploaders/mix_uploader"
 require "jam_ruby/app/uploaders/music_notation_uploader"
+require "jam_ruby/app/uploaders/max_mind_release_uploader"
 require "jam_ruby/lib/desk_multipass"
 require "jam_ruby/amqp/amqp_connection_manager"
 require "jam_ruby/database"
@@ -72,6 +74,7 @@ require "jam_ruby/models/feedback"
 require "jam_ruby/models/feedback_observer"
 require "jam_ruby/models/max_mind_geo"
 require "jam_ruby/models/max_mind_isp"
+require "jam_ruby/models/max_mind_release"
 require "jam_ruby/models/band_genre"
 require "jam_ruby/models/genre"
 require "jam_ruby/models/user"
diff --git a/ruby/lib/jam_ruby/app/uploaders/max_mind_release_uploader.rb b/ruby/lib/jam_ruby/app/uploaders/max_mind_release_uploader.rb
new file mode 100644
index 000000000..e801059e8
--- /dev/null
+++ b/ruby/lib/jam_ruby/app/uploaders/max_mind_release_uploader.rb
@@ -0,0 +1,38 @@
+# CarrierWave uploader for the MaxMind GeoIP zip archives mounted on JamRuby::MaxMindRelease.
+class MaxMindReleaseUploader < CarrierWave::Uploader::Base
+  # include CarrierWaveDirect::Uploader
+  include CarrierWave::MimeTypes
+  process :set_content_type
+
+  # once the archive is stored, record its size and md5 on the owning model
+  after :store, :update_extras
+
+  def initialize(*args)
+    super
+    JamRuby::UploaderConfiguration.set_aws_private_configuration(self)
+  end
+
+  # Add a white list of extensions which are allowed to be uploaded.
+  def extension_white_list
+    %w(zip)
+  end
+
+  # returns nil; the directory part of the stored name comes from filename (below)
+  def store_dir
+    nil
+  end
+
+  # important; this code assumes that the mounted_as ends in _url, and the corresponding _md5 field has the same prefix
+  # this is true for max_mind_release, but not necessarily other models; so careful copy/pasting
+  def update_extras(file)
+    mounted = mounted_as.to_s
+    prefix = mounted[0, mounted.rindex('_url')]
+    model["#{prefix}_size".to_sym] = file.size
+    model["#{prefix}_md5".to_sym] = ::Digest::MD5.file(file).hexdigest
+  end
+
+  # stored as <model.store_dir>/<mounted column name>.zip
+  def filename
+    File.join(model.store_dir, mounted_as.to_s + '.zip')
+  end
+end
diff --git a/db/geodata/README.txt b/ruby/lib/jam_ruby/geodata/README.txt
similarity index 100%
rename from db/geodata/README.txt
rename to ruby/lib/jam_ruby/geodata/README.txt
diff --git a/db/geodata/iso3166.csv b/ruby/lib/jam_ruby/geodata/iso3166.csv
similarity index 100%
rename from db/geodata/iso3166.csv
rename to ruby/lib/jam_ruby/geodata/iso3166.csv
diff --git a/db/geodata/region_codes.csv b/ruby/lib/jam_ruby/geodata/region_codes.csv
similarity index 100%
rename from db/geodata/region_codes.csv
rename to ruby/lib/jam_ruby/geodata/region_codes.csv
diff --git a/db/geodata/supplement.sql b/ruby/lib/jam_ruby/geodata/supplement.sql
similarity index 100%
rename from db/geodata/supplement.sql
rename to ruby/lib/jam_ruby/geodata/supplement.sql
diff --git a/ruby/lib/jam_ruby/models/country.rb b/ruby/lib/jam_ruby/models/country.rb
index 0cf5486a6..18e8a97b3 100644
--- a/ruby/lib/jam_ruby/models/country.rb
+++ b/ruby/lib/jam_ruby/models/country.rb
@@ -7,7 +7,13 @@ module JamRuby
       self.order('countryname asc').all
     end
 
-    def self.import_from_iso3166(file)
+    # path of the iso3166.csv shipped inside the jam_ruby gem (lib/jam_ruby/geodata)
+    def self.find_iso3166
+      gem_dir = Gem::Specification.find_by_name("jam_ruby").gem_dir
+      File.join(gem_dir, 'lib', 'jam_ruby', 'geodata', 'iso3166.csv')
+    end
+
+    def self.import_from_iso3166(file = find_iso3166)
       self.delete_all
       File.open(file, 'r:ISO-8859-1') do |io|
         csv = ::CSV.new(io, {encoding: 'ISO-8859-1', headers: false})
diff --git a/ruby/lib/jam_ruby/models/max_mind_release.rb b/ruby/lib/jam_ruby/models/max_mind_release.rb
new file mode 100644
index 000000000..1c77dd974
--- /dev/null
+++ b/ruby/lib/jam_ruby/models/max_mind_release.rb
@@ -0,0 +1,166 @@
+module JamRuby
+  # One MaxMind GeoIP data release: four zip archives (124, 134, 139, 142) referenced by
+  # *_url columns, each with matching *_md5 and *_size columns.
+  class MaxMindRelease < ActiveRecord::Base
+
+    include S3ManagerMixin
+
+    @@log = Logging.logger[MaxMindRelease]
+
+    mount_uploader :geo_ip_124_url, MaxMindReleaseUploader
+    mount_uploader :geo_ip_134_url, MaxMindReleaseUploader
+    mount_uploader :geo_ip_139_url, MaxMindReleaseUploader
+    mount_uploader :geo_ip_142_url, MaxMindReleaseUploader
+
+    # key prefix under which this release's archives are stored
+    def store_dir
+      "maxmind/#{released_at}"
+    end
+
+    # Downloads and unzips all four archives, hands the extracted CSVs to the
+    # MaxMind/GeoIp/JamIsp importers, then re-imports Country and Region. Raises if released_at is unset.
+    def import
+
+      # you can only import a maxmind release that has released_at specified
+      unless released_at
+        raise "released_at not set in import"
+      end
+
+      working_dir = dated_working_dir
+
+      @@log.debug("downloading and unzipping geoip-142")
+      geo_ip_142_files = download_and_unzip(working_dir, :geo_ip_142_url, self[:geo_ip_142_md5])
+
+      @@log.debug("downloading and unzipping geoip-139")
+      geo_ip_139_files = download_and_unzip(working_dir, :geo_ip_139_url, self[:geo_ip_139_md5])
+
+      @@log.debug("downloading and unzipping geoip-134")
+      geo_ip_134_files = download_and_unzip(working_dir, :geo_ip_134_url, self[:geo_ip_134_md5])
+
+      @@log.debug("downloading and unzipping geoip-124")
+      geo_ip_124_files = download_and_unzip(working_dir, :geo_ip_124_url, self[:geo_ip_124_md5])
+
+      MaxMindIsp.import_from_max_mind(geo_ip_142_files['GeoIPISP-142.csv'])
+      MaxMindGeo.import_from_max_mind(geo_ip_139_files['GeoIPCity.csv'])
+      GeoIpBlocks.import_from_max_mind(geo_ip_134_files['GeoIPCity-134-Blocks.csv'])
+      GeoIpLocations.import_from_max_mind(geo_ip_134_files['GeoIPCity-134-Location.csv'])
+      JamIsp.import_from_max_mind(geo_ip_124_files['GeoIPISP.csv'])
+      Country.import_from_iso3166
+      Region.import_from_region_codes
+    end
+
+    # returns the hash produced by unzip: extracted entry basename => extracted path
+    def download_and_unzip(working_dir, field, md5)
+      downloaded_filename = download(working_dir, field, md5)
+
+      unzip(working_dir, downloaded_filename)
+    end
+
+    # Fetches the archive referenced by field into working_dir and returns the local path.
+    # A file already present there is reused when matched_md5 accepts it against md5;
+    # otherwise it is deleted and downloaded again. Raises on a non-2xx response.
+    # NOTE(review): matched_md5 is not defined in this class -- presumably S3ManagerMixin provides it; confirm.
+    def download(working_dir, field, md5)
+
+      filename = File.basename(self[field])
+      downloaded_filename = File.join(working_dir, filename)
+      @@log.debug("working on field=#{field}, filename #{downloaded_filename}")
+
+      if File.exist?(downloaded_filename)
+        if matched_md5(downloaded_filename, md5)
+          @@log.debug("#{downloaded_filename} file has matching md5")
+          return downloaded_filename
+        else
+          @@log.debug("#{downloaded_filename} exists but has wrong md5. deleting.")
+          File.delete(downloaded_filename)
+        end
+      end
+
+      uri = URI(sign_url(field))
+      File.open downloaded_filename, 'wb' do |io|
+        # resolve_url passes any http(s) value through untouched, so honour the scheme
+        Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
+          request = Net::HTTP::Get.new uri
+          http.request request do |response|
+            response_code = response.code.to_i
+            unless response_code >= 200 && response_code <= 299
+              raise "bad status code: #{response_code}. body: #{response.body}"
+            end
+            response.read_body do |chunk|
+              io.write chunk
+            end
+          end
+        end
+      end
+
+      @@log.debug("downloaded #{downloaded_filename}")
+      downloaded_filename
+    end
+
+    # Extracts downloaded_filename into working_dir/<archive name without extension>, overwriting
+    # existing files. Returns a hash of entry basename => extracted path.
+    def unzip(working_dir, downloaded_filename)
+      result = {}
+
+      # overwrites existing files
+      Zip.on_exists_proc = true
+
+      # get the file without extension, to make the output folder name
+      extension = File.extname(downloaded_filename)
+      name = File.basename(downloaded_filename, extension)
+
+      output_dir = File.join(working_dir, name)
+      Dir.mkdir(output_dir) unless Dir.exist?(output_dir)
+      output_root = File.expand_path(output_dir)
+
+      Zip::File.open(downloaded_filename) do |zip_file|
+        # Handle entries one by one
+        zip_file.each do |entry|
+          output_filename = File.join(output_dir, entry.name)
+
+          # refuse entries such as ../../x that would land outside output_dir
+          unless File.expand_path(output_filename).start_with?(output_root + File::SEPARATOR)
+            raise "zip entry escapes output dir: #{entry.name}"
+          end
+
+          # make the parent of the real target; prefixing Dir.pwd here broke absolute working dirs
+          FileUtils.mkdir_p(File.dirname(output_filename))
+
+          # Extract to file/directory/symlink
+          entry.extract(output_filename)
+          result[File.basename(entry.name)] = output_filename
+        end
+      end
+
+      result
+    end
+
+    # returns <APP_CONFIG.max_mind_working_dir>/<released_at>, creating the dated directory when missing
+    def dated_working_dir
+      # you need a valid working directory from config
+      working_dir = APP_CONFIG.max_mind_working_dir
+      unless Dir.exist?(working_dir)
+        raise "maxmind release working_dir does not exist=#{working_dir}"
+      end
+
+      # append date, and download everything to there
+      working_dir = File.join(working_dir, released_at.to_s)
+
+      unless Dir.exist?(working_dir)
+        Dir.mkdir(working_dir)
+      end
+      working_dir
+    end
+
+    # expiration_time ends up as :expires in s3_manager.sign_url; a single argument binds to field
+    def sign_url(expiration_time = 120, field)
+      resolve_url(field, 'application/zip', expiration_time)
+    end
+
+    # values already starting with 'http' are returned as-is; anything else is signed via s3_manager
+    def resolve_url(url_field, mime_type, expiration_time)
+      self[url_field].start_with?('http') ? self[url_field] : s3_manager.sign_url(self[url_field], {:expires => expiration_time, :response_content_type => mime_type, :secure => false})
+    end
+
+  end
+end
diff --git a/ruby/lib/jam_ruby/models/region.rb b/ruby/lib/jam_ruby/models/region.rb
index 0aa27ea87..56ef29443 100644
--- a/ruby/lib/jam_ruby/models/region.rb
+++ b/ruby/lib/jam_ruby/models/region.rb
@@ -7,7 +7,13 @@ module JamRuby
       self.where(countrycode: country).order('regionname asc').all
     end
 
-    def self.import_from_region_codes(file)
+    # path of the region_codes.csv shipped inside the jam_ruby gem (lib/jam_ruby/geodata)
+    def self.find_region_codes
+      gem_dir = Gem::Specification.find_by_name("jam_ruby").gem_dir
+      File.join(gem_dir, 'lib', 'jam_ruby', 'geodata', 'region_codes.csv')
+    end
+
+    def self.import_from_region_codes(file = find_region_codes)
       self.delete_all
       File.open(file, 'r:ISO-8859-1') do |io|
         csv = ::CSV.new(io, {encoding: 'ISO-8859-1', headers: false})
diff --git a/ruby/spec/factories.rb b/ruby/spec/factories.rb
index 7cf0753bb..dfeb9951a 100644
--- a/ruby/spec/factories.rb
+++ b/ruby/spec/factories.rb
@@ -556,4 +556,8 @@ FactoryGirl.define do
       latency_tester.save
     end
   end
+
+  factory :max_mind_release, :class => JamRuby::MaxMindRelease do
+    released_at { Time.now.to_date }
+  end
 end
diff --git a/ruby/spec/jam_ruby/models/max_mind_releases_spec.rb b/ruby/spec/jam_ruby/models/max_mind_releases_spec.rb
new file mode 100644
index 000000000..e42034f37
--- /dev/null
+++ b/ruby/spec/jam_ruby/models/max_mind_releases_spec.rb
@@ -0,0 +1,73 @@
+require 'spec_helper'
+
+describe MaxMindRelease do
+
+  include UsesTempFiles
+
+  GEOISP_124 = 'geoisp_124.csv'
+
+  in_directory_with_file(GEOISP_124)
+
+  before(:all) do
+    # read the current storage before switching; `a = obj.storage = :fog` would only ever capture :fog
+    @original_storage = MaxMindReleaseUploader.storage
+    MaxMindReleaseUploader.storage = :fog
+  end
+
+  after(:all) do
+    MaxMindReleaseUploader.storage = @original_storage
+  end
+
+  let(:zipfile) {fake_geo_124_zip(File.new(GEOISP_124))}
+  let(:release) {FactoryGirl.create(:max_mind_release)}
+
+  before(:each) do
+    content_for_file('abc')
+
+    Dir.mkdir(APP_CONFIG.max_mind_working_dir) unless Dir.exist?(APP_CONFIG.max_mind_working_dir)
+  end
+
+  it "unzip" do
+    result = release.unzip(APP_CONFIG.max_mind_working_dir, zipfile.path)
+    result.include?('GeoIPISP.csv').should be_true
+    output = result['GeoIPISP.csv']
+    File.exist?(output).should be_true
+    IO.read(output).should == 'abc'
+  end
+
+  it "downloads", aws: true do
+    uploader = MaxMindReleaseUploader.new(release, :geo_ip_124_url)
+    zipfile.open
+    uploader.store!(zipfile) # uploads the file to s3
+    release.save!
+    release[:geo_ip_124_url].should == File.join(release.store_dir, 'geo_ip_124_url.zip')
+    release[:geo_ip_124_md5].should == Digest::MD5.file(zipfile).hexdigest
+    release[:geo_ip_124_size].should == zipfile.size
+
+    downloaded_filename = release.download(release.dated_working_dir, :geo_ip_124_url, release[:geo_ip_124_md5])
+
+    Digest::MD5.file(downloaded_filename ).hexdigest.should == Digest::MD5.file(zipfile).hexdigest
+  end
+
+  #it "uploads to s3 with correct name, and then downloads via signed URL" do
+  #  pending "use"
+  #  jam_track = FactoryGirl.create(:jam_track)
+  #  uploader = JamTrackUploader.new(jam_track, :url)
+  #  uploader.store!(File.open(JKA_NAME)) # uploads file
+  #  jam_track.save!
+  #
+  #  # verify that the uploader stores the correct path
+  #  jam_track[:url].should == jam_track.store_dir + '/' + jam_track.filename
+  #
+  #  # verify it's on S3
+  #  s3 = S3Manager.new(APP_CONFIG.aws_bucket, APP_CONFIG.aws_access_key_id, APP_CONFIG.aws_secret_access_key)
+  #  s3.exists?(jam_track[:url]).should be_true
+  #  s3.length(jam_track[:url]).should == 'abc'.length
+  #
+  #  # download it via signed URL, and check contents
+  #  url = jam_track.sign_url
+  #  downloaded_contents = open(url).read
+  #  downloaded_contents.should == 'abc'
+  #end
+
+end
\ No newline at end of file
diff --git a/ruby/spec/support/utilities.rb b/ruby/spec/support/utilities.rb
index 23b37a3e6..5e3d38e60 100644
--- a/ruby/spec/support/utilities.rb
+++ b/ruby/spec/support/utilities.rb
@@ -98,6 +98,9 @@ def app_config
       true
     end
 
+    def max_mind_working_dir
+      'tmp'
+    end
     private
 
     def audiomixer_workspace_path
@@ -147,4 +150,15 @@ def wipe_s3_test_bucket
       end
     end
   end
+end
+
+# creates a maxmind 'GEO-124' zip file using the supplied CSV
+def fake_geo_124_zip(geoisp_csv)
+  zipfile = Tempfile.new(['fake_geo_124', '.zip'])
+
+  Zip::File.open(Pathname.new(zipfile.path).realpath.to_s, Zip::File::CREATE) do |zip|
+    zip.add('fake_geo_124/GeoIPISP.csv', Pathname.new(geoisp_csv.path).realpath.to_s)
+  end
+
+  zipfile
 end
\ No newline at end of file
diff --git a/web/config/application.rb b/web/config/application.rb
index ebc2bb375..cd4ca0e40 100644
--- a/web/config/application.rb
+++ b/web/config/application.rb
@@ -243,5 +243,7 @@ if defined?(Bundler)
     config.ftue_network_test_max_clients = 8
     # the maximum amount of allowable latency
     config.ftue_maximum_gear_latency = 20
+
+    config.max_mind_working_dir = 'tmp'
   end
 end
diff --git a/web/lib/tasks/import_max_mind.rake b/web/lib/tasks/import_max_mind.rake
index c23e11b41..e3ab39a78 100644
--- a/web/lib/tasks/import_max_mind.rake
+++ b/web/lib/tasks/import_max_mind.rake
@@ -1,4 +1,18 @@
 namespace :db do
+
+  desc "Imports a maxmind release from S3. If you specify a RELEASE env var, it should be like 2014-07-01 (YYYY-MM-DD). Otherwise latest found max_mind_releases in db is used."
+  task import_maxmind: :environment do |task, args|
+    specific_release = ENV['RELEASE']
+    if specific_release
+      release = MaxMindRelease.find_by_released_at(Date.parse(specific_release))
+    else
+      release = MaxMindRelease.order('released_at DESC').first
+    end
+
+    raise "no max_mind_releases row found for RELEASE=#{specific_release.inspect}" unless release
+    release.import
+  end
+
   desc "Import a maxmind geo (139) database; run like this: rake db:import_maxmind_geo file="
   task import_maxmind_geo: :environment do
     MaxMindGeo.import_from_max_mind ENV['file']
diff --git a/web/script/package/post-install.sh b/web/script/package/post-install.sh
index c6534c54f..a7d09f359 100755
--- a/web/script/package/post-install.sh
+++ b/web/script/package/post-install.sh
@@ -12,12 +12,14 @@ cp /var/lib/$NAME/script/package/$NAME.conf /etc/init/$NAME.conf
 
 mkdir -p /var/lib/$NAME/log
 mkdir -p /var/lib/$NAME/tmp
+mkdir -p /var/tmp/$NAME
 mkdir -p /etc/$NAME
 mkdir -p /var/log/$NAME
 
 chown -R $USER:$GROUP /var/lib/$NAME
 chown -R $USER:$GROUP /etc/$NAME
 chown -R $USER:$GROUP /var/log/$NAME
+chown -R $USER:$GROUP /var/tmp/$NAME
 
 # make log folders for jobs
 mkdir -p /var/log/any-job-worker