diff --git a/Gemfile b/Gemfile
index 7f01fde4b..e449a487d 100644
--- a/Gemfile
+++ b/Gemfile
@@ -24,6 +24,7 @@ gem 'aws-sdk', '1.8.0'
 gem 'carrierwave'
 gem 'aasm', '3.0.16'
 gem 'devise', '>= 1.1.2'
+gem 'postgres-copy'
 if devenv
   gem 'jam_db', :path=> "#{workspace}/jam-db/target/ruby_package"
   gem 'jampb', :path => "#{workspace}/jam-pb/target/ruby/jampb"
diff --git a/lib/jam_ruby.rb b/lib/jam_ruby.rb
index 0f0a2a02b..12ef8a628 100755
--- a/lib/jam_ruby.rb
+++ b/lib/jam_ruby.rb
@@ -10,6 +10,7 @@ require "will_paginate/active_record"
 require "action_mailer"
 require "devise"
 require "sendgrid"
+require 'postgres-copy'
 require "jam_ruby/constants/limits"
 require "jam_ruby/constants/notification_types"
 require "jam_ruby/constants/validation_messages"
@@ -28,6 +29,8 @@ require "jam_ruby/app/uploaders/artifact_uploader"
 require "jam_ruby/app/uploaders/perf_data_uploader"
 require "jam_ruby/lib/s3_manager"
 require "jam_ruby/message_factory"
+require "jam_ruby/models/max_mind_geo"
+require "jam_ruby/models/max_mind_isp"
 require "jam_ruby/models/genre"
 require "jam_ruby/models/user"
 require "jam_ruby/models/user_observer"
diff --git a/lib/jam_ruby/models/max_mind_geo.rb b/lib/jam_ruby/models/max_mind_geo.rb
new file mode 100644
index 000000000..0c8f2c082
--- /dev/null
+++ b/lib/jam_ruby/models/max_mind_geo.rb
@@ -0,0 +1,50 @@
+module JamRuby
+  # ActiveRecord model for the max_mind_geo table: IP ranges with the country,
+  # region and city taken from a MaxMind CSV export.
+  class MaxMindGeo < ActiveRecord::Base
+
+    self.table_name = 'max_mind_geo'
+
+    # Replaces the whole table with the contents of a MaxMind CSV file.
+    #
+    # File Geo-124
+    # Format:
+    # startIpNum,endIpNum,country,region,city,postalCode,latitude,longitude,dmaCode,areaCode
+    #
+    # Only the first five fields are loaded (into ip_bottom, ip_top, country,
+    # region and city). The delete and the load run inside one transaction.
+    #
+    # NOTE(review): postgres-copy splits each line on every ',' and ignores
+    # double quotes, so a quoted city containing a comma would shift the
+    # fields -- confirm the export never contains one.
+    #
+    # file - path of the CSV file; it is opened as ISO-8859-1.
+    def self.import_from_max_mind(file)
+      MaxMindGeo.transaction do
+        MaxMindGeo.delete_all
+        File.open(file, 'r:ISO-8859-1') do |io|
+          MaxMindGeo.pg_copy_from(io,
+            :map => { 'startIpNum' => 'ip_bottom', 'endIpNum' => 'ip_top', 'country' => 'country', 'region' => 'region', 'city' => 'city' },
+            :columns => [:startIpNum, :endIpNum, :country, :region, :city]) do |row|
+            row[0] = ip_address_to_int(row[0])
+            row[1] = ip_address_to_int(row[1])
+            # Drop up to five fields starting at index 5 (postalCode, latitude,
+            # longitude, dmaCode, areaCode); none of them is in :columns.
+            row.slice!(5, 5)
+          end
+        end
+      end
+    end
+
+    # Make an IP address fit in a signed int. Just divide it by 2, as the least significant part
+    # just can't possibly matter. We can verify this if needed. My guess is the entire bottom octet is
+    # actually irrelevant
+    #
+    # ip - dotted-quad String, e.g. '1.0.0.0' (which yields 8388608).
+    #
+    # Returns the combined octet value integer-divided by 2.
+    def self.ip_address_to_int(ip)
+      ip.split('.').inject(0) { |total, octet| (total << 8) + octet.to_i } / 2
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/jam_ruby/models/max_mind_isp.rb b/lib/jam_ruby/models/max_mind_isp.rb
new file mode 100644
index 000000000..2c7d6ed9a
--- /dev/null
+++ b/lib/jam_ruby/models/max_mind_isp.rb
@@ -0,0 +1,70 @@
+module JamRuby
+  # ActiveRecord model for the max_mind_isp table: IP ranges with the country
+  # code and ISP name taken from a MaxMind CSV export.
+  class MaxMindIsp < ActiveRecord::Base
+
+    self.table_name = 'max_mind_isp'
+
+    # Replaces the whole table with the contents of a MaxMind CSV file.
+    #
+    # File Geo-142
+    # Format:
+    # "beginIp","endIp","countryCode","ISP"
+    #
+    # The delete and the load run inside one transaction.
+    #
+    # file - path of the CSV file; it is opened as ISO-8859-1.
+    def self.import_from_max_mind(file)
+      MaxMindIsp.transaction do
+        MaxMindIsp.delete_all
+        File.open(file, 'r:ISO-8859-1') do |io|
+          io.gets # eat the copyright line. gah, why do they have that in their file??
+          MaxMindIsp.pg_copy_from(io,
+            :map => { 'beginIp' => 'ip_bottom', 'endIp' => 'ip_top', 'countryCode' => 'country', 'ISP' => 'isp' },
+            :columns => [:beginIp, :endIp, :countryCode, :ISP]) do |row|
+            row[0] = ip_address_to_int(strip_quotes(row[0]))
+            row[1] = ip_address_to_int(strip_quotes(row[1]))
+            # this is because the parser just cuts on any ',' and ignores double quotes.
+            # essentially postgres-copy isn't a great csv parser -- or I need to configure it better.
+            # Glue the pieces of the ISP name back together, then drop the leftovers.
+            row[3] = row[3..-1].join(',')
+            row.slice!(4..-1)
+          end
+        end
+      end
+    end
+
+    # Make an IP address fit in a signed int. Just divide it by 2, as the least significant part
+    # just can't possibly matter. We can verify this if needed. My guess is the entire bottom octet is
+    # actually irrelevant
+    #
+    # ip - dotted-quad String, e.g. '1.0.0.0' (which yields 8388608).
+    #
+    # Returns the combined octet value integer-divided by 2.
+    def self.ip_address_to_int(ip)
+      ip.split('.').inject(0) { |total, octet| (total << 8) + octet.to_i } / 2
+    end
+
+    # Removes one leading and one trailing double quote from str, if present.
+    #
+    # str - String or nil.
+    #
+    # Returns the unquoted String, or nil when str is nil.
+    def self.strip_quotes(str)
+      return nil if str.nil?
+
+      str = str[1..-1] if str.start_with?('"')
+      str = str.chop if str.end_with?('"')
+      str
+    end
+
+    # Doubles every double quote in str.
+    def self.escape(str)
+      str.gsub('"', '""')
+    end
+
+    # A bare `private` does not apply to `def self.` methods, so the helpers are
+    # hidden explicitly.
+    private_class_method :strip_quotes, :escape
+  end
+end
\ No newline at end of file
diff --git a/spec/jam_ruby/models/max_mind_geo_spec.rb b/spec/jam_ruby/models/max_mind_geo_spec.rb
new file mode 100644
index 000000000..ba49462de
--- /dev/null
+++ b/spec/jam_ruby/models/max_mind_geo_spec.rb
@@ -0,0 +1,39 @@
+require 'spec_helper'
+
+describe MaxMindGeo do
+
+  include UsesTempFiles
+
+  GEO_CSV='small.csv'
+
+  in_directory_with_file(GEO_CSV)
+
+  before do
+
+    content_for_file('startIpNum,endIpNum,country,region,city,postalCode,latitude,longitude,dmaCode,areaCode
+0.116.0.0,0.119.255.255,"AT","","","",47.3333,13.3333,,
+1.0.0.0,1.0.0.255,"AU","","","",-27.0000,133.0000,,
+1.0.1.0,1.0.1.255,"CN","07","Fuzhou","",26.0614,119.3061,,'.encode(Encoding::ISO_8859_1))
+
+    MaxMindGeo.import_from_max_mind(GEO_CSV)
+  end
+
+  let(:first) { MaxMindGeo.find_by_ip_bottom(MaxMindGeo.ip_address_to_int('0.116.0.0')) }
+  let(:second) { MaxMindGeo.find_by_ip_bottom(MaxMindGeo.ip_address_to_int('1.0.0.0')) }
+  let(:third) { MaxMindGeo.find_by_ip_bottom(MaxMindGeo.ip_address_to_int('1.0.1.0')) }
+
+  it { MaxMindGeo.count.should == 3 }
+
+  it { first.country.should == 'AT' }
+  it { first.ip_bottom.should == MaxMindGeo.ip_address_to_int('0.116.0.0') }
+  it { first.ip_top.should == MaxMindGeo.ip_address_to_int('0.119.255.255') }
+
+  it { second.country.should == 'AU' }
+  it { second.ip_bottom.should == MaxMindGeo.ip_address_to_int('1.0.0.0') }
+  it { second.ip_top.should == MaxMindGeo.ip_address_to_int('1.0.0.255') }
+
+  it { third.country.should == 'CN' }
+  it { third.ip_bottom.should == MaxMindGeo.ip_address_to_int('1.0.1.0') }
+  it { third.ip_top.should == MaxMindGeo.ip_address_to_int('1.0.1.255') }
+end
+
diff --git a/spec/jam_ruby/models/max_mind_isp_spec.rb b/spec/jam_ruby/models/max_mind_isp_spec.rb
new file mode 100644
index 000000000..b61f86cfc
--- /dev/null
+++ b/spec/jam_ruby/models/max_mind_isp_spec.rb
@@ -0,0 +1,43 @@
+require 'spec_helper'
+
+describe MaxMindIsp do
+
+  include UsesTempFiles
+
+  ISP_CSV='small.csv'
+
+  in_directory_with_file(ISP_CSV)
+
+  before do
+
+    content_for_file('Copyright (c) 2011 MaxMind Inc. All Rights Reserved.
+"beginIp","endIp","countryCode","ISP"
+"1.0.0.0","1.0.0.255","AU","APNIC Debogon Project"
+"1.0.1.0","1.0.1.255","CN","Chinanet Fujian Province Network"
+"1.0.4.0","1.0.7.255","AU","Bigred,inc"'.encode(Encoding::ISO_8859_1))
+
+    MaxMindIsp.import_from_max_mind(ISP_CSV)
+  end
+
+  let(:first) { MaxMindIsp.find_by_ip_bottom(MaxMindIsp.ip_address_to_int('1.0.0.0')) }
+  let(:second) { MaxMindIsp.find_by_ip_bottom(MaxMindIsp.ip_address_to_int('1.0.1.0')) }
+  let(:third) { MaxMindIsp.find_by_ip_bottom(MaxMindIsp.ip_address_to_int('1.0.4.0')) }
+
+  it { MaxMindIsp.count.should == 3 }
+
+  it { first.country.should == 'AU' }
+  it { first.ip_bottom.should == MaxMindIsp.ip_address_to_int('1.0.0.0') }
+  it { first.ip_top.should == MaxMindIsp.ip_address_to_int('1.0.0.255') }
+  it { first.isp.should == 'APNIC Debogon Project' }
+
+  it { second.country.should == 'CN' }
+  it { second.ip_bottom.should == MaxMindIsp.ip_address_to_int('1.0.1.0') }
+  it { second.ip_top.should == MaxMindIsp.ip_address_to_int('1.0.1.255') }
+  it { second.isp.should == 'Chinanet Fujian Province Network' }
+
+  it { third.country.should == 'AU' }
+  it { third.ip_bottom.should == MaxMindIsp.ip_address_to_int('1.0.4.0') }
+  it { third.ip_top.should == MaxMindIsp.ip_address_to_int('1.0.7.255') }
+  it { third.isp.should == 'Bigred,inc' }
+end
+