125 lines
4.0 KiB
Ruby
125 lines
4.0 KiB
Ruby
require 'csv'
|
|
|
|
module JamRuby
|
|
class MaxMindIsp < ApplicationRecord
|
|
|
|
self.table_name = 'max_mind_isp'
|
|
|
|
@@log = Logging.logger[MaxMindIsp]
|
|
|
|
def self.import_from_max_mind(options)
|
|
|
|
file = options[:file]
|
|
use_copy = options[:use_copy]
|
|
# File Geo-142
|
|
# Format:
|
|
# "beginIp","endIp","countryCode","ISP"
|
|
|
|
# drop indexes on start, then add them back when done
|
|
|
|
start = Time.now
|
|
|
|
MaxMindIsp.delete_all
|
|
if use_copy
|
|
Database.copy(MaxMind.table_name, file)
|
|
else
|
|
File.open(file, 'r:ISO-8859-1') do |io|
|
|
s = io.gets.strip # eat the copyright line. gah, why do they have that in their file??
|
|
unless s.eql? 'Copyright (c) 2011 MaxMind Inc. All Rights Reserved.'
|
|
puts s
|
|
puts 'Copyright (c) 2011 MaxMind Inc. All Rights Reserved.'
|
|
raise 'file does not start with expected copyright (line 1): Copyright (c) 2011 MaxMind Inc. All Rights Reserved.'
|
|
end
|
|
|
|
s = io.gets.strip # eat the headers line
|
|
unless s.eql? '"beginIp","endIp","countryCode","ISP"'
|
|
puts s
|
|
puts '"beginIp","endIp","countryCode","ISP"'
|
|
raise 'file does not start with expected header (line 2): "beginIp","endIp","countryCode","ISP"'
|
|
end
|
|
|
|
saved_level = ActiveRecord::Base.logger ? ActiveRecord::Base.logger.level : 0
|
|
count = 0
|
|
|
|
stmt = "insert into #{MaxMindIsp.table_name} (ip_bottom, ip_top, country, isp) values"
|
|
|
|
vals = ''
|
|
sep = ''
|
|
i = 0
|
|
n = 20 # going from 20 to 40 only changed things a little bit, and 512 was slower... and 1024 was even slower (weird)
|
|
|
|
csv = ::CSV.new(io, {encoding: 'ISO-8859-1', headers: false})
|
|
csv.each do |row|
|
|
raise "file does not have expected number of columns (4): #{row.length}" unless row.length == 4
|
|
|
|
ip_bottom = ip_address_to_int(strip_quotes(row[0]))
|
|
ip_top = ip_address_to_int(strip_quotes(row[1]))
|
|
country = row[2]
|
|
isp = row[3]
|
|
|
|
vals = vals+sep+"(#{ip_bottom}, #{ip_top}, '#{country}', #{quote_value(isp, nil)})"
|
|
sep = ','
|
|
i += 1
|
|
|
|
if count == 0 or i >= n
|
|
MaxMindIsp.connection.execute stmt+vals
|
|
count += i
|
|
vals = ''
|
|
sep = ''
|
|
i = 0
|
|
|
|
if ActiveRecord::Base.logger and ActiveRecord::Base.logger.level > 1
|
|
ActiveRecord::Base.logger.debug "... logging inserts into #{MaxMindIsp.table_name} suspended ..."
|
|
ActiveRecord::Base.logger.level = 1
|
|
end
|
|
|
|
if ActiveRecord::Base.logger and count%10000 < n
|
|
ActiveRecord::Base.logger.level = saved_level
|
|
ActiveRecord::Base.logger.debug "... inserted #{count} into #{MaxMindIsp.table_name} ..."
|
|
ActiveRecord::Base.logger.level = 1
|
|
end
|
|
end
|
|
end
|
|
|
|
if i > 0
|
|
MaxMindIsp.connection.execute stmt+vals
|
|
count += i
|
|
end
|
|
|
|
if ActiveRecord::Base.logger
|
|
ActiveRecord::Base.logger.level = saved_level
|
|
ActiveRecord::Base.logger.debug "loaded #{count} records into #{MaxMindIsp.table_name}"
|
|
end
|
|
end
|
|
end
|
|
|
|
elapsed = Time.now - start
|
|
@@log.debug("#{MaxMindIsp.table_name} import took #{elapsed} seconds")
|
|
|
|
end
|
|
|
|
# Make an IP address fit in a signed int. Just divide it by 2, as the least significant part
|
|
# just can't possibly matter. We can verify this if needed. My guess is the entire bottom octet is
|
|
# actually irrelevant
|
|
def self.ip_address_to_int(ip)
|
|
ip.split('.').inject(0) { |total, value| (total << 8) + value.to_i }
|
|
end
|
|
|
|
private
|
|
|
|
def self.strip_quotes str
|
|
return nil if str.nil?
|
|
|
|
if str.start_with? '"'
|
|
str = str[1..-1]
|
|
end
|
|
|
|
if str.end_with? '"'
|
|
str = str.chop
|
|
end
|
|
|
|
return str
|
|
end
|
|
end
|
|
end
|