My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/ruby
#
# This script fills in the gravatar hash and badge counts for each user
# profile. It reads the listing at http://stackoverflow.com/users so that it
# can pull in the information without tons of hits to the server.
#

require 'rubygems'
require 'hpricot'
require 'mechanize'
require "mysql"
require 'pp'

# Returns the gravatar hash embedded in an avatar image URL, or nil.
#
# src - value of the avatar <img> src attribute (String or nil).
#
# The hash is taken as the fifth '/'-separated segment of the URL with any
# '?query' suffix removed; nil is returned when that segment is absent.
def gravatar_hash_from(src)
  segment = src.to_s.split('/')[4]
  segment && segment.split('?')[0]
end

# Returns the badge count shown for one badge colour in a user-info element.
#
# user_info - element for a single user (must respond to `/` with a selector).
# colour    - word matched against the badge span's title ('gold', ...).
#
# Yields the inner HTML of the first matching 'badgecount' span, or the
# Integer 0 when the user has no badge of that colour.
def badge_count(user_info, colour)
  badge = (user_info / "div[@class='user-details']/span[@title~=#{colour}]/span[@class='badgecount']").first
  badge ? badge.inner_html : 0
end

# Reads one page of the users listing and stores each listed user's gravatar
# hash and badge counts in the so_user table.
#
# body - HTML source of a users listing page.
#
# Every div with class 'user-info' is treated as one user. The user id is the
# third '/'-separated segment of the profile link's href. Entries without a
# usable profile link are skipped with a warning, because there is no id to
# key the UPDATE on. A missing avatar or reputation span is passed through as
# nil instead of raising; a missing badge span counts as 0.
#
# A database connection and prepared statement are opened per call and are
# always closed again, even when parsing or the UPDATE raises.
def parse_page(body)
  my = Mysql::new("localhost", "username", "password", "so_database")
  st = my.prepare("update so_user set gravatar_hash = ?, gold_badge_count = ?, silver_badge_count = ?, bronze_badge_count = ? where id = ?")

  (Hpricot(body) / "div[@class='user-info']").each do |user_info|
    # A search yields a (possibly empty) collection, never nil, so the
    # presence check has to be made on the first element.
    user_link = (user_info / "div[@class='user-gravatar32']/a").first
    user_id = user_link && user_link[:href].to_s.split('/')[2]
    unless user_id
      warn "Skipping user entry without a profile link"
      next
    end

    gravatar_img = (user_info / "div[@class='user-gravatar32']/a/img").first
    gravatar_hash = gravatar_hash_from(gravatar_img && gravatar_img[:src])

    # Reputation is only echoed to stdout; it is not written to the database.
    user_rep = (user_info / "div[@class='user-details']/span[@class='reputation-score']").first
    user_repf = user_rep && user_rep.inner_html

    user_gold = badge_count(user_info, 'gold')
    user_silver = badge_count(user_info, 'silver')
    user_bronze = badge_count(user_info, 'bronze')

    puts "#{user_id}: #{gravatar_hash}, #{user_repf}, #{user_gold}, #{user_silver}, #{user_bronze}"
    st.execute(gravatar_hash, user_gold, user_silver, user_bronze, user_id)
  end
ensure
  # Release the statement before the connection that owns it.
  st.close if st
  my.close if my
end

# Finds the link to the following page in a users listing page.
#
# body - HTML source of a users listing page.
#
# Looks through the anchors directly under the 'pager' div and returns the
# href of the first one that contains a span with class 'page-numbers next'.
# Returns nil when no anchor carries such a span.
def get_next_url(body)
  anchors = Hpricot(body) / "div[@class='pager']/a"

  next_anchor = anchors.detect do |anchor|
    marker = anchor / "span[@class='page-numbers next']"
    !marker.nil? && !marker[0].nil?
  end

  next_anchor ? next_anchor[:href] : nil
end

# Crawl the paginated users listing: start at /users, store every page via
# parse_page, and follow the pager's "next" link until there is none.
mech = WWW::Mechanize.new
# user_agent_alias= only accepts the alias names Mechanize predefines; a
# custom identifier has to be assigned through user_agent= instead.
mech.user_agent = 'GeeQE V1.0'

base_url = 'http://stackoverflow.com'
url = base_url + '/users'

while url
  puts "Pulling: #{url}"

  body = mech.get(url).body
  parse_page(body)

  # get_next_url returns nil on the last page; concatenating nil onto
  # base_url would raise TypeError, so only build a URL when a path exists.
  next_path = get_next_url(body)
  url = next_path && base_url + next_path

  # Pause 1-20 seconds between requests; no need to wait after the last page.
  sleep rand(20) + 1 if url
end

Change log

r2 by casron on Sep 27, 2009   Diff
initial import
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 2385 bytes, 69 lines
Powered by Google Project Hosting