My favorites
▼
|
Sign in
geeqe
Geek Question Explorer
Project Home
Downloads
Wiki
Issues
Source
Checkout
Browse
Changes
Source path:
svn
/
trunk
/
scripts
/
useroverviewpuller.rb
r3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/ruby
#
# This script fills in the gravatar hash and badge counts for each user
# profile. It reads the paged listing at http://stackoverflow.com/users, so
# many users are covered per request instead of one server hit per profile.
#
require 'rubygems'
require 'hpricot'
require 'mechanize'
require "mysql"
require 'pp'
# Returns the badge counter text for the given badge colour ("gold", "silver"
# or "bronze") inside one user-info element, or 0 when the user has no badge
# of that colour (the counter span is then absent from the markup).
def badge_count_for(user_info, colour)
  counter = (user_info/"div[@class='user-details']/span[@title~=#{colour}]/span[@class='badgecount']")[0]
  counter ? counter.inner_html : 0
end

# Parses one page of the user listing and stores each user's gravatar hash and
# badge counts in the so_user table.
#
# body - the HTML of a user listing page, as handed to Hpricot().
#
# For every div with class 'user-info' the user id is taken from the profile
# link, the gravatar hash from the avatar image URL, and the badge counts from
# the badgecount spans. A summary line is printed per user and the row with
# the matching id is updated.
#
# Entries without a profile link are skipped: there is no id to update.
# The database connection and prepared statement are opened per call and are
# always closed again, even when parsing or the update raises.
def parse_page(body)
  my = Mysql::new("localhost", "username", "password", "so_database")
  st = nil
  begin
    st = my.prepare("update so_user set gravatar_hash = ?, gold_badge_count = ?, silver_badge_count = ?, bronze_badge_count = ? where id = ?")
    (Hpricot(body)/"div[@class='user-info']").each do |user_info|
      # Searches yield an empty collection (never nil) when nothing matches,
      # so test the first element rather than the collection itself.
      user_link = (user_info/"div[@class='user-gravatar32']/a")[0]
      # href looks like /users/<id>/<name>; the id is the third segment.
      user_id = user_link ? user_link[:href].to_s.split('/')[2] : nil
      next unless user_id

      gravitar_img = (user_info/"div[@class='user-gravatar32']/a/img")[0]
      # src looks like http://host/avatar/<hash>?<params>; keep only <hash>.
      gravitar_hash = gravitar_img ? gravitar_img[:src].to_s.split('/')[4].to_s.split('?')[0] : nil

      user_rep = (user_info/"div[@class='user-details']/span[@class='reputation-score']")[0]
      user_repf = user_rep ? user_rep.inner_html : nil

      user_gold = badge_count_for(user_info, 'gold')
      user_silver = badge_count_for(user_info, 'silver')
      user_bronze = badge_count_for(user_info, 'bronze')

      puts "#{user_id}: #{gravitar_hash}, #{user_repf}, #{user_gold}, #{user_silver}, #{user_bronze}"
      st.execute(gravitar_hash, user_gold, user_silver, user_bronze, user_id)
    end
  ensure
    st.close if st
    my.close
  end
end
# Finds the link to the following page of the user listing.
#
# body - the HTML of a user listing page, as handed to Hpricot().
#
# Looks through the anchors inside the 'pager' div and picks the first one
# that contains a span with class 'page-numbers next'. Returns that anchor's
# href attribute, or nil when no anchor carries such a span.
def get_next_url(body)
  pager_links = (Hpricot(body)/"div[@class='pager']/a")
  next_link = pager_links.find do |link|
    marker = (link/"span[@class='page-numbers next']")
    !marker.nil? && !marker[0].nil?
  end
  next_link ? next_link[:href] : nil
end
# Driver: walk the paged user listing starting at /users, storing each page's
# users via parse_page and following the "next" link until there is none.
mech = WWW::Mechanize.new
# user_agent_alias= only accepts names from Mechanize's built-in alias table;
# a literal User-Agent string has to be set through user_agent=.
mech.user_agent = 'GeeQE V1.0'
base_url = 'http://stackoverflow.com'
url = base_url + '/users'
while url
  puts "Pulling: #{url}"
  body = mech.get(url).body
  parse_page(body)
  # get_next_url returns nil on the last page; concatenating nil onto base_url
  # would raise TypeError, so only build the next URL when a path came back.
  next_path = get_next_url(body)
  url = next_path ? base_url + next_path : nil
  # Random 1-20 second pause between requests; skipped once the crawl is done.
  sleep rand(20) + 1 if url
end
Show details
Hide details
Change log
r2
by casron on Sep 27, 2009
Diff
initial import
Go to:
/trunk/scripts
/trunk/scripts/load.rb
/trunk/scripts/schema.sql
...nk/scripts/useroverviewpuller.rb
Project members,
sign in
to write a code review
Older revisions
All revisions of this file
File info
Size: 2385 bytes, 69 lines
View raw file
Powered by
Google Project Hosting