My favorites | Sign in
Project Home Wiki Issues Source
Checkout   Browse   Changes  
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
require 'rubygems'
require 'libxml'
require "mysql"
require "pp"

include LibXML

class BadgeCallbacks
include XML::SaxParser::Callbacks

@my
@st

def initialize(my)
@my = my
@st = @my.prepare("insert into badge(id, user_id, name, created) values(?, ?, ?, ?)")
end

def on_start_element(element, attributes)
if element == 'row'
@st.execute(attributes['Id'], attributes['UserId'], attributes['Name'], attributes['Date'])
end
end
end

class CommentCallbacks
include XML::SaxParser::Callbacks

@my
@st

def initialize(my)
@my = my
@st = @my.prepare("insert into comment(id, post_id, user_id, score, comment_text, created) values(?, ?, ?, ?, ?, ?)")
end

def on_start_element(element, attributes)
if element == 'row'
@st.execute(attributes['Id'], attributes['PostId'], attributes['UserId'] == nil ? -1 : attributes['UserId'], attributes['Score'], attributes['Text'], attributes['CreationDate'])
end
end
end

class VoteCallbacks
include XML::SaxParser::Callbacks

@my
@st

def initialize(my)
@my = my
@st = @my.prepare("insert into vote(id, post_id, vote_type_id, created) values(?, ?, ?, ?)")
end

def on_start_element(element, attributes)
if element == 'row'
@st.execute(attributes['Id'], attributes['PostId'], attributes['VoteTypeId'], attributes['CreationDate'])
end
end
end

class UserCallbacks
include XML::SaxParser::Callbacks

@my
@st

def initialize(my)
@my = my
@st = @my.prepare("insert into so_user(id, reputation, display_name, last_access_date, website_url, location, age, about_me, views, up_votes, down_votes, created) values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
end

def on_start_element(element, attributes)
if element == 'row'
@st.execute(attributes['Id'], attributes['Reputation'], attributes['DisplayName'], attributes['LastAccessDate'], attributes['WebsiteUrl'], attributes['Location'], attributes['Age'] == nil ? -1 : attributes['Age'], attributes['AboutMe'], attributes['Views'], attributes['UpVotes'], attributes['DownVotes'], attributes['CreationDate'])
end
end
end


class PostCallbacks
include XML::SaxParser::Callbacks

@my
@post_st
@tag_insert_st
@tag_select_st

def initialize(my)
@my = my
@post_st = @my.prepare("insert into post(id, post_type_id, accepted_answer_id, parent_id, score, view_count, body_text, owner_id, last_editor_user_id, last_editor_display_name, last_edit_date, last_activity_date, title, answer_count, comment_count, favorite_count, created) values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
@tag_insert_st = @my.prepare("insert into tag(name) values(?)")
@tag_select_st = @my.prepare("select id from tag where name = ?")
@post_ot_tag_insert_st = @my.prepare("insert into post_to_tag(post_id, tag_id) values(?, ?)")
end

def on_start_element(element, attributes)
if element == 'row'
@post_st.execute(attributes['Id'], attributes['PostTypeId'], attributes['AcceptedAnswerId'], attributes['ParentId'], attributes['Score'], attributes['ViewCount'], attributes['Body'], attributes['OwnerUserId'] == nil ? -1 : attributes['OwnerUserId'], attributes['LastEditorUserId'], attributes['LastEditorDisplayName'], attributes['LastEditDate'], attributes['LastActivityDate'], attributes['Title'] == nil ? '' : attributes['Title'], attributes['AnswerCount'] == nil ? 0 : attributes['AnswerCount'], attributes['CommentCount'] == nil ? 0 : attributes['CommentCount'], attributes['FavoriteCount'] == nil ? 0 : attributes['FavoriteCount'], attributes['CreationDate'])
post_id = attributes['Id']

tags = attributes['Tags'] == nil ? '' : attributes['Tags']
tags.scan(/<(.*?)>/).each do |tag_name|
tag_id = insert_or_find_tag(tag_name[0])
@post_ot_tag_insert_st.execute(post_id, tag_id)
end
end
end

def insert_or_find_tag(tag_name)
@tag_select_st.execute(tag_name)
if @tag_select_st.num_rows > 0
@tag_select_st.fetch[0]
else
@tag_insert_st.execute(tag_name)
@tag_insert_st.insert_id
end
end
end

if ARGV.size != 5
puts "Usage load.rb <XML file path> <db host> <db user> <db pass> <db name>"
exit 1
end

my = Mysql::new(ARGV[1], ARGV[2], ARGV[3], ARGV[4])

puts "Loading badges"

parser = XML::SaxParser.file(ARGV[0] + '/badges.xml')
parser.callbacks = BadgeCallbacks.new(my)
parser.parse

puts "Loading comments"

parser = XML::SaxParser.file(ARGV[0] + '/comments.xml')
parser.callbacks = CommentCallbacks.new(my)
parser.parse

puts "Loading votes"

parser = XML::SaxParser.file(ARGV[0] + '/votes.xml')
parser.callbacks = VoteCallbacks.new(my)
parser.parse

puts "Loading users"

parser = XML::SaxParser.file(ARGV[0] + '/users.xml')
parser.callbacks = UserCallbacks.new(my)
parser.parse

puts "Loading posts"

parser = XML::SaxParser.file(ARGV[0] + '/posts.xml')
parser.callbacks = PostCallbacks.new(my)
parser.parse

Change log

r3 by casron on Oct 22, 2009   Diff
* Added tags table
* Split tags out into their own table in
the loader script
* Added command line args to loader script
Go to: 
Project members, sign in to write a code review

Older revisions

r2 by casron on Sep 27, 2009   Diff
initial import
All revisions of this file

File info

Size: 4939 bytes, 156 lines
Powered by Google Project Hosting