My favorites | Sign in
Project Home Downloads Wiki Issues Source
Repository:
Checkout   Browse   Changes   Clones    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#
#SMS test via Google Voice
#
#John Nagle
# nagle@animats.com
#
import googlevoice
import sys
import BeautifulSoup


def extractsms(htmlsms) :
"""
extractsms -- extract SMS messages from BeautifulSoup tree of Google Voice SMS HTML.

Output is a list of dictionaries, one per message.
"""
msgitems = [] # accum message items here
# Extract all conversations by searching for a DIV with an ID at top level.
tree = BeautifulSoup.BeautifulSoup(htmlsms) # parse HTML into tree
conversations = tree.findAll("div",attrs={"id" : True},recursive=False)
for conversation in conversations :
# For each conversation, extract each row, which is one SMS message.
rows = conversation.findAll(attrs={"class" : "gc-message-sms-row"})
for row in rows : # for all rows
# For each row, which is one message, extract all the fields.
msgitem = {"id" : conversation["id"]} # tag this message with conversation ID
spans = row.findAll("span",attrs={"class" : True}, recursive=False)
for span in spans : # for all spans in row
cl = span["class"].replace('gc-message-sms-', '')
msgitem[cl] = (" ".join(span.findAll(text=True))).strip() # put text in dict
msgitems.append(msgitem) # add msg dictionary to list
return msgitems

voice = googlevoice.Voice()
voice.login()

for msg in extractsms(voice.sms_html()):
print str(msg)

Change log

fd81b359574a by jqu...@desktop on Nov 11, 2009   Diff
moved parse_sms into examples
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 1535 bytes, 39 lines
Powered by Google Project Hosting