My favorites
▼
|
Sign in
inet-lab
Miscellaneous snippets of code in Python, Ruby, C#, etc.
Project Home
Downloads
Wiki
Issues
Source
Checkout
Browse
Changes
Source path:
svn
/
trunk
/
utils
/
Temp
/
parsegab.py
r208
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#! /usr/bin/env python
# -*- coding: utf-8 -*-
################################################################################
# parsegab.py, parse Google contacts dump
# Copyright (C) 2009 Konstantin Ignatiev, ignatiev@users.sourceforge.net
#
# This program is distributed under the terms of GNU General Public License,
# version 2, as published by the Free Software Foundation.
# See <URL:http://www.fsf.org/licenses/gpl.txt>
#
# [Based on template py_utility.py, version 2.0]
#
# ########## Informal subversion log ###########################################
# 142 2009-12-21 Initial version
################################################################################
# ./parsegab.py google.csv -O googlefixed.csv
import os, re, sys, getopt
import csv, StringIO
# from readcsvfile import readcsvfile
# Program version string, reported by --version and in the help text.
VERSION = "0.1"


class Opts:
    """Empty attribute holder; it declares no members of its own."""


# Module-wide instance of the holder above.
go = Opts()
def main():
    """Rewrite a Google contacts CSV export, moving phones out of "Notes".

    Usage: ``parsegab.py -O <output CSV> <input CSV>``.

    The input is read as UTF-16 CSV.  Four columns ("Phone 2/3 - Type" and
    "Phone 2/3 - Value") are appended to the header.  For every row, each
    ``home:|work:|mobile: <digits>`` fragment found in the "Notes" column is
    moved into the first empty "Phone N" slot (N = 1..3).  The result is
    written as UTF-16-LE CSV with a leading byte-order mark.

    This is Python 2 code: it relies on ``unicode`` and the ``StringIO``
    module.

    Raises:
        ValueError: if the header has no "Notes" or "Phone N" column, if a
            row's field count differs from the header's when a phone has to
            be stored, if a row has no free phone slot left, or if text
            remains in "Notes" after all phone fragments were removed.

    Exits through ``print_help_and_exit`` / ``sys.exit`` on command-line
    errors, on ``--help`` / ``--version`` and when the input file is empty.
    """
    # Local imports: only this function needs them.
    import codecs
    import io

    # With no arguments at all, show the full option list.
    if len(sys.argv) <= 1:
        print_help_and_exit()

    try:
        # http://docs.python.org/lib/module-getopt.html
        opts, args = getopt.gnu_getopt(sys.argv[1:], "hO:",
                                       ["help", "output=", "version"])
    except getopt.GetoptError as msg:
        print_help_and_exit(msg)

    out_file = None
    for opt, val in opts:
        if opt in ("-h", "--help"):
            print_help_and_exit()
        elif opt in ("-O", "--output"):
            out_file = val
        elif opt == "--version":
            print("%s, version %s" % (os.path.basename(sys.argv[0]), VERSION))
            sys.exit(0)
        else:
            print("CLI Option '%s' was not added to the code" % opt)
            sys.exit(1)

    # An empty value (-O "") is treated like a missing option.
    if not out_file:
        print_help_and_exit("Output file NOT given")
    print("Output : %s" % out_file)

    if len(args) < 1:
        print_help_and_exit("Input files NOT given")
    elif len(args) > 1:
        print_help_and_exit("%d input files: %s" % (len(args), ", ".join(args)))
    print("Input : %s" % args[0])

    def readiterator(path):
        """Yield the lines of the UTF-16 file *path*, re-encoded as UTF-8.

        The Python 2 csv module works on byte strings, so each decoded line
        is handed over as UTF-8.  Decoding is left to the codec rather than
        splitting the raw bytes on 0x0A, which mis-cuts any code unit that
        contains that byte and a last line without a trailing newline.
        newline="" keeps the original line endings intact for the csv parser.
        """
        fh = io.open(path, "r", encoding="utf-16", newline="")
        try:
            for text in fh:
                yield text.encode("utf-8")
        finally:
            # Runs on exhaustion, on error and when the generator is dropped.
            fh.close()

    class MyFileWriter(object):
        """Write-only sink storing unicode text as UTF-16-LE behind a BOM."""

        def __init__(self, path):
            self.fh = open(path, "wb")
            self.bom_pending = True

        def write(self, text):
            # The byte-order mark is emitted once, ahead of the first text.
            if self.bom_pending:
                self.fh.write(codecs.BOM_UTF16_LE)
                self.bom_pending = False
            self.fh.write(text.encode("utf_16_le"))

        def close(self):
            self.fh.close()

    class MyCSVWriter(object):
        """CSV writer for rows of unicode strings.

        Each row is formatted by csv.writer into an in-memory byte buffer
        (as UTF-8), decoded back to unicode and passed to *file_writer*.
        """

        def __init__(self, file_writer):
            self.stream = file_writer
            self.queue = StringIO.StringIO()
            self.writer = csv.writer(self.queue)

        def writerow(self, row):
            self.writer.writerow([s.encode("utf-8") for s in row])
            self.stream.write(unicode(self.queue.getvalue(), "utf-8"))
            # Empty the buffer so the next row starts from scratch.
            self.queue.truncate(0)

        def close(self):
            self.stream.close()

    # Read the header before the output file is opened (and truncated), so an
    # unreadable or empty input does not clobber an existing output file.
    reader = csv.reader(readiterator(args[0]))
    try:
        raw_headers = next(reader)
    except StopIteration:
        print_help_and_exit("Input file %s is empty" % args[0])

    dump_headers = False  # flip by hand to list the input columns
    if dump_headers:
        hout_fname = "headers.txt"
        hout = open(hout_fname, "w")
        try:
            for pos, name in enumerate(raw_headers):
                hout.write("%3d %s\n" % (pos + 1, name))
        finally:
            hout.close()
        print("Dumped file %s" % hout_fname)

    # Decode the header like the data rows, so non-ASCII column names survive
    # the encode step in MyCSVWriter.writerow.
    headers = [unicode(name, "utf-8") for name in raw_headers]
    headers.extend(["Phone 2 - Type", "Phone 2 - Value",
                    "Phone 3 - Type", "Phone 3 - Value"])
    i_NOTES = headers.index("Notes")

    re_repos = re.compile(r"\s*(home|work|mobile):\s*(\d+)\s*", re.I)

    def add_phone_type(line, _type, phone):
        """Store *phone* with label *_type* in the first empty phone slot."""
        label = _type.capitalize()
        if len(line) != len(headers):
            raise ValueError("%r: row has %d fields, header has %d"
                             % (line[0], len(line), len(headers)))
        for slot in range(1, 4):
            i_typ = headers.index("Phone %d - Type" % slot)
            i_val = headers.index("Phone %d - Value" % slot)
            if line[i_typ] == "":
                line[i_typ] = label
                line[i_val] = phone
                return
        raise ValueError("%r: no free phone slot for %s phone %s"
                         % (line[0], label, phone))

    def debug_line(line):
        """Print every field of *line* with its 1-based position."""
        for pos, field in enumerate(line):
            print("%3d %r" % (pos + 1, field))

    def move_phones(line):
        """Move phone fragments from Notes into phone slots; return the rest."""
        def reposition(m):
            print("%r: Adding %s phone %s" % (line[0], m.group(1), m.group(2)))
            add_phone_type(line, m.group(1), m.group(2))
            return ""
        return re_repos.sub(reposition, line[i_NOTES].replace("\n", " "))

    csvwriter = MyCSVWriter(MyFileWriter(out_file))
    try:
        csvwriter.writerow(headers)
        for rline in reader:
            if not rline:
                # csv yields [] for a blank line; there is nothing to convert.
                continue
            line = [unicode(x, "utf-8") for x in rline]
            # Debug hook: put a contact name here to dump that contact's row.
            if line[0] == u"foobar":
                debug_line(line)
            # Room for the four columns appended to the header.
            line.extend(["", "", "", ""])
            line[i_NOTES] = move_phones(line)
            # Notes are expected to hold nothing but phone fragments.
            if line[i_NOTES] != "":
                raise ValueError("%r: unexpected text left in Notes: %r"
                                 % (line[0], line[i_NOTES]))
            csvwriter.writerow(line)
    finally:
        csvwriter.close()
    print("Finished creating file %s" % out_file)
def print_help_and_exit(msg=False):
    """Print an error summary or the full help text, then terminate.

    With a truthy *msg*, the message and a short usage hint are printed and
    the process exits with status 2.  Otherwise the full help text is
    printed and the process exits with status 0.
    """
    exe = os.path.basename(sys.argv[0])

    if not msg:
        print('''\
%s : manually fixing information in Google Address Book, version '%s'
Usage:
%s -O <output CSV file> <input CSV file>
Options are:
-O, --output=<file> Output file
--version print CVS version info
-h, --help Print this help\
''' % (exe, VERSION, exe))
        sys.exit(0)

    print("ERROR: %s\n" % msg)
    print("Usage: %s [options] <input files>" % exe)
    print("... or use %s -h for more detailed help" % exe)
    sys.exit(2)
# Script entry point. The call is unconditional, so it also runs when this
# file is imported as a module.
main ()
Show details
Hide details
Change log
r142
by kostya on Dec 21, 2009
Diff
Added parsegab.py
Go to:
/trunk/utils/Temp/parsegab.py
Project members,
sign in
to write a code review
Older revisions
All revisions of this file
File info
Size: 5853 bytes, 193 lines
View raw file
File properties
svn:executable
*
Powered by
Google Project Hosting