My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#! /usr/bin/env python
# -*- coding: utf-8 -*-
################################################################################
# parsegab.py, parse Google contacts dump
# Copyright (C) 2009 Konstantin Ignatiev, ignatiev@users.sourceforge.net
#
# This program is distributed under the terms of GNU General Public License,
# version 2, as published by the Free Software Foundation.
# See <URL:http://www.fsf.org/licenses/gpl.txt>
#
# [Based on template py_utility.py, version 2.0]
#
# ########## Informal subversion log ###########################################
# 142 2009-12-21 Initial version
################################################################################

# Example invocation:
#   ./parsegab.py google.csv -O googlefixed.csv

import os, re, sys, getopt
import csv, StringIO
# from readcsvfile import readcsvfile

# Program version reported by --version and in the help text.
VERSION = "0.1"


class Opts:
    """Bare attribute container; it defines no behaviour of its own."""


# Module-wide Opts instance.  Presumably intended to carry global options;
# nothing in the code shown here reads or writes it -- TODO confirm.
go = Opts()

def main():
    """Parse the command line and rewrite a Google contacts CSV export.

    Reads the single UTF-16 CSV file named on the command line, appends the
    columns "Phone 2 - Type/Value" and "Phone 3 - Type/Value" to the header,
    moves every "home|work|mobile: <digits>" entry found in the "Notes"
    column into the first free "Phone N - Type"/"Phone N - Value" pair
    (N = 1..3) and writes the result as UTF-16LE with a BOM to the file
    given by -O/--output.

    Command-line errors terminate the process through print_help_and_exit().

    Raises ValueError when a row cannot be converted: no free phone slot is
    left, the row width does not match the header, or the notes still hold
    text after all phone numbers were moved.

    NOTE: this is Python 2 code (unicode(), StringIO, byte-oriented csv);
    the syntax used also parses under Python 3 but it will not run there.
    """
    # Print the full help when the program is invoked without arguments.
    if len(sys.argv) <= 1:
        print_help_and_exit()

    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "hO:",
                                       ["help", "output=", "version"])
    except getopt.GetoptError as err:
        print_help_and_exit(str(err))

    out_file = None
    for opt, val in opts:
        if opt in ("-h", "--help"):
            print_help_and_exit()
        elif opt in ("-O", "--output"):
            out_file = val
        elif opt == "--version":
            print("%s, version %s" % (os.path.basename(sys.argv[0]), VERSION))
            sys.exit(0)
        else:
            # Only reachable if an option is declared for getopt above but
            # not handled in this chain.
            print("CLI Option '%s' was not added to the code" % opt)
            sys.exit(1)

    # "not out_file" also rejects an empty name given as -O "".
    if not out_file:
        print_help_and_exit("Output file NOT given")
    print("Output : %s" % out_file)

    if len(args) < 1:
        print_help_and_exit("Input files NOT given")
    elif len(args) > 1:
        print_help_and_exit("%d input files: %s" % (len(args), ", ".join(args)))
    print("Input : %s" % args[0])

    def readiterator(path):
        """Yield the lines of the UTF-16 file *path* as UTF-8 byte strings.

        The file is decoded as a whole and only then split on "\\n".
        Splitting the raw bytes on 0x0A instead would cut through any
        character whose UTF-16 code unit contains that byte and would leave
        a file without a trailing newline with an odd number of bytes.
        A leading BOM selects the byte order; without one the data is taken
        to be little-endian.  Line endings are kept, as csv.reader expects.
        """
        fh = open(path, "rb")
        try:
            raw = fh.read()
        finally:
            fh.close()

        if raw[:2] in ('\xff\xfe', '\xfe\xff'):
            text = unicode(raw, "utf_16")       # codec consumes the BOM
        else:
            text = unicode(raw, "utf_16_le")

        pieces = text.split(u"\n")
        tail = pieces.pop()                     # text after the last "\n"
        for piece in pieces:
            # The Python 2 csv module works on byte strings only.
            yield (piece + u"\n").encode("utf-8")
        if tail:
            yield tail.encode("utf-8")

    class MyFileWriter:
        """Write unicode text to a file as UTF-16LE, preceded by a BOM."""

        def __init__(self, path):
            self.fh = open(path, "wb")
            self.fh.write('\xff\xfe')

        def write(self, line):
            self.fh.write(line.encode("utf_16_le"))

        def close(self):
            self.fh.close()

    class MyCSVWriter:
        """csv.writer front end that accepts rows of unicode strings.

        Each row is rendered as UTF-8 into a scratch buffer (the Python 2
        csv module cannot handle unicode), then decoded again and passed to
        the underlying stream, which applies the final output encoding.
        """

        def __init__(self, file_writer):
            self.stream = file_writer
            self.queue = StringIO.StringIO()
            self.writer = csv.writer(self.queue)

        def writerow(self, row):
            self.writer.writerow([s.encode("utf-8") for s in row])
            self.stream.write(unicode(self.queue.getvalue(), "utf-8"))
            # Empty the scratch buffer for the next row.
            self.queue.truncate(0)

        def close(self):
            self.stream.close()

    reader = csv.reader(readiterator(args[0]))
    try:
        first_row = next(reader)
    except StopIteration:
        print_help_and_exit("Input file %s is empty" % args[0])

    # Decode the header like every other row so that non-ASCII column names
    # survive the .encode("utf-8") done in MyCSVWriter.writerow().
    headers = [unicode(h, "utf-8") for h in first_row]
    headers.extend(["Phone 2 - Type", "Phone 2 - Value",
                    "Phone 3 - Type", "Phone 3 - Value"])
    i_notes = headers.index("Notes")

    def add_phone_type(line, _type, phone):
        """Store *phone* in the first empty "Phone N" slot of *line* (N = 1..3).

        *_type* is written capitalised ("mobile" -> "Mobile").
        Raises ValueError if the row width differs from the header or if
        all three slots are already taken.
        """
        label = _type[:1].upper() + _type[1:].lower()
        if len(line) != len(headers):
            raise ValueError("%r: row has %d fields, header has %d"
                             % (line[0], len(line), len(headers)))
        for ii in range(1, 4):
            i_typ = headers.index("Phone %d - Type" % ii)
            i_val = headers.index("Phone %d - Value" % ii)
            if line[i_typ] == "":
                line[i_typ] = label
                line[i_val] = phone
                return
        raise ValueError("%r: no free phone slot for %s phone %s"
                         % (line[0], _type, phone))

    def debug_line(line):
        """Print every field of *line* with its 1-based position."""
        for pos, field in enumerate(line):
            print("%3d %r" % (pos + 1, field))

    # "<kind>: <digits>" with surrounding whitespace, case-insensitive.
    re_repos = re.compile(r"\s*(home|work|mobile):\s*(\d+)\s*", re.I)

    # The output is opened only after the header was read successfully and
    # is closed even when a row fails to convert.
    csvwriter = MyCSVWriter(MyFileWriter(out_file))
    try:
        csvwriter.writerow(headers)

        for rline in reader:
            line = [unicode(x, "utf-8") for x in rline]

            # Debugging hook: put a contact name here to dump its raw row.
            if line[0] == u"foobar":
                debug_line(line)

            # Room for the four columns appended to the header.
            line.extend(["", "", "", ""])

            notes = line[i_notes].replace("\n", " ")
            for m in re_repos.finditer(notes):
                print("%r: Adding %s phone %s"
                      % (line[0], m.group(1), m.group(2)))
                add_phone_type(line, m.group(1), m.group(2))
            line[i_notes] = re_repos.sub("", notes)

            # Every note is expected to consist of phone entries only;
            # anything left over would otherwise be lost unnoticed.
            if line[i_notes] != "":
                raise ValueError("%r: unparsed text left in notes: %r"
                                 % (line[0], line[i_notes]))

            csvwriter.writerow(line)
    finally:
        csvwriter.close()

    print("Finished creating file %s" % out_file)


def print_help_and_exit(msg=False):
    """Print an error or the full help text, then terminate the process.

    msg -- error description.  When it is given (truthy) the error and a
           short usage reminder are written to stderr and the process exits
           with status 2.  Otherwise the full help text is printed on
           stdout and the process exits with status 0.

    This function never returns; it always raises SystemExit.
    """
    exe = os.path.basename(sys.argv[0])

    if msg:
        # Diagnostics belong on stderr so they do not mix with normal output.
        sys.stderr.write("ERROR: %s\n\n" % msg)
        sys.stderr.write("Usage: %s -O <output CSV file> <input CSV file>\n"
                         % exe)
        sys.stderr.write("... or use %s -h for more detailed help\n" % exe)
        sys.exit(2)

    print('''\

%s : manually fixing information in Google Address Book, version '%s'

Usage:

    %s -O <output CSV file> <input CSV file>

Options are:

    -O, --output=<file>   Output file

    --version             Print program version and exit
    -h, --help            Print this help\
''' % (exe, VERSION, exe))
    sys.exit(0)

# Run the command-line interface only when executed as a script, so that
# importing this module does not parse sys.argv or exit the interpreter.
if __name__ == "__main__":
    main()

Change log

r142 by kostya on Dec 21, 2009   Diff
Added parsegab.py
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 5853 bytes, 193 lines

File properties

svn:executable
*
Powered by Google Project Hosting