| Issue 82: | Another UnicodeDecodeError in icqt.py | |
| Back to list |
Posted by guest at 2007-07-16 10:29:07 Another patch is needed according to my experience on line 179. So the complete patch summarizing also bugs 319 - 321 for me is: --- contact.py.orig Mon Jul 16 11:58:12 2007 +++ contact.py Mon Jul 16 11:53:57 2007 @@ -142,7 +142,9 @@ self.contactList.legacyList.deauthContact(self.jid) def updateNickname(self, nickname, push=True): - if self.nickname != nickname: + # hopet - http://www.blathersource.org/bugs_view.php?projid=pyicq-t&bugid=320 + # if self.nickname != nickname: + if unicode(self.nickname, errors='replace') != "nickname": self.nickname = nickname # will re-remove this if it's removed from JEP-0172. #self.sendNickname() --- legacy/buddies.py.orig Mon Jul 16 11:57:25 2007 +++ legacy/buddies.py Mon Jul 16 11:55:09 2007 @@ -229,7 +229,9 @@ self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", contact) self.session.pytrans.xdb.setListEntry("roster", self.session.jabberID, contact.lower(), payload=self.xdbcontacts[contact.lower()]) else: - if nick and self.xdbcontacts[contact.lower()].get('nickname','') != nick: + # hopet - http://www.blathersource.org/bugs_view.php?projid=pyicq-t&bugid=319 + # if nick and self.xdbcontacts[contact.lower()].get('nickname','') != nick: + if nick and unicode(self.xdbcontacts[contact.lower()].get('nickname',''), errors='replace') != nick: self.xdbcontacts[contact.lower()]['nickname'] = nick c.updateNickname(nick, push=True) self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", nick) --- legacy/icqt.py.orig Mon Jul 16 11:57:35 2007 +++ legacy/icqt.py Mon Jul 16 11:52:45 2007 @@ -176,6 +176,8 @@ if user.caps: self.oscarcon.legacyList.setCapabilities(user.name, user.caps) + # hopet + status = unicode(status, errors='replace') status = status.encode("utf-8", "replace") if user.flags.count("away"): self.getAway(user.name).addCallback(self.sendAwayPresence, user) @@ -327,7 +327,10 @@ status = msg[0] + ": " + status status = status.decode(charset, 'replace') - 
LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, msg[0], status)) + # hopet - http://www.blathersource.org/bugs_view.php?projid=pyicq-t&bugid=321 + # LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, msg[0], status)) + utfmsg = unicode(msg[0], errors='replace') + LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, utfmsg, status)) if status == "Away" or status=="I am currently away from the computer." or status=="I am away from my computer right now.": status = "" Posted by guest at 2007-07-16 10:31:07 Those comment lines can of course be removed - that was just my note to be able to find it later on. Posted by jadestorm at 2007-07-16 12:39:22 Hi "guest", do you have any interest in becoming an active developer of PyICQt? You have been submitting some great patches here. I haven't had time to look at them (what else is new...) but you appear to be doing some great work. I'd have pinged you directly but you are posting as guest and I don't know how to contact you. If you are interested, drop me a note at jadestorm@nc.rr.com. Posted by guest at 2007-07-16 13:48:23 I don't feel like I'm the right one to be an active developer - I'm not a Pythonist and I don't have time to work on this, except for minimum changes which are necessary to make it work. Further I'm not even sure whether the changes are correct and they definitely need to be reviewed. Petr (hopet@ics.muni.cz) Posted by jadestorm at 2007-07-16 14:18:09 Fair enough. If it makes you feel any better, I actually didn't know python before I started on PyAIMt and PyICQt. =) Anyway. Take care! Posted by guest at 2007-07-16 16:32:13 You can find a better version of the patch below. However, it's still not perfect as it screws up the encoding of non-ASCII characters. While trying to sort that out, I've ended up on some ICQ blacklist and having 1 attempt per 10 mins is far from encouraging for the development ;-). 
I will try to update it later on if I don't get too disgusted... diff -ruN src.orig/contact.py src/contact.py --- src.orig/contact.py Mon Jul 16 11:58:12 2007 +++ src/contact.py Mon Jul 16 17:39:52 2007 @@ -142,7 +142,7 @@ self.contactList.legacyList.deauthContact(self.jid) def updateNickname(self, nickname, push=True): - if self.nickname != nickname: + if unicode(self.nickname, errors='replace') != "nickname": self.nickname = nickname # will re-remove this if it's removed from JEP-0172. #self.sendNickname() diff -ruN src.orig/legacy/buddies.py src/legacy/buddies.py --- src.orig/legacy/buddies.py Mon Jul 16 11:57:25 2007 +++ src/legacy/buddies.py Mon Jul 16 17:39:48 2007 @@ -229,7 +229,7 @@ self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", contact) self.session.pytrans.xdb.setListEntry("roster", self.session.jabberID, contact.lower(), payload=self.xdbcontacts[contact.lower()]) else: - if nick and self.xdbcontacts[contact.lower()].get('nickname','') != nick: + if nick and unicode(self.xdbcontacts[contact.lower()].get('nickname',''), errors='replace') != nick: self.xdbcontacts[contact.lower()]['nickname'] = nick c.updateNickname(nick, push=True) self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", nick) diff -ruN src.orig/legacy/icqt.py src/legacy/icqt.py --- src.orig/legacy/icqt.py Mon Jul 16 11:57:35 2007 +++ src/legacy/icqt.py Mon Jul 16 18:17:33 2007 @@ -151,6 +151,10 @@ status = status.decode("utf-16be", "replace") elif encoding == "iso-8859-1": status = status.decode("iso-8859-1", "replace") + else: + # this is what we typically get for ICQ clients in the Czech Republic + status = status.decode('utf-8', 'replace') + print status.encode('iso-8859-2', 'replace') if status == "Away" or status=="I am currently away from the computer." 
or status=="I am away from my computer right now.": status = "" if user.idleTime: @@ -327,7 +331,8 @@ status = msg[0] + ": " + status status = status.decode(charset, 'replace') - LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, msg[0], status)) + utfmsg = unicode(msg[0], errors='replace') + LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, utfmsg, status)) if status == "Away" or status=="I am currently away from the computer." or status=="I am away from my computer right now.": status = "" Posted by guest at 2007-07-16 17:09:58 In the previous patch, I've forgotten to remove a debug print, so the corrected version is below. Also the comment has been changed to something more appropriate (see the following discussion). I've started to investigate what actually happens and the crux is probably in the change of Oscar protocol. Thus the src/tlib/oscar.py:357 is unable to extract statusencoding and returns None, resulting in wrong conversion later on. So the solution implemented in this patch is just temporary workaround and has to be fixed later on, therefore marked with XXX. diff -ruN src.orig/contact.py src/contact.py --- src.orig/contact.py Mon Jul 16 11:58:12 2007 +++ src/contact.py Mon Jul 16 17:39:52 2007 @@ -142,7 +142,7 @@ self.contactList.legacyList.deauthContact(self.jid) def updateNickname(self, nickname, push=True): - if self.nickname != nickname: + if unicode(self.nickname, errors='replace') != "nickname": self.nickname = nickname # will re-remove this if it's removed from JEP-0172. 
#self.sendNickname() diff -ruN src.orig/legacy/buddies.py src/legacy/buddies.py --- src.orig/legacy/buddies.py Mon Jul 16 11:57:25 2007 +++ src/legacy/buddies.py Mon Jul 16 17:39:48 2007 @@ -229,7 +229,7 @@ self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", contact) self.session.pytrans.xdb.setListEntry("roster", self.session.jabberID, contact.lower(), payload=self.xdbcontacts[contact.lower()]) else: - if nick and self.xdbcontacts[contact.lower()].get('nickname','') != nick: + if nick and unicode(self.xdbcontacts[contact.lower()].get('nickname',''), errors='replace') != nick: self.xdbcontacts[contact.lower()]['nickname'] = nick c.updateNickname(nick, push=True) self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", nick) diff -ruN src.orig/legacy/icqt.py src/legacy/icqt.py --- src.orig/legacy/icqt.py Mon Jul 16 11:57:35 2007 +++ src/legacy/icqt.py Mon Jul 16 19:03:30 2007 @@ -151,6 +151,9 @@ status = status.decode("utf-16be", "replace") elif encoding == "iso-8859-1": status = status.decode("iso-8859-1", "replace") + else: + # XXX: this is a fallback solution in case that the client status encoding has not been extracted, to avoid raising an exception + status = status.decode('utf-8', 'replace') if status == "Away" or status=="I am currently away from the computer." or status=="I am away from my computer right now.": status = "" if user.idleTime: @@ -327,7 +330,8 @@ status = msg[0] + ": " + status status = status.decode(charset, 'replace') - LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, msg[0], status)) + utfmsg = unicode(msg[0], errors='replace') + LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, utfmsg, status)) if status == "Away" or status=="I am currently away from the computer." 
or status=="I am away from my computer right now.": status = "" Posted by guest at 2007-07-16 18:03:47 So, digging slightly deeper, the problem is probably here: [2007-07-16 19:22:46] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9f' -- [2007-07-16 19:22:46] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\x9ft' -- [2007-07-16 19:22:46] unknown extended status type: 13 data: '\x00\r\x04\x04F\x97H\xaa' -- [2007-07-16 19:22:47] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9bf\xd7' -- [2007-07-16 19:22:47] unknown extended status type: 8 data: '\x00\x08\x01\x10\xda\x05\xb5\xc1\x81\xc7<=\xb0\x11\xf0q\xaa$\x94\ xaf' -- [2007-07-16 19:22:47] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa6\n' -- [2007-07-16 19:22:47] unknown extended status type: 13 data: '\x00\r\x04\x04F\x97<\xfd' -- [2007-07-16 19:22:47] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\x89\x85' -- [2007-07-16 19:22:48] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9f' -- [2007-07-16 19:22:48] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9f' -- [2007-07-16 19:25:11] unknown extended status type: 13 data: '\x00\r\x04\x04F\x97<\xfd' -- [2007-07-16 19:25:11] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9\xf7' -- [2007-07-16 19:25:12] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9\xf8' -- [2007-07-16 19:25:12] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9\xf8' -- [2007-07-16 19:25:38] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9\xf8' -- [2007-07-16 19:26:34] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa8\xc3' -- [2007-07-16 19:26:35] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa7\x1d' -- [2007-07-16 19:26:35] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\xa9f' -- [2007-07-16 19:26:35] unknown extended status type: 8 data: '\x00\x08\x01\x10\xfaN\n\x15\xcc\xe6 8\xe0xs\xd8@\x84\xd3\xdb' -- 
[2007-07-16 19:26:35] unknown extended status type: 8 data: '\x00\x08\x01\x10h?\x80\x03\xb9b(\xb4\xa0\xd4~\xc3\xe1u\xd4\xdc' [2007-07-16 19:26:35] unknown extended status type: 13 data: '\x00\r\x04\x04F\x9b\x99/' The most frequent and annoying is type 13, which should be added to src/tlib/oscar.py:369 and appropriately parsed. Posted by guest at 2007-07-18 15:38:51 Hi, i'm getting now "decoding Unicode is not supported" exceptions when using your fixes. Transport is working better with your patches (no more utf8 decoding errors) but for some users, transport doesn't work at all anymore. Any suggestions? Posted by guest at 2007-07-19 08:19:33 I'm still trying to sort that out, though I don't have enough time to do that full on. I've already figured out that some clients are sending the extended messages of the same Oscar type (0x0d or 0x08) as ascii (Miranda) while others as UTF-16. The encoding is probably stored somewhere in the Oscar protocol but I haven't found that yet. Posted by guest at 2007-07-19 09:08:30 Here's the patch that should improve on "decoding Unicode is not supported". Furthermore, it has a heuristics for the Czech ICQ clients (which totally sucks btw). 
diff -ruN src.orig/config.py src/config.py --- src.orig/config.py Mon Jun 4 03:10:36 2007 +++ src/config.py Thu Jul 19 09:56:50 2007 @@ -34,6 +34,8 @@ sessionGreeting = "" registerMessage = "" crossChat = bool(False) +#debugLevel = 3 # 0->None, 1->Traceback, 2->WARN,ERROR, 3->INFO,WARN,ERROR +#debugFile = "/var/log/ejabberd/debug-pyicq-t.log" debugLevel = 0 # 0->None, 1->Traceback, 2->WARN,ERROR, 3->INFO,WARN,ERROR debugFile = "" disableRegister = bool(False) Binary files src.orig/config.pyc and src/config.pyc differ diff -ruN src.orig/contact.py src/contact.py --- src.orig/contact.py Mon Jul 16 11:58:12 2007 +++ src/contact.py Thu Jul 19 10:44:39 2007 @@ -142,7 +142,11 @@ self.contactList.legacyList.deauthContact(self.jid) def updateNickname(self, nickname, push=True): - if self.nickname != nickname: + try: + decodednickname = unicode(self.nickname, errors='replace') + except: + decodednickname = self.nickname + if decodednickname != "nickname": self.nickname = nickname # will re-remove this if it's removed from JEP-0172. 
#self.sendNickname() Binary files src.orig/contact.pyc and src/contact.pyc differ diff -ruN src.orig/legacy/buddies.py src/legacy/buddies.py --- src.orig/legacy/buddies.py Mon Jul 16 11:57:25 2007 +++ src/legacy/buddies.py Thu Jul 19 11:06:11 2007 @@ -229,7 +229,12 @@ self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", contact) self.session.pytrans.xdb.setListEntry("roster", self.session.jabberID, contact.lower(), payload=self.xdbcontacts[contact.lower()]) else: - if nick and self.xdbcontacts[contact.lower()].get('nickname','') != nick: + decodednickname = self.xdbcontacts[contact.lower()].get('nickname','') + try: + decodednickname = unicode(decodednickname, errors='replace') + except: + pass + if nick and decodednickname != nick: self.xdbcontacts[contact.lower()]['nickname'] = nick c.updateNickname(nick, push=True) self.session.sendRosterImport(icq2jid(contact), "subscribe", "both", nick) Binary files src.orig/legacy/buddies.pyc and src/legacy/buddies.pyc differ diff -ruN src.orig/legacy/icqt.py src/legacy/icqt.py --- src.orig/legacy/icqt.py Mon Jul 16 11:57:35 2007 +++ src/legacy/icqt.py Thu Jul 19 10:48:29 2007 @@ -151,6 +151,26 @@ status = status.decode("utf-16be", "replace") elif encoding == "iso-8859-1": status = status.decode("iso-8859-1", "replace") + elif encoding == "icq51pseudounicode": + # XXX: ICQ 5.1 CZ clients seem to wrap UTF-8 (assuming it's CP1250) into UTF-16 + # while e.g. 
Miranda sends it as ascii + print 'Original status: ' + repr(status) + skipdecode = 'false' + try: + status = status.decode('utf-16be', 'strict') + except: + print "Conversion from UTF-16 failed, skipping the rest" + skipdecode = 'true' + if skipdecode == 'false': + status = status.encode('cp1250', 'replace') + status = status.decode('utf-8', 'replace') + print 'Decoded status: ' + repr(status) + printstatus = status.encode('iso-8859-2', 'replace') + print 'iso-8859-2 status: ' + printstatus + "\n\n" + else: + # this is a fallback solution in case that the client status encoding has not been extracted, to avoid raising an exception + status = status.decode('utf-8', 'replace') + LogEvent(WARN, self.session.jabberID, "Unknown status message encoding for %s" % user.name) if status == "Away" or status=="I am currently away from the computer." or status=="I am away from my computer right now.": status = "" if user.idleTime: @@ -327,7 +347,8 @@ status = msg[0] + ": " + status status = status.decode(charset, 'replace') - LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, msg[0], status)) + utfmsg = unicode(msg[0], errors='replace') + LogEvent(INFO, self.session.jabberID, "Away (%s, %s) message %s" % (charset, utfmsg, status)) if status == "Away" or status=="I am currently away from the computer." 
or status=="I am away from my computer right now.": status = "" Binary files src.orig/legacy/icqt.pyc and src/legacy/icqt.pyc differ diff -ruN src.orig/tlib/oscar.py src/tlib/oscar.py --- src.orig/tlib/oscar.py Mon Jun 4 03:16:03 2007 +++ src/tlib/oscar.py Tue Jul 17 14:16:21 2007 @@ -366,6 +366,11 @@ else: self.url=None log.msg(" extracted itunes URL: %s"%(repr(self.url))) + elif exttype == 0x0d or exttype == 0x08: + #XXX attempt to resolve problem with new ICQ clients: this needs to be verified by reverse engineering of the protocol + self.statusencoding = "icq51pseudounicode" + log.msg(" status message encoding: %s"%(str(self.statusencoding))) + # XXX: there should be probably more information available for extraction here else: log.msg(" unknown extended status type: %d\ndata: %s"%(ord(v[1]), repr(v[:ord(v[3])+4]))) #v=v[ord(v[3])+4:] Binary files src.orig/tlib/oscar.pyc and src/tlib/oscar.pyc differ Posted by guest at 2007-07-19 21:02:02 Now we have: [2007-07-19 22:59:04] Connection Lost! 
<legacy.icqt.OA instance at 0x407bc96c> [2007-07-19 22:59:04] {64: '\x00\x00\x08\xfc', 65: 'http://ftp.icq.com/pub/ICQ_Win95_98_NT4/ICQ_5/icq5_1_setup.exe', 66: 'http://cf.icq.com/cf/icq5/product_versions.html', 67: '5.33.2299', 68: '\x00\x00\x08\xfc', 5: '64.12.26.102:5190', 6: "\xe2\xf2pV\x8a\xc1\x1a\x0b\x83\xba\xe1\xfd\x11\xe7\xb2uI\x1f\\r`\xf5\xa8\x c3\x1bJ\xe5d\x14\xa3\xca\xf1\x14\x85&\xf9'\xc6\x9d\x98,\xc0\x8f\xdeJ\xda9\xd c\xc6\xbeO\xce\x08(\x83D\xe0\xe2S2f\xf8\xedp\xd6\xdfh\x06\xd1I\xd8\xf8\xbd\x99\x b8Y\xb8o7H\xb0;\xaa\xf3w1J\x9e]\xc5\xf8\x95\x1a\xbe\xf1\x9e\xec\x0b\x0e\x0b\xe2\ xd3\xe4v2\xd5<\xd0\x80\x8a\xf6jL\xfd\xb33\xe5S>\x84\x91\xee\r\xe9\xa2:\x11 X\xf8\xff\x08\xbdL\xde\x1d\xc0\xa7D\xe3\xc1\xdf\\`\x91\xc2\x87\xbcRS\xe14\xdfw\x bdHC\x8f\xcdq\xe0FzQX\xd0\xe5\xfd\x1d\x17\x9f\xcb\x0c#\x07Ws\xeal\xcc\t*R{p\x1fh -K8\x0c\xa5XlH\x8c~\xaa\xb4\x81\x9b\xe3\x03&\xa1\xf1\x16\x164\xc0\xe4\x80\xb 3Y\xefU\x98\x0e:X]\xd6\x00\xff\xaf\x08\xe9\xc1\x9e\xf9;\x03*:\xfc\xd1x-W?\xb6\xb 5\xa8x\x06\xea/\x88\xa5\xfc\xc0SB\x1eG\xbc\xc9", 71: '5.33.2299', 70: 'http://cf.icq.com/cf/icq5/product_versions.html', 69: 'http://ftp.icq.com/pub/ICQ_Win95_98_NT4/ICQ_5/icq5_1_setup.exe'} something strange is going on here... Posted by guest at 2007-07-20 08:30:03 Hm - can you send a traceback where the Python crashes - e.g. by running python PyICQt.py provided it crashes at all. If it doesn't crash, you may try setting debugLevel = 3 debugFile = "/var/log/ejabberd/debug-pyicq-t.log" in src/config.py and try locating the problem in /var/log/ejabberd/debug-pyicq-t.log. Posted by guest at 2007-07-20 09:26:20 The recent reported problem may be related to bug #299 - though there's no solution there. Posted by guest at 2007-07-20 13:57:20 Hi, i'm using -t -D and -l on the commandline, makes the same like you want i guess ;) The output is here: http://www.darkman.de/~sven/pyicq-debug.log HTH and thanks! BTW: do you have a jid? 
;) Posted by guest at 2007-07-24 19:26:15 Yes I have: hopet@arwen.ics.muni.cz Posted by guest at 2007-08-25 04:42:06 To all: please don't post patches inline in the topic; attach the patch file to the bug topic instead.
Jan 12, 2008
Project Member
#1
volk...@gmail.com
Status:
Fixed
|