My favorites | Sign in
Project Home Downloads Wiki Issues Source
READ-ONLY: This project has been archived. For more information see this post.
Search
for
  Advanced search   Search tips   Subscriptions

Issue 71 attachment: pdfsizeopt-12oct12.pat (4.2 KB)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
--- pdfsizeopt.py- 2012-10-12 15:44:40.879227789 -0400
+++ pdfsizeopt.py 2012-10-12 15:58:46.387183581 -0400
@@ -3629,22 +3629,22 @@
f0 = cls.MSBFirstToInteger(xref_data[i : i + w0])
else:
f0 = 1
f1 = cls.MSBFirstToInteger(xref_data[i + w0 : i + w01])
if w2:
f2 = cls.MSBFirstToInteger(xref_data[i + w01 : i + w012])
else:
f2 = 0
if not f0: # A free object, ignore it.
continue
- if obj_num in obj_starts:
- raise PdfXrefStreamError('duplicate obj %d' % obj_num)
+ #if obj_num in obj_starts:
+ # raise PdfXrefStreamError('duplicate obj %d' % obj_num)
if f0 == 1: # f1 is the object offset in the file.
if f2:
if not do_ignore_generation_numbers:
raise NotImplementedError(
'generational objects (in %s %s) not supported at %d' %
(obj_num, f2, xref_ofs))
has_generational_objs = True

if f1 < 9:
# Accept (and ignore) a 0 offset, Multivalent generates such files:
@@ -3678,23 +3678,23 @@
#
# /ID [<03A5...><C90F...>]
# /Info 36 0 R
# /Root 38 0 R
if (old_value != new_value and
trailer_obj.CompressValue(old_value) !=
trailer_obj.CompressValue(new_value)):
duplicate_names.add(name)
else:
trailer_obj.Set(name, new_value)
- if duplicate_names:
- raise PdfXrefStreamError(
- 'duplicate names in xref streams: %r' % sorted(duplicate_names))
+ #if duplicate_names:
+ # raise PdfXrefStreamError(
+ # 'duplicate names in xref streams: %r' % sorted(duplicate_names))

prev = xref_obj.Get('Prev')
if prev is None:
break
trailer_obj.Set('Prev', None)
# TODO(pts): For testing: issue58.pdf.
if not isinstance(prev, int) or prev < 9:
raise PdfXrefStreamError('invalid /Prev at %d: %r' % (xref_ofs, prev))
match = PdfObj.PDF_OBJ_DEF_CAPTURE_RE.scanner(data, prev).match()
if not match:
@@ -3718,21 +3718,22 @@
else:
if not isinstance(obj_start, int):
print >>sys.stderr, (
'warning: in-object-stream xref stream obj %d' % xref_obj_num)
del obj_starts[xref_obj_num]

# Load the object streams.
for obj_num in sorted(obj_streams):
obj_start = obj_starts.get(obj_num)
if obj_start is None:
- raise PdfXrefStreamError('missing obj stream %d' % obj_num)
+ continue
+ #raise PdfXrefStreamError('missing obj stream %d' % obj_num)
if not isinstance(obj_start, int):
raise PdfXrefStreamError('in-object-stream obj stream %d' % obj_num)
try:
objstm_obj = PdfObj(data, start=obj_start, file_ofs=obj_start)
except PdfTokenParseError, e:
raise PdfXrefStreamError('parse objstm obj %d: %s' % (obj_num, e))
compressed_obj_nums, compressed_obj_headbufs = objstm_obj.ParseObjStm(
obj_num)
for i in xrange(len(compressed_obj_nums)):
compressed_obj_num = compressed_obj_nums[i]
@@ -3760,21 +3761,22 @@
'too few compressed objs (%d) in objstm obj %d, '
'needed index %d for obj %d' %
(len(compressed_obj_headbufs), objstm_obj_num, i, obj_num))
if isinstance(compressed_obj_headbufs[i], PdfObj):
obj_starts[obj_num] = compressed_obj_headbufs[i]
else:
obj_starts[obj_num] = compressed_obj_headbufs[i] = PdfObj(
'%d 0 obj\n%s\nendobj\n' % (obj_num, compressed_obj_headbufs[i]))

for obj_num in sorted(obj_streams):
- del obj_starts[obj_num]
+ if obj_starts.get(obj_num) is not None:
+ del obj_starts[obj_num]
obj_starts['trailer'] = trailer_obj
return obj_starts, has_generational_objs

@classmethod
def ParseUsingXref(cls, data, do_ignore_generation_numbers):
"""Determine obj offsets in a PDF file using the cross-reference table.

If this method detects a cross-reference stream, it calls
cls.ParseUsingXrefStream instead.

Powered by Google Project Hosting