forked from python-openxml/python-docx
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpackage.py
113 lines (90 loc) · 3.78 KB
/
package.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# encoding: utf-8
"""WordprocessingML Package class and related objects"""
from __future__ import absolute_import, division, print_function, unicode_literals
from docx.image.image import Image
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.opc.package import OpcPackage
from docx.opc.packuri import PackURI
from docx.parts.image import ImagePart
from docx.shared import lazyproperty
class Package(OpcPackage):
    """|OpcPackage| subclass with WordprocessingML-specific behavior.

    On top of what |OpcPackage| provides, it exposes an |ImageParts|
    collection via `image_parts`.
    """

    def after_unmarshal(self):
        """Run post-load processing for this package.

        Called by loading code once all parts and relationships have been
        loaded. The only post-processing done here is filling the image-part
        collection.
        """
        self._gather_image_parts()

    def get_or_add_image_part(self, image_descriptor):
        """Return the |ImagePart| for the image given by *image_descriptor*.

        The request is delegated to the `image_parts` collection, which
        creates a new image part when it holds no matching one.
        """
        collection = self.image_parts
        return collection.get_or_add_image_part(image_descriptor)

    @lazyproperty
    def image_parts(self):
        """The |ImageParts| collection belonging to this package."""
        return ImageParts()

    def _gather_image_parts(self):
        """Add the target part of each image relationship to `image_parts`.

        Relationships yielded by `iter_rels()` are skipped when they are
        external or when their type is not ``RT.IMAGE``. A target part that is
        already in the collection is not appended a second time.
        """
        for rel in self.iter_rels():
            # The external check comes first so `target_part` is never read
            # on an external relationship.
            if rel.is_external or rel.reltype != RT.IMAGE:
                continue
            image_part = rel.target_part
            if image_part not in self.image_parts:
                self.image_parts.append(image_part)
class ImageParts(object):
    """Collection of |ImagePart| objects corresponding to images in the package.

    The parts are held in a list in insertion order. The collection supports
    the ``in`` operator, iteration and ``len()``.
    """

    def __init__(self):
        self._image_parts = []

    def __contains__(self, item):
        return item in self._image_parts

    def __iter__(self):
        return iter(self._image_parts)

    def __len__(self):
        return len(self._image_parts)

    def append(self, item):
        """Add *item* to the end of the collection."""
        self._image_parts.append(item)

    def get_or_add_image_part(self, image_descriptor):
        """Return |ImagePart| object containing image identified by *image_descriptor*.

        The image is loaded with `Image.from_file()` and looked up by its SHA1
        hash. The image-part is newly created and appended only if a matching
        one is not present in the collection.
        """
        image = Image.from_file(image_descriptor)
        matching_image_part = self._get_by_sha1(image.sha1)
        if matching_image_part is not None:
            return matching_image_part
        return self._add_image_part(image)

    def _add_image_part(self, image):
        """
        Return an |ImagePart| instance newly created from *image* and appended
        to the collection.
        """
        partname = self._next_image_partname(image.ext)
        image_part = ImagePart.from_image(image, partname)
        self.append(image_part)
        return image_part

    def _get_by_sha1(self, sha1):
        """
        Return the first image part in this collection having a SHA1 hash
        matching *sha1*, or |None| if not found.
        """
        return next(
            (part for part in self._image_parts if part.sha1 == sha1), None
        )

    def _next_image_number(self):
        """
        Return the smallest integer >= 1 that is not the `partname.idx` of
        any image part in this collection.
        """
        # A set gives constant-time membership tests, keeping the search
        # linear in the number of parts.
        used_numbers = set(image_part.partname.idx for image_part in self)
        # len(self) parts can occupy at most len(self) of the len(self) + 1
        # candidates below, so at least one candidate is always free.
        return next(
            n for n in range(1, len(self) + 2) if n not in used_numbers
        )

    def _next_image_partname(self, ext):
        """
        The next available image partname, starting from
        ``/word/media/image1.{ext}`` where unused numbers are reused. The
        partname is unique by number, without regard to the extension. *ext*
        does not include the leading period.
        """
        return PackURI(
            '/word/media/image%d.%s' % (self._next_image_number(), ext)
        )