Chapter 13 finished

hougr6 · Jul 5, 2017 · d71b927 · d71b927
1 parent 59fa594
commit d71b927
Show file tree

Hide file tree

Showing 35 changed files with 45,623 additions and 0 deletions.
diff --git a/13-pdf-word/README.md b/13-pdf-word/README.md
@@ -0,0 +1,26 @@
+# PDF文档
+ `pip install PyPDF2`
+ <pre>
+ import PyPDF2
+ </pre>
+
+# Word文档
+## 安装 
+`pip install python-docx`
+
+## Word文档简介 
+* Document：表示整个文档
+* Paragraph：表示段落
+* Run：表示相同样式文本
+
+## 使用
+`import docx`<br>
+`doc = docx.Document(filename)`
+
+## Word样式
+* 段落样式：可以应用于Paragraph
+* 字符样式：可以应用于Run对象
+* 链接样式：可以应用于Paragraph和Run对象
+
+
+
diff --git a/13-pdf-word/addBreakToDocx.py b/13-pdf-word/addBreakToDocx.py
@@ -0,0 +1,8 @@
+import docx
+
+doc = docx.Document()
+doc.add_paragraph('This is on the first page!')
+doc.paragraphs[0].runs[0].add_break(docx.text.run.WD_BREAK.PAGE)
+doc.add_paragraph('This is on the second page!')
+
+doc.save('twoPage.docx')
diff --git a/13-pdf-word/addContentToDocx.py b/13-pdf-word/addContentToDocx.py
@@ -0,0 +1,10 @@
+import docx
+
+doc = docx.Document()
+doc.add_paragraph('Hello world!')
+
+paraObj1 = doc.add_paragraph('This is a second paragraph.')
+paraObj2 = doc.add_paragraph('This is a yet another paragraph.')
+paraObj1.add_run(' This text is being added to the second paragraph.')
+
+doc.save('multipleParagraphs.docx')
diff --git a/13-pdf-word/addHeaderToDocx.py b/13-pdf-word/addHeaderToDocx.py
@@ -0,0 +1,10 @@
+import docx
+
+doc = docx.Document()
+doc.add_heading('Head 0', 0)
+doc.add_heading('Head 1', 1)
+doc.add_heading('Head 2', 2)
+doc.add_heading('Head 3', 3)
+doc.add_heading('Head 4', 4)
+
+doc.save('headings.docx')
diff --git a/13-pdf-word/batchEncryptPdf.py b/13-pdf-word/batchEncryptPdf.py
@@ -0,0 +1,35 @@
+import PyPDF2
+import os
+
+path = ''
+password = ''
+
+# get all pdf file in pointed directory
+for dirpath, dirnames, filenames in os.walk(path):
+    # open each pdf file and get first page of it
+    for filename in filenames:
+        if not filename.endswith('.pdf'):
+            continue
+        filepath = os.path.join(dirpath, filename)
+        pdfFile = PyPDF2.open(filepath, 'rb')
+        pdfReader = PyPDF2.PdfFileReader(pdfFile)
+        # if catch Exception then decrypt the file with given pass
+        try:
+            pdfReader.getPage(0)
+        except err:
+            if pdfReader.decrypt(password):
+                pdfWriter = PyPDF2.PdfFileWriter()
+                for page in pdfReader.numPages:
+                    pdfWriter.addPage(pdfReader.getPage(0))
+                decryptPdfFile = open(destDirectory+filename+'_encrypted.pdf', 'wb')
+                pdfWriter(decryptPdfFile)
+                decryptPdfFile.close()
+            else:          
+                # if pass is error, the print message and continue
+                print(msg)
+                continue 
+        pdfFile.close()
+
+
+
+
diff --git a/13-pdf-word/combinePdfs.py b/13-pdf-word/combinePdfs.py
@@ -0,0 +1,30 @@
+#! python3
+# combinePdfs.py - Combines all the PDFs in the current working directory into 
+# a single PDF.
+
+import os
+import PyPDF2
+
+# Get all the PDF filenames.
+pdfFiles = []
+for filename in os.listdir('.'):
+    if filename.endswith('.pdf'):
+        pdfFiles.append(filename)
+pdfFiles.sort()
+
+pdfWriter = PyPDF2.PdfFileWriter()
+
+# Loop through all the PDF files.
+for filename in pdfFiles:
+    pdfFileObj = open(filename, 'rb')
+    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
+
+    # Loop through all the pages (except the first) and add them.
+    for pageNum in range(1, pdfReader.numPages):
+        pageObj = pdfReader.getPage(pageNum)
+        pdfWriter.addPage(pageObj)
+
+# Save the resulting PDF to a file.
+pdfOutput = open('allminutes.pdf', 'wb')
+pdfWriter.write(pdfOutput)
+pdfOutput.close()
diff --git a/13-pdf-word/combineminutes.pdf b/13-pdf-word/combineminutes.pdf
diff --git a/13-pdf-word/createDocx.py b/13-pdf-word/createDocx.py
@@ -0,0 +1,5 @@
+import docx
+
+doc = docx.Document()
+doc.add_paragraph('Hello world!')
+doc.save('helloworld.docx')
diff --git a/13-pdf-word/createInvitation.py b/13-pdf-word/createInvitation.py
@@ -0,0 +1,23 @@
+import docx
+
+# create a docment
+doc = docx.Document('guestTemplete.docx')
+paragraphNum = len(doc.paragraphs)
+guestDoc = docx.Document()
+
+# open guests.txt file 
+with open('guests.txt') as f:
+    for guest in f.readlines():
+        paraSeq = 0  
+        # add each guest to the document    
+        for p in doc.paragraphs:
+            paraSeq += 1  
+            if paraSeq == 2:
+                p.add_run(guest)      
+            if paraSeq == paragraphNum:
+                p.runs[len(p.runs)-1].add_break(docx.text.run.WD_BREAK.PAGE)
+            # set text and style
+            guestDoc.add_paragraph(p.text, p.style)
+
+# save file
+guestDoc.save('guestsInvitaion.docx')
diff --git a/13-pdf-word/decryptPdfWithDict.py b/13-pdf-word/decryptPdfWithDict.py
@@ -0,0 +1,17 @@
+import PyPDF2
+
+# load PDF
+pdfFile = open('encryptTest.pdf', 'rb')
+pdfReader = PyPDF2.PdfFileReader(pdfFile)
+
+# loop dict list and try to decrypt the pdf file
+with open('dictionary.txt') as f:
+    for word in f:
+        word = word.strip().lower()
+        # if success, print the password
+        if pdfReader.decrypt(word):
+            print(word)
+
+
+
+
diff --git a/13-pdf-word/demo.docx b/13-pdf-word/demo.docx