Skip to content

Commit

Permalink
Chapter 13 finished
Browse files Browse the repository at this point in the history
  • Loading branch information
chenguohui committed Jul 5, 2017
1 parent 59fa594 commit d71b927
Show file tree
Hide file tree
Showing 35 changed files with 45,623 additions and 0 deletions.
26 changes: 26 additions & 0 deletions 13-pdf-word/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# PDF文档
`pip install PyPDF2`
<pre>
import PyPDF2
</pre>

# Word文档
## 安装
`pip install python-docx`

## Word文档简介
* Document:表示整个文档
* Paragraph:表示段落
* Run:表示相同样式文本

## 使用
<pre>
import docx
doc = docx.Document(filename)
</pre>

## Word样式
* 段落样式:可以应用于Paragraph
* 字符样式:可以应用于Run对象
* 链接样式:可以应用于Paragraph和Run对象



8 changes: 8 additions & 0 deletions 13-pdf-word/addBreakToDocx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import docx

# Build a two-paragraph document in which a page break separates the
# paragraphs, then write it to twoPage.docx.
doc = docx.Document()

firstParagraph = doc.add_paragraph('This is on the first page!')
# The page break is appended to the first run of the first paragraph, so the
# paragraph added afterwards starts on a new page.
firstRun = firstParagraph.runs[0]
firstRun.add_break(docx.text.run.WD_BREAK.PAGE)

doc.add_paragraph('This is on the second page!')
doc.save('twoPage.docx')
10 changes: 10 additions & 0 deletions 13-pdf-word/addContentToDocx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import docx

# Create a document with three paragraphs, append an extra run of text to the
# second one, and write the result to multipleParagraphs.docx.
doc = docx.Document()

doc.add_paragraph('Hello world!')
secondParagraph = doc.add_paragraph('This is a second paragraph.')
doc.add_paragraph('This is a yet another paragraph.')

# add_run appends to the existing paragraph rather than starting a new one.
secondParagraph.add_run(' This text is being added to the second paragraph.')

doc.save('multipleParagraphs.docx')
10 changes: 10 additions & 0 deletions 13-pdf-word/addHeaderToDocx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import docx

# Write one heading for each level 0 through 4 ("Head 0" ... "Head 4") and
# save the document as headings.docx.
doc = docx.Document()

for headingLevel in range(5):
    doc.add_heading('Head {}'.format(headingLevel), headingLevel)

doc.save('headings.docx')
35 changes: 35 additions & 0 deletions 13-pdf-word/batchEncryptPdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import PyPDF2
import os

# Walk a directory tree, find every password-protected PDF, decrypt it with
# the configured password and write an unprotected copy of all its pages.
#
# Configuration:
#   path          - root directory to scan (an empty string scans nothing).
#   password      - password used to open the protected PDFs.
#   destDirectory - directory that receives the copies; when empty, each copy
#                   is written next to its source file.
path = ''
password = ''
destDirectory = ''

for dirpath, dirnames, filenames in os.walk(path):
    for filename in filenames:
        if not filename.endswith('.pdf'):
            continue
        filepath = os.path.join(dirpath, filename)

        # The reader pulls pages lazily from the file object, so the source
        # must stay open until the copy has been written; the with-block also
        # closes it on every early `continue`.
        with open(filepath, 'rb') as pdfFile:
            pdfReader = PyPDF2.PdfFileReader(pdfFile)

            # Only protected files need processing.
            if not pdfReader.isEncrypted:
                continue

            # decrypt() returns 0 when the password does not match.
            if not pdfReader.decrypt(password):
                print('Could not decrypt %s with the given password; skipped.'
                      % filepath)
                continue

            # Copy every page (not just the first) into a fresh writer.
            pdfWriter = PyPDF2.PdfFileWriter()
            for pageNum in range(pdfReader.numPages):
                pdfWriter.addPage(pdfReader.getPage(pageNum))

            # NOTE(review): the copy written here is *decrypted*, yet the
            # original suffix is '_encrypted.pdf'. The suffix is kept as-is;
            # confirm whether '_decrypted.pdf' was intended.
            outputPath = os.path.join(destDirectory or dirpath,
                                      filename + '_encrypted.pdf')
            with open(outputPath, 'wb') as decryptPdfFile:
                pdfWriter.write(decryptPdfFile)




30 changes: 30 additions & 0 deletions 13-pdf-word/combinePdfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#! python3
# combinePdfs.py - Combines all the PDFs in the current working directory into
# a single PDF.

import os
import PyPDF2

# Name of the combined file. It lives in the same directory as the inputs, so
# it has to be excluded from the input list; otherwise a second run would read
# it as a source and then truncate it while its pages are still being read.
outputFilename = 'allminutes.pdf'

# Get all the PDF filenames in the current working directory, sorted.
pdfFiles = sorted(
    filename for filename in os.listdir('.')
    if filename.endswith('.pdf') and filename != outputFilename
)

pdfWriter = PyPDF2.PdfFileWriter()

# Page objects are read lazily from their source files, so every source must
# remain open until the combined PDF has been written. Track the handles and
# close them all at the end, even if something fails part-way through.
openSources = []
try:
    # Loop through all the PDF files.
    for filename in pdfFiles:
        pdfFileObj = open(filename, 'rb')
        openSources.append(pdfFileObj)
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)

        # Loop through all the pages (except the first) and add them.
        for pageNum in range(1, pdfReader.numPages):
            pdfWriter.addPage(pdfReader.getPage(pageNum))

    # Save the resulting PDF to a file.
    with open(outputFilename, 'wb') as pdfOutput:
        pdfWriter.write(pdfOutput)
finally:
    for sourceFile in openSources:
        sourceFile.close()
Binary file added 13-pdf-word/combineminutes.pdf
Binary file not shown.
5 changes: 5 additions & 0 deletions 13-pdf-word/createDocx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import docx

# Create a new document containing a single paragraph and write it to disk.
greeting = 'Hello world!'
outputName = 'helloworld.docx'

doc = docx.Document()
doc.add_paragraph(greeting)
doc.save(outputName)
23 changes: 23 additions & 0 deletions 13-pdf-word/createInvitation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import docx

# Generate one invitation per guest from a template document.
#
# Every paragraph of guestTemplete.docx is copied (text and style) into the
# output document once per guest listed in guests.txt; the guest's name is
# appended to the second paragraph, and consecutive invitations are separated
# by a page break.

# Load the template (read-only from here on) and create the output document.
doc = docx.Document('guestTemplete.docx')
guestDoc = docx.Document()

# Read the guest names, dropping the trailing newline and any blank lines.
with open('guests.txt') as f:
    guests = [line.strip() for line in f if line.strip()]

for guestIndex, guest in enumerate(guests):
    lastParagraph = None
    for paraSeq, p in enumerate(doc.paragraphs, start=1):
        # Build the text for this copy instead of mutating the template, so a
        # name added for one guest never leaks into the next invitation.
        text = p.text + guest if paraSeq == 2 else p.text
        lastParagraph = guestDoc.add_paragraph(text, p.style)

    # The break must be added to the *output* paragraph: copying p.text does
    # not carry a page break across. No break after the final invitation, to
    # avoid a trailing blank page.
    if lastParagraph is not None and guestIndex < len(guests) - 1:
        lastParagraph.add_run().add_break(docx.text.run.WD_BREAK.PAGE)

# Save the generated invitations.
guestDoc.save('guestsInvitaion.docx')
17 changes: 17 additions & 0 deletions 13-pdf-word/decryptPdfWithDict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import PyPDF2

# Dictionary attack on encryptTest.pdf: try each word of dictionary.txt
# (stripped and lower-cased) as the password and print the first one that
# opens the file.
#
# Both files are opened in one with-statement so they are closed on every exit
# path, including an exception raised by the PDF library.
with open('encryptTest.pdf', 'rb') as pdfFile, open('dictionary.txt') as f:
    pdfReader = PyPDF2.PdfFileReader(pdfFile)

    for word in f:
        word = word.strip().lower()
        # decrypt() returns a non-zero value when the password matches.
        if pdfReader.decrypt(word):
            print(word)
            # Stop at the first hit; there is nothing left to search for.
            break
    else:
        # Reached only when the loop finished without a successful decrypt.
        print('No word in dictionary.txt decrypted encryptTest.pdf.')




Binary file added 13-pdf-word/demo.docx
Binary file not shown.
Loading

0 comments on commit d71b927

Please sign in to comment.