Skip to content

Commit c62c32b

Browse files
authored
Merge branch 'larymak:main' into wifipassword
2 parents b803a53 + 61bd2cb commit c62c32b

File tree

8 files changed

+304
-1
lines changed

8 files changed

+304
-1
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "WEB-BASED PROJECTS/django-url-shortner"]
2+
path = WEB-BASED PROJECTS/django-url-shortner
3+
url = https://github.com/yeboahd24/django-url-shortner.git
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Analyze any `.docx` file for bold, underlined, italicized text
2+
This program helps you find all the bold, underlined and italicized text in a word document.
3+
4+
First create a new folder and then create a file named `extract.py` inside it and copy paste the code to it.
5+
Then you need to install `python-docx`
6+
```bash
7+
$ pip install python-docx
8+
```
9+
Copy your Word document, for example `process_design_notes.docx`, into the current working directory (CWD).
10+
11+
The CWD should now have two files i.e. **extract.py** and **process_design_notes.docx**.
12+
13+
Open a terminal or command prompt in CWD and type
14+
```bash
15+
#for linux
16+
python3 extract.py process_design_notes.docx
17+
#for windows
18+
python extract.py process_design_notes.docx
19+
```
20+
After you run the above command, the program will process the Word document and append the extracted bold, italicized and underlined words to the end of it.
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
""" This module uses 'python-docx' package to analyze a Word Document ('.docx')
2+
This module can be imported in other programs but the preferred way is to run it
3+
as a Script from the command line like this:
4+
5+
$ python extract.py <name-of-word-document-file>
6+
7+
The basic function of the program is to analyze the word document and
8+
collect bold, italic and underlined words from it and then
9+
after analyzing write these collected words at the end of the word document.
10+
So the program first reads the word document, collects bold, italic
11+
and underlined words from it and then writes the collected words at
12+
the very end of the same word document
13+
Copyright 2023 Kashaan Mahmood
14+
License: MIT License
15+
https://opensource.org/license/mit/
16+
"""
17+
18+
19+
from docx import Document
20+
from docx.api import Document
21+
from docx.enum.text import WD_ALIGN_PARAGRAPH
22+
23+
# global variables
24+
total_words = 0
25+
wordsList = ""
26+
27+
28+
# calculate total words in docx
29+
def get_total_words(docxFile):
    """Return the number of whitespace-separated words in the document.

    Args:
        docxFile: Path (or file-like object) that ``Document`` can open.

    Returns:
        int: Word count summed over ``document.paragraphs``.
    """
    document = Document(docxFile)
    # Count per paragraph rather than per run: a word is split into several
    # runs whenever its formatting changes mid-word, and counting every run
    # separately would count such a word more than once.
    return sum(len(paragraph.text.split()) for paragraph in document.paragraphs)
36+
37+
38+
# Characters (and character sequences) stripped from the extracted text.
# Order matters: the spaced double em dash must come before the single em
# dash, otherwise only its dashes would be removed and its spaces left behind.
unwanted_characters = [
    '"',
    "'",
    "‘",
    "’",
    "“",
    "”",
    ":",
    "\n",
    "-",
    "— — ",
    "—",
    ".",
    ",",
    ";",
    "!",
    "?",
]


def remove_unwanted(words):
    """Remove unwanted characters from analyzed output.

    Args:
        words: Text of a single run.

    Returns:
        str: ``words`` with every entry of ``unwanted_characters`` removed,
        applied in list order.
    """
    for unwanted in unwanted_characters:
        # str.replace is a no-op when the substring is absent, so no
        # membership check is needed first.
        words = words.replace(unwanted, "")
    return words
62+
63+
64+
def analyze(docxFile):
    """Collect the bold, italicized and underlined text of a docx file.

    Every run that carries at least one of the three styles is cleaned with
    ``remove_unwanted`` and filed under the key naming its exact style
    combination ("b", "i", "u", "bi", "bu", "iu" or "biu").

    Returns:
        list: Seven single-key dicts, in the order
        ``b, i, u, bi, bu, iu, biu``, each mapping its key to the list of
        cleaned run texts found for that combination.
    """
    # Position of each style key inside the returned list.
    slot_of = {"b": 0, "i": 1, "u": 2, "bi": 3, "bu": 4, "iu": 5, "biu": 6}
    collect = [{style_key: []} for style_key in slot_of]

    for paragraph in Document(docxFile).paragraphs:
        for run in paragraph.runs:
            # Build the key from the styles that are switched on for this run.
            style_key = (
                ("b" if run.bold else "")
                + ("i" if run.italic else "")
                + ("u" if run.underline else "")
            )
            if not style_key:
                # Unstyled run: nothing to collect.
                continue
            cleaned = remove_unwanted(run.text)
            collect[slot_of[style_key]][style_key].append(cleaned)

    return collect
112+
113+
114+
def _format_extracted_words(data):
    """Render the collected words as one text block, one section per style."""
    headings = {
        "b": "\nBold Words:-",
        "i": "\n\nItalicized Words:-",
        "u": "\n\nUnderlined Words:-",
        "bi": "\n\nBold & Italicized Words:-",
        "bu": "\n\nBold & Underlined Words:-",
        "biu": "\n\nBold & Italicized & Underlined Words:-",
        "iu": "\n\nItalicized & Underlined Words:-",
    }
    sections = []
    for group in data:
        for key, found in group.items():
            # Empty categories get no heading at all.
            if found and key in headings:
                listed = ", ".join(f"{word}" for word in found)
                sections.append(f"{headings[key]}\n{listed}")
    return "".join(sections)


def write_data(docxFile, data):
    """Append the words collected by ``analyze()`` to the end of the docx file.

    A centered title paragraph, a paragraph listing the words grouped by
    style, and a centered closing paragraph are added, then the document is
    saved back to ``docxFile``.

    Args:
        docxFile: Path of the Word document to read and overwrite.
        data: The list of single-key dicts returned by ``analyze()``.

    Side effects:
        Overwrites ``docxFile`` and sets the module-level ``wordsList`` to the
        text written for this call.
    """
    global wordsList

    document = Document(docxFile)

    title_p = document.add_paragraph("\n========== Extracted Words ==========\n")
    title_p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Rebuild the text from scratch on every call so that a second call does
    # not carry over words (or suppress headings) from an earlier document.
    # Every category is handled, including italic-only words ("i").
    wordsList = _format_extracted_words(data)
    document.add_paragraph().add_run(wordsList)

    ending_p = document.add_paragraph("\n===================\n")
    ending_p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

    document.save(docxFile)
177+
178+
179+
# function calls inside main()
180+
181+
182+
def main(docxFile=None):
    """Analyze a Word document and append the extracted words to it.

    Args:
        docxFile: Path of the ``.docx`` file to process. When omitted, the
            first command-line argument is used, so the function no longer
            depends on a global that only exists when run as a script.

    Raises:
        SystemExit: If no path is passed and none is given on the command line.
    """
    if docxFile is None:
        from sys import argv

        if len(argv) < 2:
            raise SystemExit("usage: python extract.py <name-of-word-document-file>")
        docxFile = argv[1]

    data = analyze(docxFile)
    write_data(docxFile, data)
187+
188+
189+
if __name__ == "__main__":
    import time
    from sys import argv

    # The document path is the only, required, command-line argument; exit
    # with a usage message instead of an IndexError traceback when missing.
    if len(argv) < 2:
        raise SystemExit("usage: python extract.py <name-of-word-document-file>")

    # get docx file (kept as a module-level name, as before)
    docx = argv[1]

    print(f"Started at {time.strftime('%X')}...")
    # calling main()
    main()
    print(f"Finished at {time.strftime('%X')}...")
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Importing necessary libraries
2+
import csv
3+
import openpyxl
4+
import pandas as pd
5+
from openpyxl import Workbook
6+
from openpyxl.styles import Font
7+
8+
# Reading HTML file and defining paths for CSV and Excel files
9+
file = pd.read_html("./Test Report_2021-08-18_12-45-00.html")
10+
path = "./your_csv_name.csv"
11+
xlpath = 'name.xlsx'
12+
13+
# Function to write data from HTML to CSV and convert it to Excel format
14+
def write_html_csv():
    """Write the parsed HTML tables to the CSV at ``path``, then copy it to Excel.

    The tables in the module-level ``file`` list are appended to the CSV file
    at ``path``; that CSV is then read back row by row into a new workbook,
    which is saved to ``xlpath``.
    """
    for index, data in enumerate(file):
        # NOTE(review): the table at index 0 is skipped, as in the original
        # code - confirm that dropping the first table is intended.
        if index:
            # Write to `path` (not a second hard-coded file name) so the CSV
            # that is read back below is always the one written here.
            # Append mode: rows accumulate if the file already exists.
            data.to_csv(path, mode='a+', header=True)

    # Creating an instance of Workbook and using its active sheet
    wb = Workbook()
    ws = wb.active

    # Reading CSV file and writing data to Excel; newline='' lets the csv
    # module handle line endings inside quoted fields itself.
    with open(path, 'r', newline='') as f:
        for row in csv.reader(f):
            ws.append(row)

    # Saving the Excel file
    wb.save(xlpath)
31+
32+
# Function to modify the Excel sheet by adding bold font to certain cell values
33+
def modify_excel():
    """Bold every cell of the workbook at ``xlpath`` that mentions a keyword.

    Each cell of the active sheet whose value, converted to a string,
    contains "Test_Cases" or "Status" gets a bold font; the workbook is then
    saved back to ``xlpath``.
    """
    keywords = ("Test_Cases", "Status")

    workbook = openpyxl.load_workbook(xlpath)
    sheet = workbook.active

    # Sheet dimensions, read once before any cell is touched
    last_row = sheet.max_row
    last_col = sheet.max_column

    for row_no in range(1, last_row + 1):
        for col_no in range(1, last_col + 1):
            cell = sheet.cell(row_no, col_no)
            text = str(cell.value)
            if any(keyword in text for keyword in keywords):
                cell.font = Font(bold=True)

    workbook.save(xlpath)
52+
53+
# Run both tasks in order, announcing the start and the end of each one
for label, task in (("one", write_html_csv), ("two", modify_excel)):
    print(f"Starting task {label}")
    task()
    print(f"Task {label} over")
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# HTML file into a CSV file
2+
This script converts the tables in an HTML file into a CSV file, then writes that CSV data to an Excel workbook and applies formatting to it.
3+
4+
## Requirements
5+
1. Python 3.x
6+
2. openpyxl
7+
3. pandas
8+
## Usage
9+
1. Ensure that the required packages are installed.
10+
2. Update the file paths in the script to reflect your specific file names and paths.
11+
3. Run the script from the command line: python script_name.py.
12+
4. The script will perform the following tasks:
13+
5. Convert the data from the HTML file to a CSV file.
14+
6. Write the CSV data to an Excel file and bold the font of certain cells.
15+
7. Once the script is complete, the modified Excel file will be saved in the specified file path.
16+
17+
Note: Ensure that the HTML file is in the same directory as the script or specify the correct path in the script.
18+

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,6 @@ guide [HERE](https://github.com/larymak/Python-project-Scripts/blob/main/CONTRIB
115115
| 64 | [Umbrella Reminder](https://github.com/larymak/Python-project-Scripts/tree/main/TIME%20SCRIPTS/Umbrella%20Reminder) | [Edula Vinay Kumar Reddy](https://github.com/vinayedula) |
116116
| 65 | [Image to PDF](https://github.com/larymak/Python-project-Scripts/tree/main/IMAGES%20%26%20PHOTO%20SCRIPTS/Image%20to%20PDF) | [Vedant Chainani](https://github.com/Envoy-VC) |
117117
| 66 | [KeyLogger](https://github.com/larymak/Python-project-Scripts/tree/main/OTHERS/KeyLogger) | [Akhil](https://github.com/akhil-chagarlamudi) |
118-
| 67 | [PDF Text Extractor](https://github.com/SamAddy/Python-project-Scripts/tree/main/PYTHON%20APPS/PDF-Text-Extractor) | [Samuel Addison](https://github.com/SamAddy) |
118+
| 67 | [PDF Text Extractor](https://github.com/SamAddy/Python-project-Scripts/tree/main/PYTHON%20APPS/PDF-Text-Extractor) | [Samuel Addison](https://github.com/SamAddy) |
119+
| 68 | [Analyze docx file](https://github.com/larymak/Python-project-Scripts/tree/main/AUTOMATION/analyzing%20and%20writing%20.docx%20file) | [Kashaan Mahmood](https://github.com/Kashaan-M) |
120+

WEB-BASED PROJECTS/StopWatch

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 98585968b0a368049ad6a992f36fc62121bbc1e6
Submodule django-url-shortner added at eeebaa8

0 commit comments

Comments
 (0)