lisa1612
diff --git a/‎AUTOMATION/PDF To Text/README.md
Lines changed: 39 additions & 11 deletions b/‎AUTOMATION/PDF To Text/README.md
Lines changed: 39 additions & 11 deletions
diff --git a/‎AUTOMATION/PDF To Text/SampleUsage.png
31.4 KB b/‎AUTOMATION/PDF To Text/SampleUsage.png
31.4 KB
diff --git a/‎AUTOMATION/PDF To Text/converted_pdf.txt
Lines changed: 0 additions & 28 deletions b/‎AUTOMATION/PDF To Text/converted_pdf.txt
Lines changed: 0 additions & 28 deletions
diff --git a/‎AUTOMATION/PDF To Text/pdfToText.py
Lines changed: 29 additions & 12 deletions b/‎AUTOMATION/PDF To Text/pdfToText.py
Lines changed: 29 additions & 12 deletions
diff --git a/‎AUTOMATION/PDF To Text/requirements.txt
Lines changed: 2 additions & 0 deletions b/‎AUTOMATION/PDF To Text/requirements.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎AUTOMATION/PDF To Text/test.pdf
-7.76 KB b/‎AUTOMATION/PDF To Text/test.pdf
-7.76 KB
@@ -1,21 +1,49 @@
-# Extracting text from PDF using Python 
+# Extracting Text from PDF using Python 
 
-Create a new folder and create a pdfToText.py file in it. Copy and paste the code in pdfToText.py in this repository to that file.
+This project is aimed at extracting text from PDF files using Python.
 
-Open the Terminal:
+## Getting Started
 
-```py
-pip install pdfminer.six
+These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
 
+### Prerequisites
+
+Before running the script, install its dependencies by running the following command in your terminal:
+
+```bash
+pip install -r requirements.txt
 ```
 
-In the same folder, add the pdf from which you want to extract text (Here the pdf used is test.pdf). Provide this pdf as a command line argument.
+### Using the Tool
 
-Run the script using:
+Follow these steps to use the tool:
 
-```py
-python3 pdfToText.py test.pdf
+1. Run the 'pdfToText.py' script:
 
-```
+    ```bash
+    python pdfToText.py
+    ```
+
+2. When prompted, provide the full path along with the file name of the PDF from which you want to extract text. For example:
+
+    ```text
+    D:\FolderName\FileName.pdf
+    ```
+
+3. The text extracted from the PDF will be saved to a .txt file with the same name, in the same folder as the PDF. For example:
+
+    ```text
+    D:\FolderName\FileName.txt
+    ```
+
+### Error Handling
+
+If an error occurs during the process, its message is printed on the screen. Use that message to identify and fix the cause (for example, a wrong file path or an unreadable PDF).
+
+Feel free to report any bugs or request features using the issue tracker.
+
+## Example Run and Output
+
+Below is a screenshot demonstrating how to run the commands in the terminal:
 
-The extracted text will be available in converted_pdf.txt
+![Sample Usage of the Script](./SampleUsage.png)
@@ -1,17 +1,34 @@
-import argparse
-import pdfminer.high_level
+from pathlib import Path
+from PyPDF2 import PdfReader
 
-# Extract text with Pdfminer.six Module
-def With_PdfMiner(pdf):
-	with open(pdf,'rb') as file_handle_1:
-		doc = pdfminer.high_level.extract_text(file_handle_1)
 
-	with open('converted_pdf.txt','w') as file_handle_2 :
-		file_handle_2.write(doc)
+def convert_pdf(filename):
+    """Extract the text of a PDF file and save it next to it as a .txt file.
+
+    The output path is the input path with its suffix replaced by ``.txt``.
+    Progress and error messages are printed to the screen. Errors raised
+    while reading the PDF or writing the text file are caught and printed
+    rather than propagated.
+
+    Args:
+        filename: Path of the PDF file to convert.
+
+    Returns:
+        None in every case; the outcome is reported via printed messages.
+    """
+    my_file = Path(filename)
+
+    # Check if provided PDF file exists
+    if not my_file.is_file():
+        print('Error! File Not Found!')
+        return None
+    print('PDF Found! Attempting Conversion...')
+
+    # Exception Handling during Data Extraction from PDF File
+    try:
+        # Define .txt file which will contain the extracted data
+        out_filename = my_file.with_suffix('.txt')
+        # Extracting Data from PDF file page-by-page and storing in TXT file
+        pdf_reader = PdfReader(filename)
+        with open(out_filename, 'w', encoding='utf-8') as extracted_data:
+            for page in pdf_reader.pages:
+                # A page without extractable text contributes an empty string
+                text = page.extract_text() or ''
+                # End each page with a newline so that the last line of one
+                # page does not run into the first line of the next
+                extracted_data.write(text + '\n')
+        print('PDF to TXT Conversion Successful!')
+
+    # If any Error is encountered, Print the Error on Screen
+    except Exception as e:
+        print(f'Error Converting PDF to Text or Saving Converted Text into .txt file: {e}')
+        return None
 
 
 if __name__ == '__main__':
-	parser = argparse.ArgumentParser()
-	parser.add_argument("file", help = "PDF file from which we extract text")
-	args = parser.parse_args()
-	With_PdfMiner(args.file)
+    # Strip surrounding whitespace and quotes: a path pasted into the prompt
+    # (e.g. via "Copy as path" on Windows) often arrives wrapped in quotes,
+    # which would otherwise be reported as a missing file
+    file = input('Enter Full Path and FileName: ').strip().strip('"\'')
+    convert_pdf(file)
@@ -0,0 +1,2 @@
+PyPDF2
+# pathlib is part of the Python standard library (3.4+); it needs no install