kunicode
diff --git a/‎Text_Extract_Images/README.md
+56 b/‎Text_Extract_Images/README.md
+56
diff --git a/‎Text_Extract_Images/img/Output.PNG
3.43 KB b/‎Text_Extract_Images/img/Output.PNG
3.43 KB
diff --git a/‎Text_Extract_Images/img/Sample.PNG
14.3 KB b/‎Text_Extract_Images/img/Sample.PNG
14.3 KB
diff --git a/‎Text_Extract_Images/img/TextFile.PNG
13.3 KB b/‎Text_Extract_Images/img/TextFile.PNG
13.3 KB
diff --git a/‎Text_Extract_Images/requirements.txt
+2 b/‎Text_Extract_Images/requirements.txt
+2
diff --git a/‎Text_Extract_Images/text_extract.py
+56 b/‎Text_Extract_Images/text_extract.py
+56
@@ -0,0 +1,56 @@
+# Text_Extract
+
+[![forthebadge made-with-python](http://ForTheBadge.com/images/badges/made-with-python.svg)](https://www.python.org/)
+
+Text extraction from images, OCR with Tesseract, and basic image manipulation are all important yet very basic scripts.
+
+This script uses `pytesseract` to extract text from images. Because pytesseract only recognizes the text and
+returns it as a string, this script additionally writes the extracted text to a `txt` file.
+
+## Setup instructions
+
+- Setup a `python 3.x` virtual environment.
+- `Activate` the environment
+- Install the dependencies using ```pip3 install -r requirements.txt```
+- You are all set and the [script](text_extract.py) is ready to run.
+- Carefully follow the Instructions.
+
+## Further Readings
+
+Newcomers often struggle with installing Tesseract for the first time; here is a direct link to the
+[installer](https://github.com/UB-Mannheim/tesseract/wiki).
+
+Setting up OCR can be found [here](http://bit.ly/2MClAwD)
+
+Adding Tesseract to the __PATH__ environment variable makes the script easier to run.
+[This](http://bit.ly/35d3c3Q) and [this](http://bit.ly/3ba0zmZ) link explain how to set it up.
+
+## Usage
+
+Just make sure that Tesseract is installed in the proper directory, then run the code according to the comments and guidelines.
+
+```
+Sample - 
+Enter the Folder name containing Images: <Name of Folder>
+Enter your desired output location: <Name of Folder>
+```
+
+## Output
+
+Output
+
+![Output](img/Output.PNG)
+
+Image containing Text
+
+![Sample image containing text](img/Sample.PNG)
+
+After Extraction
+
+![Extracted text file](img/TextFile.PNG)
+
+
+## Author(s)
+
+Made by [Vybhav Chaturvedi](https://www.linkedin.com/in/vybhav-chaturvedi-0ba82614a/)
+
@@ -0,0 +1,2 @@
+pytesseract==0.3.6
+Pillow==8.0.1
@@ -0,0 +1,56 @@
+from PIL import Image
+import pytesseract as pt
+import os
+from pathlib import Path
+
+
+current_location = (os.getcwd() + '\\')
+
+
+def extract():
+    """
+    Function for extracting text from images.
+    Additional it saves the text extracted as a txt file.
+    """
+
+    # Enter the name of folder which contains img files
+    image_location = input("Enter the Folder name containing Images: ")
+    image_path = os.path.join(current_location, image_location)
+
+    # Enter the name of folder which would contain respective txt files
+    destination = input("Enter your desired output location: ")
+    destination_path = os.path.join(current_location, destination)
+
+    # Path to Tesseract
+    tesseract_path = input("Enter the Path to Tesseract: ")
+    print('\nNOTE: '
+          'It is preferable to setup the PATH variable to Tesseract, see README. \n')
+
+    #  = r'C:\Program Files\Tesseract-OCR\tesseract'
+    pt.pytesseract.tesseract_cmd = tesseract_path
+
+    # iterating over the images in the folder
+    for imageName in os.listdir(image_path):
+
+        # Join the path and image name to obtain absolute path
+        inputPath = os.path.join(image_path, imageName)
+        img = Image.open(inputPath)
+
+        # OCR
+        text = pt.image_to_string(img, lang="eng")
+
+        # Removing extensions
+        img_file = Path(inputPath).stem
+        print(img_file)
+
+        # The output text file
+        text_file = img_file + ".txt"
+        output_path = os.path.join(destination_path, text_file)
+
+        # saving the  text for every image in a separate .txt file
+        with open(output_path, "w") as file:
+            file.write(text)
+
+
+if __name__ == '__main__':
+    extract()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+pytesseract==0.3.6`
	`2`	`+Pillow==8.0.1`