commit 9a66107b1edbca352926eab70a7af9e129d16f8c
Author: acemglw
Date:   Mon Oct 20 20:59:48 2025 -0700

    Add files via upload

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4d08028
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+# Program Explanation
+
+## Overview
+This program extracts text from an image containing names and scores, parses the data, and saves it into an Excel sheet. The current date and time in Pacific Time (the `America/Los_Angeles` zone) are also added to each record.
+
+## Steps
+
+1. **Text Extraction**:
+   - The `pytesseract.image_to_string` function is used to extract text from the image.
+   - The `Pillow` library is used to open and process the image.
+
+2. **Parsing**:
+   - The extracted text is split into lines.
+   - Each line is further split into words. The last word is assumed to be the score, and the rest are combined to form the name.
+   - The program ensures that the score is a valid integer before adding the data; lines whose last word is not an integer are skipped.
+
+3. **Date Handling**:
+   - The `datetime` and `pytz` libraries are used to get the current date and time in the Pacific time zone (`America/Los_Angeles`), which is PST or PDT depending on daylight saving time.
+   - The date is formatted as `YYYY-MM-DD HH:MM:SS`.
+
+4. **Excel Writing**:
+   - The `openpyxl` library is used to create and write data into an Excel file.
+   - The data is written into three columns: Player Name, Player Score, and Date (PST).
"""Extract player names and scores from an image and save them to Excel.

The image is run through Tesseract OCR (via ``pytesseract``), every recognised
line is parsed into a ``(name, score)`` pair, and the pairs are written to an
``.xlsx`` workbook together with the current date and time in the
``America/Los_Angeles`` time zone.

Requirements
------------
Install the required libraries using pip::

    pip install pytesseract pillow openpyxl pytz
"""

from datetime import datetime

# Paths used when main() is called without arguments.
DEFAULT_IMAGE_PATH = "image_with_names_and_scores.png"
DEFAULT_OUTPUT_EXCEL = "player_data.xlsx"


def extract_text_from_image(image_path):
    """Return the text that Tesseract recognises in an image.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The OCR output as a single string (lines separated by newlines).

    Raises:
        Whatever ``PIL.Image.open`` or ``pytesseract.image_to_string`` raise,
        e.g. when the file does not exist or Tesseract cannot be run.
    """
    # The OCR stack is imported here rather than at module level so that the
    # pure parsing helper below stays importable without these packages.
    import pytesseract
    from PIL import Image
    from pytesseract import Output

    # Configure Tesseract executable path if needed
    # pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    # The context manager closes the underlying file handle once OCR is done;
    # a bare Image.open() would leave it open until garbage collection.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image, output_type=Output.STRING)


def parse_names_and_scores(text):
    """Parse OCR text into a list of ``(name, score)`` tuples.

    Each line is split on whitespace. The last token is taken as the score and
    everything before it, re-joined with single spaces, as the name. Lines with
    fewer than two tokens, or whose last token is not accepted by ``int()``,
    are skipped.

    Args:
        text: Multi-line string, typically the output of
            ``extract_text_from_image``.

    Returns:
        A list of ``(name, score)`` tuples in input order, with ``score`` as
        an ``int``.
    """
    data = []
    for line in text.splitlines():
        parts = line.split()
        if len(parts) < 2:
            # Need at least one name token plus a score token.
            continue

        *name_parts, raw_score = parts
        try:
            # Only the conversion can fail, so only it is guarded.
            score = int(raw_score)
        except ValueError:
            # Not a score line (header, OCR noise, decimal value, ...).
            continue

        data.append((" ".join(name_parts), score))
    return data


def save_to_excel(data, file_name):
    """Write name/score pairs to a new Excel workbook.

    The workbook has a single sheet titled "Player Data" with a header row
    followed by one row per entry. Every row carries the same timestamp, taken
    once when the function is called.

    Args:
        data: Iterable of ``(name, score)`` pairs.
        file_name: Path of the ``.xlsx`` file to create or overwrite.
    """
    # Imported lazily for the same reason as the OCR dependencies above.
    import pytz
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Player Data"
    sheet.append(["Player Name", "Player Score", "Date (PST)"])

    # NOTE: America/Los_Angeles follows daylight saving time, so during summer
    # the value is PDT even though the column header reads "PST". The header is
    # kept unchanged so the layout of existing spreadsheets does not change.
    pacific = pytz.timezone('America/Los_Angeles')
    timestamp = datetime.now(pacific).strftime('%Y-%m-%d %H:%M:%S')

    for name, score in data:
        sheet.append([name, score, timestamp])

    workbook.save(file_name)


def main(image_path=DEFAULT_IMAGE_PATH, output_excel=DEFAULT_OUTPUT_EXCEL):
    """Run the OCR, parse and export pipeline.

    Args:
        image_path: Image to read; defaults to ``DEFAULT_IMAGE_PATH``.
        output_excel: Workbook to write; defaults to ``DEFAULT_OUTPUT_EXCEL``.
    """
    text = extract_text_from_image(image_path)
    parsed_data = parse_names_and_scores(text)
    save_to_excel(parsed_data, output_excel)
    print(f"Data has been saved to {output_excel}")


if __name__ == "__main__":
    main()