60 lines
1.8 KiB
Python
60 lines
1.8 KiB
Python
import pytesseract
|
|
from pytesseract import Output
|
|
from PIL import Image
|
|
from openpyxl import Workbook
|
|
from datetime import datetime
|
|
import pytz
|
|
|
|
# Configure the Tesseract executable path if Tesseract is not on your PATH, e.g. on Windows:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
|
|
|
# Function to extract text from an image
|
|
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognized text.

    Args:
        image_path: Location of the image to read; passed straight to
            ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    # Open through a context manager so the underlying file handle is closed
    # as soon as OCR finishes instead of lingering until garbage collection.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image, output_type=Output.STRING)
|
|
|
|
# Function to parse names and scores from the extracted text
|
|
def parse_names_and_scores(text):
    """Extract ``(name, score)`` pairs from OCR output, one pair per line.

    Each line is split on whitespace. The final token is taken as the score
    and must be accepted by ``int()``; every preceding token is joined with a
    single space to form the name. Lines with fewer than two tokens, or whose
    final token is not an integer, are skipped.

    Args:
        text: Multi-line string to scan.

    Returns:
        A list of ``(name, score)`` tuples in the order the lines appear.
    """
    records = []
    for raw_line in text.splitlines():
        tokens = raw_line.split()
        if len(tokens) < 2:
            # Need at least one name token plus a score token.
            continue

        *name_tokens, score_token = tokens
        try:
            points = int(score_token)
        except ValueError:
            # Last token is not an integer, so this is not a score line.
            continue

        records.append((" ".join(name_tokens), points))
    return records
|
|
|
|
# Function to save data to an Excel sheet
|
|
def save_to_excel(data, file_name):
    """Write player rows to a new Excel workbook and save it.

    The workbook's active sheet is titled "Player Data" and starts with a
    header row. Every data row gets the same timestamp: the current time in
    the ``America/Los_Angeles`` zone, formatted as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        data: Iterable of ``(name, score)`` pairs.
        file_name: Destination passed to ``Workbook.save``.
    """
    # Compute the timestamp once so all rows share an identical value.
    los_angeles = pytz.timezone('America/Los_Angeles')
    stamp = datetime.now(los_angeles).strftime('%Y-%m-%d %H:%M:%S')

    book = Workbook()
    player_sheet = book.active
    player_sheet.title = "Player Data"

    header = ["Player Name", "Player Score", "Date (PST)"]
    player_sheet.append(header)
    for player, points in data:
        player_sheet.append([player, points, stamp])

    book.save(file_name)
|
|
|
|
# Main function
|
|
def main(
    image_path="image_with_names_and_scores.png",
    output_excel="player_data.xlsx",
):
    """Run the OCR -> parse -> Excel export pipeline.

    Args:
        image_path: Image to read names and scores from. Defaults to the
            original hard-coded sample file name.
        output_excel: Path of the workbook to write. Defaults to the
            original hard-coded output file name.

    Calling ``main()`` with no arguments behaves as before; the parameters
    let callers reuse the pipeline without editing this file.
    """
    text = extract_text_from_image(image_path)
    parsed_data = parse_names_and_scores(text)
    save_to_excel(parsed_data, output_excel)
    print(f"Data has been saved to {output_excel}")
|
|
|
|
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()