From 527552186f95d815bf7d9eaa40ae2168d1c6dc7d Mon Sep 17 00:00:00 2001 From: zik Date: Thu, 29 May 2025 20:40:00 +0300 Subject: [PATCH] =?UTF-8?q?=D0=B2=D0=B8=D0=BA=D0=BE=D1=80=D0=B8=D1=81?= =?UTF-8?q?=D1=82=D0=BE=D0=B2=D1=83=D0=B2=D0=B0=D1=82=D0=B8=20tid=20=D1=8F?= =?UTF-8?q?=D0=BA=20=D1=83=D0=BD=D1=96=D0=BA=D0=B0=D0=BB=D1=8C=D0=BD=D0=B8?= =?UTF-8?q?=D0=B9=20=D0=BA=D0=BB=D1=8E=D1=87=20=D0=B4=D0=BB=D1=8F=20=D0=B7?= =?UTF-8?q?=D0=B0=D0=BF=D0=BE=D0=B1=D1=96=D0=B3=D0=B0=D0=BD=D0=BD=D1=8F=20?= =?UTF-8?q?=D0=B4=D1=83=D0=B1=D0=BB=D1=8E=D0=B2=D0=B0=D0=BD=D0=BD=D1=8E=20?= =?UTF-8?q?=D0=BF=D0=BE=D1=97=D0=B7=D0=B4=D1=96=D0=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db.py | 94 ++++++++++++++++++++++++++++++++++--------------------- parser.py | 35 ++++++++++++++------- 2 files changed, 82 insertions(+), 47 deletions(-) diff --git a/db.py b/db.py index 8318546..7ec0d21 100644 --- a/db.py +++ b/db.py @@ -10,10 +10,10 @@ def init_db(): con.execute(''' CREATE TABLE IF NOT EXISTS trains ( id INTEGER PRIMARY KEY AUTOINCREMENT, + tid TEXT UNIQUE NOT NULL, train_number TEXT NOT NULL, days TEXT NOT NULL, - route TEXT NOT NULL, - UNIQUE(train_number, route) + route TEXT NOT NULL ); ''') con.execute(''' @@ -38,33 +38,44 @@ def init_db(): ''') con.commit() - def save_schedule(entries: List[Dict]): """ - Зберігає повний розклад без видалення попередніх записів. - entries — список словників з полями: - 'train_number', 'days', 'route', 'times'. + Зберігає повний розклад. + entries — список словників: + { + 'tid': str, + 'train_number': str, + 'days': '1111111', + 'route': str, + 'times': [ + {'station': str, 'arrival': str, 'departure': str}, ... + ] + } """ today = date.today().isoformat() with sqlite3.connect(DB_PATH) as con: train_ids = [] - # Додаємо або оновлюємо поїзди + # Додаємо або оновлюємо trains по tid for e in entries: + tid_val = e['tid'] tn = e['train_number'] days = e['days'] - route = e.get('route', '') + route = e['route'] con.execute(''' - INSERT INTO trains (train_number, days, route) - VALUES (?, ?, ?) - ON CONFLICT(train_number, route) DO UPDATE SET days = excluded.days - ''', (tn, days, route)) - tid = con.execute( - 'SELECT id FROM trains WHERE train_number = ? AND route = ?', - (tn, route) + INSERT INTO trains (tid, train_number, days, route) + VALUES (?, ?, ?, ?) + ON CONFLICT(tid) DO UPDATE SET + train_number = excluded.train_number, + days = excluded.days, + route = excluded.route + ''', (tid_val, tn, days, route)) + train_id = con.execute( + 'SELECT id FROM trains WHERE tid = ?', + (tid_val,) ).fetchone()[0] - train_ids.append(tid) + train_ids.append(train_id) - # Додаємо або оновлюємо станції та розклад + # Вставляємо записи schedules for idx, e in enumerate(entries): tid = train_ids[idx] for t in e['times']: @@ -73,10 +84,12 @@ def save_schedule(entries: List[Dict]): con.execute(''' INSERT INTO stations (name, km) VALUES (?, ?) - ON CONFLICT(name) DO UPDATE SET km = COALESCE(excluded.km, stations.km) + ON CONFLICT(name) DO UPDATE SET + km = COALESCE(excluded.km, stations.km) ''', (st, km)) - sid = con.execute( - 'SELECT id FROM stations WHERE name = ?', (st,) + station_id = con.execute( + 'SELECT id FROM stations WHERE name = ?', + (st,) ).fetchone()[0] arr = t['arrival'] dep = t['departure'] @@ -84,34 +97,45 @@ def save_schedule(entries: List[Dict]): INSERT OR REPLACE INTO schedules (train_id, station_id, arrival_time, departure_time, travel_date) VALUES (?, ?, ?, ?, ?) - ''', (tid, sid, arr, dep, today)) + ''', (tid, station_id, arr, dep, today)) con.commit() - def get_schedule(route: Optional[str] = None, travel_date: Optional[str] = None) -> List[Dict]: - """Повертає розклад поїздів. Якщо вказано route, фільтрує за ним.""" + """ + Повертає розклад поїздів. Якщо вказано route, фільтрує за ним. + Повертає список: + [{'train_number': ..., 'route': ..., 'times': [...]}, ...] + """ from datetime import date as _date travel_date = travel_date or _date.today().isoformat() with sqlite3.connect(DB_PATH) as con: if route: rows = con.execute(''' SELECT tr.train_number, tr.route, st.name, sc.arrival_time, sc.departure_time - FROM schedules sc - JOIN trains tr ON sc.train_id = tr.id - JOIN stations st ON sc.station_id = st.id - WHERE sc.travel_date = ? AND tr.route = ? - ORDER BY tr.train_number, st.id + FROM schedules sc + JOIN trains tr ON sc.train_id = tr.id + JOIN stations st ON sc.station_id = st.id + WHERE sc.travel_date = ? AND tr.route = ? + ORDER BY tr.train_number, st.id ''', (travel_date, route)).fetchall() else: rows = con.execute(''' SELECT tr.train_number, tr.route, st.name, sc.arrival_time, sc.departure_time - FROM schedules sc - JOIN trains tr ON sc.train_id = tr.id - JOIN stations st ON sc.station_id = st.id - WHERE sc.travel_date = ? - ORDER BY tr.train_number, st.id + FROM schedules sc + JOIN trains tr ON sc.train_id = tr.id + JOIN stations st ON sc.station_id = st.id + WHERE sc.travel_date = ? + ORDER BY tr.train_number, st.id ''', (travel_date,)).fetchall() + schedule: Dict[tuple, List[Dict]] = {} for num, rt, station, arrival, departure in rows: - schedule.setdefault((num, rt), []).append({'station': station, 'arrival': arrival, 'departure': departure}) - return [{'train_number': num, 'route': rt, 'times': times} for (num, rt), times in schedule.items()] + schedule.setdefault((num, rt), []).append({ + 'station': station, + 'arrival': arrival, + 'departure': departure + }) + return [ + {'train_number': num, 'route': rt, 'times': times} + for (num, rt), times in schedule.items() + ] diff --git a/parser.py b/parser.py index 4b1ff17..fbc3dbe 100644 --- a/parser.py +++ b/parser.py @@ -19,7 +19,7 @@ def parse_days(text: str) -> str: return '1111111' if text.startswith('крім'): days = [d.strip(' .') for d in text.split('крім', 1)[1].split(',')] - mask = [1]*7 + mask = [1] * 7 for d in days: idx = DAY_INDEX.get(d) if idx is not None: @@ -27,7 +27,7 @@ def parse_days(text: str) -> str: return ''.join(str(b) for b in mask) if text.startswith('по'): days = [d.strip(' .') for d in text.split('по', 1)[1].split(',')] - mask = [0]*7 + mask = [0] * 7 for d in days: idx = DAY_INDEX.get(d) if idx is not None: @@ -41,7 +41,7 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]: tab=1 — Київ→Ніжин, tab=2 — Ніжин→Київ. Повертає список поїздів з полями: - 'train_number', 'days', 'route', 'times' (список словників station/arrival/departure). + 'tid', 'train_number', 'days', 'route', 'times'. """ # Завантаження HTML if use_local: @@ -55,15 +55,11 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]: soup = BeautifulSoup(html, 'html.parser') prefix = f'div#tabs-trains{tab}' - # Таблиця з розкладом + # Таблиця розкладу times_table = soup.select_one(f'{prefix} table.td_center') if not times_table: raise RuntimeError(f'Не знайдено таблицю розкладу для tab={tab}') - # Парсимо маршрути (по одному на потяг) - route_tags = times_table.select('td.course') - routes = [tag.get_text(strip=True) for tag in route_tags] - # Список станцій (35) station_tags = soup.select( f'{prefix} table.left tr.on a.et, ' @@ -71,26 +67,41 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]: ) stations = [a.get_text(strip=True) for a in station_tags] - # Заголовок з номерами потягів і днями курсування + # Ряди таблиці trs = times_table.find_all('tr') + + # Рядок з номерами потягів та днями курсування header_row = next(r for r in trs if r.find('td', class_='on_right_t')) - cells = header_row.find_all('td', class_='on_right_t') + train_cells = header_row.find_all('td', class_='on_right_t') + + # Парсимо маршрути () для кожного потяга + route_tags = times_table.select('td.course') + routes = [tag.get_text(strip=True) for tag in route_tags[:len(train_cells)]] entries: List[Dict] = [] - for idx, cell in enumerate(cells): + for idx, cell in enumerate(train_cells): + # Витягнути унікальний tid з href + a_tag = cell.find('a', class_='et') + href = a_tag['href'] # наприклад ".?tid=26397" + tid = href.split('tid=')[-1] + parts = cell.get_text(separator='|', strip=True).split('|') num = parts[0].rstrip(',').strip() days = parse_days(parts[1] if len(parts) > 1 else 'щоденно') route = routes[idx] if idx < len(routes) else '' + entries.append({ + 'tid': tid, 'train_number': num, 'days': days, 'route': route, 'times': [] }) - # Рядки з часами руху + # Рядки з часами руху (повинно бути 35) time_rows = [r for r in trs if r.find('td', class_='q0') or r.find('td', class_='q1')] + + # Збирання часу для кожного поїзда та станції for idx, entry in enumerate(entries): base = idx * 3 for si, row in enumerate(time_rows):