використовувати tid як унікальний ключ для запобігання дублюванню поїздів

2025-05-29 20:40:00 +03:00 · 2025-05-29 20:40:00 +03:00 · 527552186f
commit 527552186f
parent 21024f1805
2 changed files with 82 additions and 47 deletions
--- a/db.py
+++ b/db.py
@@ -10,10 +10,10 @@ def init_db():
        con.execute('''
            CREATE TABLE IF NOT EXISTS trains (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                tid TEXT UNIQUE NOT NULL,
                train_number TEXT NOT NULL,
                days TEXT NOT NULL,
-                route TEXT NOT NULL,
-                UNIQUE(train_number, route)
+                route TEXT NOT NULL
            );
        ''')
        con.execute('''
@@ -38,33 +38,44 @@ def init_db():
        ''')
        con.commit()

-
 def save_schedule(entries: List[Dict]):
    """
-    Зберігає повний розклад без видалення попередніх записів.
-    entries — список словників з полями:
-      'train_number', 'days', 'route', 'times'.
+    Зберігає повний розклад.
+    entries — список словників:
+      {
+        'tid': str,
+        'train_number': str,
+        'days': '1111111',
+        'route': str,
+        'times': [
+            {'station': str, 'arrival': str, 'departure': str}, ...
+        ]
+      }
    """
    today = date.today().isoformat()
    with sqlite3.connect(DB_PATH) as con:
        train_ids = []
-        # Додаємо або оновлюємо поїзди
+        # Додаємо або оновлюємо trains по tid
        for e in entries:
+            tid_val = e['tid']
            tn = e['train_number']
            days = e['days']
-            route = e.get('route', '')
+            route = e['route']
            con.execute('''
-                INSERT INTO trains (train_number, days, route)
-                VALUES (?, ?, ?)
-                ON CONFLICT(train_number, route) DO UPDATE SET days = excluded.days
-            ''', (tn, days, route))
-            tid = con.execute(
-                'SELECT id FROM trains WHERE train_number = ? AND route = ?',
-                (tn, route)
+                INSERT INTO trains (tid, train_number, days, route)
+                VALUES (?, ?, ?, ?)
+                ON CONFLICT(tid) DO UPDATE SET
+                    train_number = excluded.train_number,
+                    days         = excluded.days,
+                    route        = excluded.route
+            ''', (tid_val, tn, days, route))
+            train_id = con.execute(
+                'SELECT id FROM trains WHERE tid = ?',
+                (tid_val,)
            ).fetchone()[0]
-            train_ids.append(tid)
+            train_ids.append(train_id)

-        # Додаємо або оновлюємо станції та розклад
+        # Вставляємо записи schedules
        for idx, e in enumerate(entries):
            tid = train_ids[idx]
            for t in e['times']:
@@ -73,10 +84,12 @@ def save_schedule(entries: List[Dict]):
                con.execute('''
                    INSERT INTO stations (name, km)
                    VALUES (?, ?)
-                    ON CONFLICT(name) DO UPDATE SET km = COALESCE(excluded.km, stations.km)
+                    ON CONFLICT(name) DO UPDATE SET
+                        km = COALESCE(excluded.km, stations.km)
                ''', (st, km))
-                sid = con.execute(
-                    'SELECT id FROM stations WHERE name = ?', (st,)
+                station_id = con.execute(
+                    'SELECT id FROM stations WHERE name = ?',
+                    (st,)
                ).fetchone()[0]
                arr = t['arrival']
                dep = t['departure']
@@ -84,12 +97,15 @@ def save_schedule(entries: List[Dict]):
                    INSERT OR REPLACE INTO schedules
                    (train_id, station_id, arrival_time, departure_time, travel_date)
                    VALUES (?, ?, ?, ?, ?)
-                ''', (tid, sid, arr, dep, today))
+                ''', (tid, station_id, arr, dep, today))
        con.commit()

-
 def get_schedule(route: Optional[str] = None, travel_date: Optional[str] = None) -> List[Dict]:
-    """Повертає розклад поїздів. Якщо вказано route, фільтрує за ним."""
+    """
+    Повертає розклад поїздів. Якщо вказано route, фільтрує за ним.
+    Повертає список:
+      [{'train_number': ..., 'route': ..., 'times': [...]}, ...]
+    """
    from datetime import date as _date
    travel_date = travel_date or _date.today().isoformat()
    with sqlite3.connect(DB_PATH) as con:
@@ -111,7 +127,15 @@ def get_schedule(route: Optional[str] = None, travel_date: Optional[str] = None)
                 WHERE sc.travel_date = ?
                 ORDER BY tr.train_number, st.id
            ''', (travel_date,)).fetchall()
+
    schedule: Dict[tuple, List[Dict]] = {}
    for num, rt, station, arrival, departure in rows:
-        schedule.setdefault((num, rt), []).append({'station': station, 'arrival': arrival, 'departure': departure})
-    return [{'train_number': num, 'route': rt, 'times': times} for (num, rt), times in schedule.items()]
+        schedule.setdefault((num, rt), []).append({
+            'station': station,
+            'arrival': arrival,
+            'departure': departure
+        })
+    return [
+        {'train_number': num, 'route': rt, 'times': times}
+        for (num, rt), times in schedule.items()
+    ]
--- a/parser.py
+++ b/parser.py
@@ -41,7 +41,7 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]:
      tab=1 — Київ→Ніжин,
      tab=2 — Ніжин→Київ.
    Повертає список поїздів з полями:
-      'train_number', 'days', 'route', 'times' (список словників station/arrival/departure).
+      'tid', 'train_number', 'days', 'route', 'times'.
    """
    # Завантаження HTML
    if use_local:
@@ -55,15 +55,11 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]:
    soup = BeautifulSoup(html, 'html.parser')
    prefix = f'div#tabs-trains{tab}'

-    # Таблиця з розкладом
+    # Таблиця розкладу
    times_table = soup.select_one(f'{prefix} table.td_center')
    if not times_table:
        raise RuntimeError(f'Не знайдено таблицю розкладу для tab={tab}')

-    # Парсимо маршрути (по одному <td class="course"> на потяг)
-    route_tags = times_table.select('td.course')
-    routes = [tag.get_text(strip=True) for tag in route_tags]
-
    # Список станцій (35)
    station_tags = soup.select(
        f'{prefix} table.left tr.on a.et, '
@@ -71,26 +67,41 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]:
    )
    stations = [a.get_text(strip=True) for a in station_tags]

-    # Заголовок з номерами потягів і днями курсування
+    # Ряди таблиці
    trs = times_table.find_all('tr')
+
+    # Рядок з номерами потягів та днями курсування
    header_row = next(r for r in trs if r.find('td', class_='on_right_t'))
-    cells = header_row.find_all('td', class_='on_right_t')
+    train_cells = header_row.find_all('td', class_='on_right_t')
+
+    # Парсимо маршрути (<td class="course">) для кожного потяга
+    route_tags = times_table.select('td.course')
+    routes = [tag.get_text(strip=True) for tag in route_tags[:len(train_cells)]]

    entries: List[Dict] = []
-    for idx, cell in enumerate(cells):
+    for idx, cell in enumerate(train_cells):
+        # Витягнути унікальний tid з href
+        a_tag = cell.find('a', class_='et')
+        href = a_tag['href']  # наприклад ".?tid=26397"
+        tid = href.split('tid=')[-1]
+
        parts = cell.get_text(separator='|', strip=True).split('|')
        num = parts[0].rstrip(',').strip()
        days = parse_days(parts[1] if len(parts) > 1 else 'щоденно')
        route = routes[idx] if idx < len(routes) else ''
+
        entries.append({
+            'tid': tid,
            'train_number': num,
            'days': days,
            'route': route,
            'times': []
        })

-    # Рядки з часами руху
+    # Рядки з часами руху (повинно бути 35)
    time_rows = [r for r in trs if r.find('td', class_='q0') or r.find('td', class_='q1')]
+
+    # Збирання часу для кожного поїзда та станції
    for idx, entry in enumerate(entries):
        base = idx * 3
        for si, row in enumerate(time_rows):