використовувати tid як унікальний ключ для запобігання дублюванню поїздів
This commit is contained in:
parent
21024f1805
commit
527552186f
94
db.py
94
db.py
@ -10,10 +10,10 @@ def init_db():
|
|||||||
con.execute('''
|
con.execute('''
|
||||||
CREATE TABLE IF NOT EXISTS trains (
|
CREATE TABLE IF NOT EXISTS trains (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
tid TEXT UNIQUE NOT NULL,
|
||||||
train_number TEXT NOT NULL,
|
train_number TEXT NOT NULL,
|
||||||
days TEXT NOT NULL,
|
days TEXT NOT NULL,
|
||||||
route TEXT NOT NULL,
|
route TEXT NOT NULL
|
||||||
UNIQUE(train_number, route)
|
|
||||||
);
|
);
|
||||||
''')
|
''')
|
||||||
con.execute('''
|
con.execute('''
|
||||||
@ -38,33 +38,44 @@ def init_db():
|
|||||||
''')
|
''')
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
||||||
|
|
||||||
def save_schedule(entries: List[Dict]):
|
def save_schedule(entries: List[Dict]):
|
||||||
"""
|
"""
|
||||||
Зберігає повний розклад без видалення попередніх записів.
|
Зберігає повний розклад.
|
||||||
entries — список словників з полями:
|
entries — список словників:
|
||||||
'train_number', 'days', 'route', 'times'.
|
{
|
||||||
|
'tid': str,
|
||||||
|
'train_number': str,
|
||||||
|
'days': '1111111',
|
||||||
|
'route': str,
|
||||||
|
'times': [
|
||||||
|
{'station': str, 'arrival': str, 'departure': str}, ...
|
||||||
|
]
|
||||||
|
}
|
||||||
"""
|
"""
|
||||||
today = date.today().isoformat()
|
today = date.today().isoformat()
|
||||||
with sqlite3.connect(DB_PATH) as con:
|
with sqlite3.connect(DB_PATH) as con:
|
||||||
train_ids = []
|
train_ids = []
|
||||||
# Додаємо або оновлюємо поїзди
|
# Додаємо або оновлюємо trains по tid
|
||||||
for e in entries:
|
for e in entries:
|
||||||
|
tid_val = e['tid']
|
||||||
tn = e['train_number']
|
tn = e['train_number']
|
||||||
days = e['days']
|
days = e['days']
|
||||||
route = e.get('route', '')
|
route = e['route']
|
||||||
con.execute('''
|
con.execute('''
|
||||||
INSERT INTO trains (train_number, days, route)
|
INSERT INTO trains (tid, train_number, days, route)
|
||||||
VALUES (?, ?, ?)
|
VALUES (?, ?, ?, ?)
|
||||||
ON CONFLICT(train_number, route) DO UPDATE SET days = excluded.days
|
ON CONFLICT(tid) DO UPDATE SET
|
||||||
''', (tn, days, route))
|
train_number = excluded.train_number,
|
||||||
tid = con.execute(
|
days = excluded.days,
|
||||||
'SELECT id FROM trains WHERE train_number = ? AND route = ?',
|
route = excluded.route
|
||||||
(tn, route)
|
''', (tid_val, tn, days, route))
|
||||||
|
train_id = con.execute(
|
||||||
|
'SELECT id FROM trains WHERE tid = ?',
|
||||||
|
(tid_val,)
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
train_ids.append(tid)
|
train_ids.append(train_id)
|
||||||
|
|
||||||
# Додаємо або оновлюємо станції та розклад
|
# Вставляємо записи schedules
|
||||||
for idx, e in enumerate(entries):
|
for idx, e in enumerate(entries):
|
||||||
tid = train_ids[idx]
|
tid = train_ids[idx]
|
||||||
for t in e['times']:
|
for t in e['times']:
|
||||||
@ -73,10 +84,12 @@ def save_schedule(entries: List[Dict]):
|
|||||||
con.execute('''
|
con.execute('''
|
||||||
INSERT INTO stations (name, km)
|
INSERT INTO stations (name, km)
|
||||||
VALUES (?, ?)
|
VALUES (?, ?)
|
||||||
ON CONFLICT(name) DO UPDATE SET km = COALESCE(excluded.km, stations.km)
|
ON CONFLICT(name) DO UPDATE SET
|
||||||
|
km = COALESCE(excluded.km, stations.km)
|
||||||
''', (st, km))
|
''', (st, km))
|
||||||
sid = con.execute(
|
station_id = con.execute(
|
||||||
'SELECT id FROM stations WHERE name = ?', (st,)
|
'SELECT id FROM stations WHERE name = ?',
|
||||||
|
(st,)
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
arr = t['arrival']
|
arr = t['arrival']
|
||||||
dep = t['departure']
|
dep = t['departure']
|
||||||
@ -84,34 +97,45 @@ def save_schedule(entries: List[Dict]):
|
|||||||
INSERT OR REPLACE INTO schedules
|
INSERT OR REPLACE INTO schedules
|
||||||
(train_id, station_id, arrival_time, departure_time, travel_date)
|
(train_id, station_id, arrival_time, departure_time, travel_date)
|
||||||
VALUES (?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?)
|
||||||
''', (tid, sid, arr, dep, today))
|
''', (tid, station_id, arr, dep, today))
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
||||||
|
|
||||||
def get_schedule(route: Optional[str] = None, travel_date: Optional[str] = None) -> List[Dict]:
|
def get_schedule(route: Optional[str] = None, travel_date: Optional[str] = None) -> List[Dict]:
|
||||||
"""Повертає розклад поїздів. Якщо вказано route, фільтрує за ним."""
|
"""
|
||||||
|
Повертає розклад поїздів. Якщо вказано route, фільтрує за ним.
|
||||||
|
Повертає список:
|
||||||
|
[{'train_number': ..., 'route': ..., 'times': [...]}, ...]
|
||||||
|
"""
|
||||||
from datetime import date as _date
|
from datetime import date as _date
|
||||||
travel_date = travel_date or _date.today().isoformat()
|
travel_date = travel_date or _date.today().isoformat()
|
||||||
with sqlite3.connect(DB_PATH) as con:
|
with sqlite3.connect(DB_PATH) as con:
|
||||||
if route:
|
if route:
|
||||||
rows = con.execute('''
|
rows = con.execute('''
|
||||||
SELECT tr.train_number, tr.route, st.name, sc.arrival_time, sc.departure_time
|
SELECT tr.train_number, tr.route, st.name, sc.arrival_time, sc.departure_time
|
||||||
FROM schedules sc
|
FROM schedules sc
|
||||||
JOIN trains tr ON sc.train_id = tr.id
|
JOIN trains tr ON sc.train_id = tr.id
|
||||||
JOIN stations st ON sc.station_id = st.id
|
JOIN stations st ON sc.station_id = st.id
|
||||||
WHERE sc.travel_date = ? AND tr.route = ?
|
WHERE sc.travel_date = ? AND tr.route = ?
|
||||||
ORDER BY tr.train_number, st.id
|
ORDER BY tr.train_number, st.id
|
||||||
''', (travel_date, route)).fetchall()
|
''', (travel_date, route)).fetchall()
|
||||||
else:
|
else:
|
||||||
rows = con.execute('''
|
rows = con.execute('''
|
||||||
SELECT tr.train_number, tr.route, st.name, sc.arrival_time, sc.departure_time
|
SELECT tr.train_number, tr.route, st.name, sc.arrival_time, sc.departure_time
|
||||||
FROM schedules sc
|
FROM schedules sc
|
||||||
JOIN trains tr ON sc.train_id = tr.id
|
JOIN trains tr ON sc.train_id = tr.id
|
||||||
JOIN stations st ON sc.station_id = st.id
|
JOIN stations st ON sc.station_id = st.id
|
||||||
WHERE sc.travel_date = ?
|
WHERE sc.travel_date = ?
|
||||||
ORDER BY tr.train_number, st.id
|
ORDER BY tr.train_number, st.id
|
||||||
''', (travel_date,)).fetchall()
|
''', (travel_date,)).fetchall()
|
||||||
|
|
||||||
schedule: Dict[tuple, List[Dict]] = {}
|
schedule: Dict[tuple, List[Dict]] = {}
|
||||||
for num, rt, station, arrival, departure in rows:
|
for num, rt, station, arrival, departure in rows:
|
||||||
schedule.setdefault((num, rt), []).append({'station': station, 'arrival': arrival, 'departure': departure})
|
schedule.setdefault((num, rt), []).append({
|
||||||
return [{'train_number': num, 'route': rt, 'times': times} for (num, rt), times in schedule.items()]
|
'station': station,
|
||||||
|
'arrival': arrival,
|
||||||
|
'departure': departure
|
||||||
|
})
|
||||||
|
return [
|
||||||
|
{'train_number': num, 'route': rt, 'times': times}
|
||||||
|
for (num, rt), times in schedule.items()
|
||||||
|
]
|
||||||
|
|||||||
35
parser.py
35
parser.py
@ -19,7 +19,7 @@ def parse_days(text: str) -> str:
|
|||||||
return '1111111'
|
return '1111111'
|
||||||
if text.startswith('крім'):
|
if text.startswith('крім'):
|
||||||
days = [d.strip(' .') for d in text.split('крім', 1)[1].split(',')]
|
days = [d.strip(' .') for d in text.split('крім', 1)[1].split(',')]
|
||||||
mask = [1]*7
|
mask = [1] * 7
|
||||||
for d in days:
|
for d in days:
|
||||||
idx = DAY_INDEX.get(d)
|
idx = DAY_INDEX.get(d)
|
||||||
if idx is not None:
|
if idx is not None:
|
||||||
@ -27,7 +27,7 @@ def parse_days(text: str) -> str:
|
|||||||
return ''.join(str(b) for b in mask)
|
return ''.join(str(b) for b in mask)
|
||||||
if text.startswith('по'):
|
if text.startswith('по'):
|
||||||
days = [d.strip(' .') for d in text.split('по', 1)[1].split(',')]
|
days = [d.strip(' .') for d in text.split('по', 1)[1].split(',')]
|
||||||
mask = [0]*7
|
mask = [0] * 7
|
||||||
for d in days:
|
for d in days:
|
||||||
idx = DAY_INDEX.get(d)
|
idx = DAY_INDEX.get(d)
|
||||||
if idx is not None:
|
if idx is not None:
|
||||||
@ -41,7 +41,7 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]:
|
|||||||
tab=1 — Київ→Ніжин,
|
tab=1 — Київ→Ніжин,
|
||||||
tab=2 — Ніжин→Київ.
|
tab=2 — Ніжин→Київ.
|
||||||
Повертає список поїздів з полями:
|
Повертає список поїздів з полями:
|
||||||
'train_number', 'days', 'route', 'times' (список словників station/arrival/departure).
|
'tid', 'train_number', 'days', 'route', 'times'.
|
||||||
"""
|
"""
|
||||||
# Завантаження HTML
|
# Завантаження HTML
|
||||||
if use_local:
|
if use_local:
|
||||||
@ -55,15 +55,11 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]:
|
|||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
prefix = f'div#tabs-trains{tab}'
|
prefix = f'div#tabs-trains{tab}'
|
||||||
|
|
||||||
# Таблиця з розкладом
|
# Таблиця розкладу
|
||||||
times_table = soup.select_one(f'{prefix} table.td_center')
|
times_table = soup.select_one(f'{prefix} table.td_center')
|
||||||
if not times_table:
|
if not times_table:
|
||||||
raise RuntimeError(f'Не знайдено таблицю розкладу для tab={tab}')
|
raise RuntimeError(f'Не знайдено таблицю розкладу для tab={tab}')
|
||||||
|
|
||||||
# Парсимо маршрути (по одному <td class="course"> на потяг)
|
|
||||||
route_tags = times_table.select('td.course')
|
|
||||||
routes = [tag.get_text(strip=True) for tag in route_tags]
|
|
||||||
|
|
||||||
# Список станцій (35)
|
# Список станцій (35)
|
||||||
station_tags = soup.select(
|
station_tags = soup.select(
|
||||||
f'{prefix} table.left tr.on a.et, '
|
f'{prefix} table.left tr.on a.et, '
|
||||||
@ -71,26 +67,41 @@ def fetch_schedule(tab: int = 1, use_local: bool = False) -> List[Dict]:
|
|||||||
)
|
)
|
||||||
stations = [a.get_text(strip=True) for a in station_tags]
|
stations = [a.get_text(strip=True) for a in station_tags]
|
||||||
|
|
||||||
# Заголовок з номерами потягів і днями курсування
|
# Ряди таблиці
|
||||||
trs = times_table.find_all('tr')
|
trs = times_table.find_all('tr')
|
||||||
|
|
||||||
|
# Рядок з номерами потягів та днями курсування
|
||||||
header_row = next(r for r in trs if r.find('td', class_='on_right_t'))
|
header_row = next(r for r in trs if r.find('td', class_='on_right_t'))
|
||||||
cells = header_row.find_all('td', class_='on_right_t')
|
train_cells = header_row.find_all('td', class_='on_right_t')
|
||||||
|
|
||||||
|
# Парсимо маршрути (<td class="course">) для кожного потяга
|
||||||
|
route_tags = times_table.select('td.course')
|
||||||
|
routes = [tag.get_text(strip=True) for tag in route_tags[:len(train_cells)]]
|
||||||
|
|
||||||
entries: List[Dict] = []
|
entries: List[Dict] = []
|
||||||
for idx, cell in enumerate(cells):
|
for idx, cell in enumerate(train_cells):
|
||||||
|
# Витягнути унікальний tid з href
|
||||||
|
a_tag = cell.find('a', class_='et')
|
||||||
|
href = a_tag['href'] # наприклад ".?tid=26397"
|
||||||
|
tid = href.split('tid=')[-1]
|
||||||
|
|
||||||
parts = cell.get_text(separator='|', strip=True).split('|')
|
parts = cell.get_text(separator='|', strip=True).split('|')
|
||||||
num = parts[0].rstrip(',').strip()
|
num = parts[0].rstrip(',').strip()
|
||||||
days = parse_days(parts[1] if len(parts) > 1 else 'щоденно')
|
days = parse_days(parts[1] if len(parts) > 1 else 'щоденно')
|
||||||
route = routes[idx] if idx < len(routes) else ''
|
route = routes[idx] if idx < len(routes) else ''
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
|
'tid': tid,
|
||||||
'train_number': num,
|
'train_number': num,
|
||||||
'days': days,
|
'days': days,
|
||||||
'route': route,
|
'route': route,
|
||||||
'times': []
|
'times': []
|
||||||
})
|
})
|
||||||
|
|
||||||
# Рядки з часами руху
|
# Рядки з часами руху (повинно бути 35)
|
||||||
time_rows = [r for r in trs if r.find('td', class_='q0') or r.find('td', class_='q1')]
|
time_rows = [r for r in trs if r.find('td', class_='q0') or r.find('td', class_='q1')]
|
||||||
|
|
||||||
|
# Збирання часу для кожного поїзда та станції
|
||||||
for idx, entry in enumerate(entries):
|
for idx, entry in enumerate(entries):
|
||||||
base = idx * 3
|
base = idx * 3
|
||||||
for si, row in enumerate(time_rows):
|
for si, row in enumerate(time_rows):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user