#!/usr/bin/env python
import datetime
import time
import urllib.request

from bs4 import BeautifulSoup
from datetime import datetime as dt

# @brief Checks the DWD pages for wind information
#
# Format:
# Provides the next update time (updateTime) of the DWD page in UTC
# Provides a list of wind information (windData)
#  - organized as a list of tuples
#  - first element of tuple: GAFOR-IDs for the following wind information
#  - second element of tuple: list of tuples of wind data
#    - first element of wind data tuple: minimum altitude AMSL for this wind information
#    - second element of wind data tuple: wind direction (degrees)
#    - third element of wind data tuple: wind speed (KT)
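#
# Illustrative example of the resulting structure (values are made up):
#   windData = [
#       ([10, 11, 12], [(2000, 270, 15), (5000, 280, 25)]),
#   ]
# i.e. GAFOR areas 10-12 share a table of (altitude FT AMSL, direction, speed KT) rows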
class DwdCrawler():
    def __init__(self):
        self.updateTime = None
        self.windData = None

    @staticmethod
    def parseGaforAreas(areas : str):
        # drop the colon and keep the ID list that follows 'GAFOR-Gebiete'
        areas = areas.replace(':', '')
        areas = areas.split(' ')[1]
        areaIds = []

        # some IDs are given as ranges, others as single IDs
        for segment in areas.split(','):
            # check if we have range definitions or single IDs
            borders = segment.split('-')
            if len(borders) == 2:
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))

        return areaIds

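    # Illustrative call (the 'GAFOR-Gebiete' line format is assumed from the
    # parsing above):
    #   DwdCrawler.parseGaforAreas('GAFOR-Gebiete 10,12-14:') -> [10, 12, 13, 14]
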
    @staticmethod
    def parseWindTableRow(row : str, table):
        # get the columns
        entries = row.split('|')

        # check if the line is invalid or we have the header
        if len(entries) < 2 or 'AMSL' in entries[0]:
            return table

        # parse the wind data (expected as direction/speed, e.g. '270/25KT')
        windData = entries[1].strip().split(' ')[0].split('/')
        if len(windData) != 2:
            return table

        # extend the table; altitudes are given as flight levels or feet
        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100
        else:
            altitude = int(altitude.replace('FT', ''))
        row = ( altitude, int(windData[0]), int(windData[1].replace('KT', '')) )
        table.append(row)

        return table

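    # Illustrative call (the row format is assumed from the parsing above):
    #   DwdCrawler.parseWindTableRow('FL050 | 270/25KT', []) -> [(5000, 270, 25)]
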
    @staticmethod
    def parseNextUpdateTime(line : str):
        entries = line.split(' ')
        if len(entries) >= 4:
            # locate the 'HH.MM' token, which usually precedes 'UTC'
            utcIndex = 2
            if 'UTC' in entries[len(entries) - 2]:
                utcIndex = len(entries) - 3
            elif 'UTC' in entries[len(entries) - 1]:
                utcIndex = len(entries) - 2

            currentUtc = dt.utcfromtimestamp(int(time.time()))
            currentHour = int(currentUtc.strftime('%H'))

            # check if we have a day overlap
            if currentHour > int(entries[utcIndex].split('.')[0]):
                nextDay = currentUtc + datetime.timedelta(days=1)
                date = nextDay.strftime('%Y-%m-%d')
            else:
                date = currentUtc.strftime('%Y-%m-%d')

            # create the new UTC update time
            return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')

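    # Illustrative call (line format assumed from the parsing above); run at
    # 09:00 UTC on 2024-05-01 this yields a timezone-aware datetime:
    #   DwdCrawler.parseNextUpdateTime('Die Aktualisierung erfolgt um 12.00 UTC')
    #   -> datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
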
    def parseGaforPage(self, url : str):
        with urllib.request.urlopen(url) as site:
            data = site.read().decode('utf-8')

        parsed = BeautifulSoup(data, features='lxml')

        # search the info about the GAFOR areas (the report is the last <pre> block)
        content = None
        for element in parsed.body.find_all('pre'):
            content = element.text

        # analyze the received data
        if content is not None:
            windInformation = []
            nextUpdate = None
            windTable = []
            areaIds = None

            # find all relevant information
            for line in content.splitlines():
                if line == '':
                    # an empty line closes the current wind table
                    if len(windTable) != 0:
                        windInformation.append(( areaIds, windTable ))
                        areaIds = None
                        windTable = []
                elif line.startswith('GAFOR-Gebiete'):
                    areaIds = DwdCrawler.parseGaforAreas(line)
                    windTable = []
                elif areaIds is not None:
                    windTable = DwdCrawler.parseWindTableRow(line, windTable)
                elif 'Aktualisierung erfolgt um ' in line:
                    nextUpdate = DwdCrawler.parseNextUpdateTime(line)

            # return the collected information
            if len(windInformation) == 0 or nextUpdate is None:
                return None, None
            else:
                return nextUpdate, windInformation

        # no <pre> content found on the page
        return None, None

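    # The <pre> content is expected to look roughly like this (assumed from the
    # parsing above, abbreviated):
    #   GAFOR-Gebiete 10-13:
    #   AMSL   | Wind
    #   FL050  | 270/25KT
    #
    #   Die Aktualisierung erfolgt um 12.00 UTC
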
    def receiveWindData(self):
        self.updateTime = None
        self.windData = []

        with urllib.request.urlopen('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html') as site:
            data = site.read().decode('utf-8')

        # find the pages of the GAFOR reports
        pages = []
        parsed = BeautifulSoup(data, features='lxml')
        for link in parsed.body.find_all('a', title=True):
            if 'node' in link['href'] and 'Flugwetterprognose' in link['title']:
                # remove the jsession from the link
                pages.append('https://www.dwd.de/' + link['href'].split(';')[0])

        # receive the wind data of every report page
        for page in pages:
            nextUpdate, wind = self.parseGaforPage(page)
            if nextUpdate is not None:
                # keep the earliest upcoming update time of all pages
                if self.updateTime is None or self.updateTime > nextUpdate:
                    self.updateTime = nextUpdate
                self.windData.extend(wind)

        # indicate whether new wind data is available
        return self.updateTime is not None
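
# Minimal usage sketch (assumes network access to dwd.de and the current page
# layout; the printed format is illustrative):
if __name__ == '__main__':
    crawler = DwdCrawler()
    if crawler.receiveWindData():
        print('next update (UTC):', crawler.updateTime)
        for areaIds, table in crawler.windData:
            print('GAFOR areas', areaIds, '->', table)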