#!/usr/bin/env python

import datetime
import time
import urllib.request

from bs4 import BeautifulSoup
from datetime import datetime as dt

# @brief Checks the DWD pages for wind information
# Format:
#   Provides next update time (UpdateTime) of the DWD page in UTC
#   Provides the wind information (WindData)
#    - organized as a dictionary
#    - key: GAFOR-ID for the following wind information
#    - value: list of tuples of wind data
#      - first element of wind data tuple: minimum altitude AMSL for this wind information (feet)
#      - second element of wind data tuple: wind direction (0 for variable wind)
#      - third element of wind data tuple: wind speed (KT)
class DwdCrawler():
    def __init__(self):
        # earliest "next update" timestamp over all parsed pages (timezone-aware, UTC)
        self.UpdateTime = None
        # dictionary: GAFOR-ID -> list of (altitude, direction, speed) tuples
        self.WindData = None

    @staticmethod
    def parseGaforAreas(areas : str):
        """Parse the area IDs out of a 'GAFOR-Gebiete ...' headline.

        The second space-separated token of the line is read as a
        comma-separated list whose segments are either single IDs ('5')
        or inclusive ranges ('1-3'). Colons are ignored.

        Returns the list of integer area IDs.
        Raises IndexError if the line has no second token and ValueError
        if a segment is not numeric.
        """
        areas = areas.replace(':', '')
        areas = areas.split(' ')[1]
        areaIds = []

        # some IDs are lists
        for segment in areas.split(','):
            # check if we have range definitions or single IDs
            borders = segment.split('-')
            if 2 == len(borders):
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))

        return areaIds

    @staticmethod
    def parseWindTableRow(row : str, table):
        """Parse one '|'-separated wind table row and append it to table.

        The first column is the altitude ('FLnnn' is converted to feet by
        multiplying with 100, otherwise an optional 'FT' suffix is removed),
        the second column starts with '<direction>/<speed>KT'. A direction
        of 'VRB' is stored as 0.

        Header rows (containing 'AMSL') and rows that do not match the
        expected layout leave the table untouched.

        Returns the (in-place extended) table.
        """
        # get the columns
        entries = row.split('|')

        # check if the line is invalid or we have the header
        if 2 > len(entries) or 'AMSL' in entries[0]:
            return table

        # parse the wind data
        windData = entries[1].strip().split(' ')[0].split('/')
        if 2 != len(windData):
            return table

        # normalize the altitude to feet
        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100
        else:
            altitude = int(altitude.replace('FT', ''))

        # variable wind has no direction, it is stored as 0
        direction = 0 if 'VRB' == windData[0] else int(windData[0])
        speed = int(windData[1].replace('KT', ''))

        table.append(( altitude, direction, speed ))
        return table

    @staticmethod
    def parseNextUpdateTime(line : str):
        """Parse the next update time out of an 'Aktualisierung erfolgt um' line.

        The time token ('HH.MM') is expected directly in front of the token
        containing 'UTC', which may be the last or the second last token of
        the line. If no such token is found, the third token is used.

        Returns a timezone-aware datetime (UTC) or None if the line has
        fewer than four tokens.
        Raises ValueError if the selected token is not in 'HH.MM' format.
        """
        entries = line.split(' ')
        if 4 > len(entries):
            return None

        utcIndex = 2
        if 'UTC' in entries[-2]:
            utcIndex = len(entries) - 3
        elif 'UTC' in entries[-1]:
            utcIndex = len(entries) - 2

        currentUtc = dt.now(datetime.timezone.utc)

        # check if we have a day overlap
        if currentUtc.hour > int(entries[utcIndex].split('.')[0]):
            nextDay = currentUtc + datetime.timedelta(days=1)
            date = nextDay.strftime('%Y-%m-%d')
        else:
            date = currentUtc.strftime('%Y-%m-%d')

        # create the new UTC update time
        return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')

    @staticmethod
    def _fetchPage(url : str):
        """Download url and return the response body decoded as UTF-8."""
        with urllib.request.urlopen(url) as site:
            return site.read().decode('utf-8')

    @staticmethod
    def _parseGaforContent(content : str):
        """Extract the update time and the wind tables out of a report text.

        Returns (nextUpdate, windInformation) where windInformation is a list
        of [areaId, windTable] entries, or (None, None) if either the update
        time or the wind information is missing.
        """
        windInformation = []
        nextUpdate = None
        windTable = []
        areaIds = None

        # find all relevant information
        for line in content.splitlines():
            if '' == line:
                # an empty line terminates a filled wind table
                if 0 != len(windTable):
                    for areaId in areaIds:
                        windInformation.append([ areaId, windTable ])
                    areaIds = None
                    windTable = []
            elif line.startswith('GAFOR-Gebiete'):
                areaIds = DwdCrawler.parseGaforAreas(line)
                windTable = []
            elif areaIds is not None:
                windTable = DwdCrawler.parseWindTableRow(line, windTable)
            elif 'Aktualisierung erfolgt um ' in line:
                nextUpdate = DwdCrawler.parseNextUpdateTime(line)

        # the last table is not necessarily followed by an empty line
        if areaIds is not None and 0 != len(windTable):
            for areaId in areaIds:
                windInformation.append([ areaId, windTable ])

        # return the collected information
        if 0 == len(windInformation) or nextUpdate is None:
            return None, None
        return nextUpdate, windInformation

    def parseGaforPage(self, url : str):
        """Download one GAFOR report page and parse its wind information.

        The text of the last <pre> element of the page is analyzed.

        Returns (nextUpdate, windInformation) or (None, None) if the page
        does not contain the expected information.
        """
        data = DwdCrawler._fetchPage(url)
        parsed = BeautifulSoup(data, features='lxml')
        if parsed.body is None:
            return None, None

        # search the info about the GAFOR areas (the last <pre> element wins)
        content = None
        for element in parsed.body.find_all('pre'):
            content = element.text

        # always return a pair, the caller unpacks the result
        if content is None:
            return None, None

        return DwdCrawler._parseGaforContent(content)

    def receiveWindData(self):
        """Crawl the DWD overview page and all linked GAFOR report pages.

        Resets UpdateTime and WindData, then fills WindData with the wind
        tables of all areas and sets UpdateTime to the earliest next update
        time of all parsed pages.

        Returns True if new wind data is available, else False.
        """
        self.UpdateTime = None
        self.WindData = None

        data = DwdCrawler._fetchPage('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html')

        # find the pages of the GAFOR reports
        pages = []
        parsed = BeautifulSoup(data, features='lxml')
        if parsed.body is not None:
            for link in parsed.body.find_all('a', title=True):
                href = link.get('href', '')
                if 'node' in href and 'Flugwetterprognose' in link['title']:
                    # remove the jsession from the link
                    pages.append('https://www.dwd.de/' + href.split(';')[0])

        # receive the wind data
        self.WindData = {}
        for page in pages:
            nextUpdate, wind = self.parseGaforPage(page)
            if nextUpdate is not None:
                # keep the earliest update time of all pages
                if self.UpdateTime is None or self.UpdateTime > nextUpdate:
                    self.UpdateTime = nextUpdate
                for gafor in wind:
                    self.WindData[gafor[0]] = gafor[1]

        # indicate that new wind data is available
        return self.UpdateTime is not None