@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+
+import datetime
+import time
+import urllib.request
+
+from bs4 import BeautifulSoup
+from datetime import datetime as dt
+from threading import Thread
+
+# @brief Checks the DWD pages for wind information
+# Format:
+# Provides the next update time (updateTime) of the DWD page in UTC
+# Provides a list of wind information (windData)
+# - organized as a list of tuples
+#   - first element of tuple: GAFOR-IDs for the following wind information
+#   - second element of tuple: list of tuples of wind data
+#     - first element of wind data tuple: minimum altitude AMSL for this wind information
+#     - second element of wind data tuple: wind direction
+#     - third element of wind data tuple: wind speed (KT)
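+#
+# Illustrative example (hypothetical values, not real DWD data):
+#   updateTime: 2024-05-01 12:30:00+00:00
+#   windData:   [([10, 11, 12], [(2000, 270, 15), (5000, 280, 25)])]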
+class DwdCrawler(Thread):
+    def __init__(self):
+        Thread.__init__(self)
+        self.dataAvailable = False
+        self.executing = True
+        self.start()
+
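+    # parses the GAFOR area IDs of a header line into a flat list of integers
+    # e.g. 'GAFOR-Gebiete: 10,12-14' -> [10, 12, 13, 14] (illustrative input)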
+    @staticmethod
+    def parseGaforAreas(areas : str):
+        areas = areas.replace(':', '')
+        areas = areas.split(' ')[1]
+        areaIds = []
+
+        # some entries are ranges (e.g. 12-14), others are single IDs
+        for segment in areas.split(','):
+            # check if we have range definitions or single IDs
+            borders = segment.split('-')
+            if 2 == len(borders):
+                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
+            else:
+                areaIds.append(int(borders[0]))
+
+        return areaIds
+
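+    # parses one row of the wind table and appends it to 'table'
+    # e.g. ' FL050 | 270/25KT' appends (5000, 270, 25) (illustrative row)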
+    @staticmethod
+    def parseWindTableRow(row : str, table):
+        # get the columns
+        entries = row.split('|')
+
+        # check if the line is invalid or we have the header
+        if 2 > len(entries) or 'AMSL' in entries[0]:
+            return table
+
+        # parse the wind data
+        windData = entries[1].strip().split(' ')[0].split('/')
+        if 2 != len(windData):
+            return table
+
+        # extend the table
+        altitude = entries[0].strip()
+        if 'FL' in altitude:
+            altitude = int(altitude.replace('FL', '')) * 100
+        else:
+            altitude = int(altitude.replace('FT', ''))
+        row = ( altitude, int(windData[0]), int(windData[1].replace('KT', '')) )
+        table.append(row)
+
+        return table
+
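+    # parses the announced next update time into a timezone-aware datetime
+    # e.g. 'Aktualisierung erfolgt um 12.30 UTC' -> today/tomorrow 12:30 UTC (illustrative time)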
+    @staticmethod
+    def parseNextUpdateTime(line : str):
+        entries = line.split(' ')
+        if 4 <= len(entries):
+            utcIndex = 2
+            if 'UTC' in entries[len(entries) - 2]:
+                utcIndex = len(entries) - 3
+            elif 'UTC' in entries[len(entries) - 1]:
+                utcIndex = len(entries) - 2
+
+            currentUtc = dt.utcfromtimestamp(int(time.time()))
+            currentHour = int(currentUtc.strftime('%H'))
+
+            # check if we have a day overlap
+            if currentHour > int(entries[utcIndex].split('.')[0]):
+                nextDay = currentUtc + datetime.timedelta(days=1)
+                date = nextDay.strftime('%Y-%m-%d')
+            else:
+                date = currentUtc.strftime('%Y-%m-%d')
+
+            # create the new UTC update time
+            return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')
+
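+    # downloads one GAFOR page and extracts the update time and the wind tables
+    # (assumption: the report text is contained in the last <pre> element of the page)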
+    def parseGaforPage(self, url : str):
+        with urllib.request.urlopen(url) as site:
+            data = site.read().decode('utf-8')
+
+        parsed = BeautifulSoup(data, features='lxml')
+
+        # search the info about the GAFOR areas
+        content = None
+        for element in parsed.body.find_all('pre'):
+            content = element.text
+
+        # analyze the received data
+        if content is not None:
+            windInformation = []
+            updateTime = None
+            windTable = []
+            areaIds = None
+
+            # find all relevant information
+            for line in content.splitlines():
+                if '' == line:
+                    if 0 != len(windTable):
+                        windInformation.append(( areaIds, windTable ))
+                        areaIds = None
+                        windTable = []
+                elif line.startswith('GAFOR-Gebiete'):
+                    areaIds = DwdCrawler.parseGaforAreas(line)
+                    windTable = []
+                elif areaIds is not None:
+                    windTable = DwdCrawler.parseWindTableRow(line, windTable)
+                elif 'Aktualisierung erfolgt um ' in line:
+                    updateTime = DwdCrawler.parseNextUpdateTime(line)
+
+            # return the collected information
+            if 0 != len(windInformation) and updateTime is not None:
+                return updateTime, windInformation
+
+        return None
+
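+    # entry point of the crawler thread: collects all GAFOR report pages and
+    # publishes updateTime and windData once crawling has finished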
+    def run(self):
+        with urllib.request.urlopen('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html') as site:
+            data = site.read().decode('utf-8')
+
+        # find the pages of the GAFOR reports
+        pages = []
+        parsed = BeautifulSoup(data, features='lxml')
+        for link in parsed.body.find_all('a', title=True):
+            if 'node' in link['href'] and 'Flugwetterprognose' in link['title']:
+                # remove the jsession from the link
+                pages.append('https://www.dwd.de/' + link['href'].split(';')[0])
+
+        # receive the wind data
+        self.updateTime = None
+        self.windData = []
+        for page in pages:
+            result = self.parseGaforPage(page)
+            if result is not None:
+                nextUpdate, wind = result
+                if self.updateTime is None or self.updateTime > nextUpdate:
+                    self.updateTime = nextUpdate
+                self.windData.extend(wind)
+
+        # indicate that new wind data is available
+        if self.updateTime is not None:
+            self.dataAvailable = True
+
+        self.executing = False
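+
+# Minimal usage sketch (an assumption for illustration, not part of the original design):
+if __name__ == '__main__':
+    # the constructor starts the crawler thread; wait for it to finish
+    crawler = DwdCrawler()
+    crawler.join()
+    if crawler.dataAvailable:
+        print('next update (UTC):', crawler.updateTime)
+        for areaIds, table in crawler.windData:
+            print(areaIds, table)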