123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- #!/usr/bin/env python
- import datetime
- import time
- import urllib.request
- from bs4 import BeautifulSoup
- from datetime import datetime as dt
- # @brief Checks the DWD pages for wind information
- # Format:
- # Provides the next update time (UpdateTime) of the DWD page in UTC
- # Provides a list of wind information (WindData)
- # - organized as a list of tuples
- # - first element of tuple: GAFOR-IDs for the following wind information
- # - second element of tuple: list of tuples of wind data
- # - first element of wind data tuple: minimum altitude AMSL for this wind information
- # - second element of wind data tuple: wind direction
- # - third element of wind data tuple: wind speed (KT)
class DwdCrawler():
    """Crawls the DWD GAFOR report pages and collects the published wind data.

    Attributes:
        UpdateTime: earliest "next update" time (timezone-aware, UTC) found
            on the successfully parsed report pages, or None if no data has
            been received.
        WindData: list of ``(areaIds, windTable)`` tuples, where ``areaIds``
            is a list of GAFOR area IDs and ``windTable`` is a list of
            ``(altitude, direction, speed)`` tuples. None until
            receiveWindData() has fetched the overview page.
    """

    # overview page that links to the individual GAFOR report pages
    _OVERVIEW_URL = 'https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html'
    # prefix that is prepended to the links found on the overview page
    _BASE_URL = 'https://www.dwd.de/'

    def __init__(self):
        self.UpdateTime = None
        self.WindData = None

    @staticmethod
    def parseGaforAreas(areas : str):
        """Extracts the GAFOR area IDs of a 'GAFOR-Gebiete ...' header line.

        The second space-separated token of the line is interpreted as a
        comma-separated list of single IDs ('05') and inclusive ranges
        ('01-03'). Colons are ignored.

        Returns:
            The list of area IDs as integers.

        Raises:
            IndexError: if the line contains no space-separated second token.
            ValueError: if an ID is not an integer.
        """
        areas = areas.replace(':', '').split(' ')[1]

        areaIds = []
        # some IDs are lists
        for segment in areas.split(','):
            # check if we have range definitions or single IDs
            borders = segment.split('-')
            if 2 == len(borders):
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))

        return areaIds

    @staticmethod
    def parseWindTableRow(row : str, table):
        """Parses one '<altitude> | <direction>/<speed>KT' row of a wind table.

        Header rows (containing 'AMSL'), rows without a '|' separator and
        rows whose wind column is not of the form 'a/b' are ignored.

        Args:
            row: the raw text line of the table.
            table: list to which the parsed row is appended (in place).

        Returns:
            ``table``; a valid row adds an ``(altitude, direction, speed)``
            tuple. 'FLnnn' altitudes are multiplied by 100, 'nnnFT' altitudes
            are taken as they are, and a 'VRB' direction is stored as 0.

        Raises:
            ValueError: if altitude, direction or speed are not integers.
        """
        # get the columns
        entries = row.split('|')

        # check if the line is invalid or we have the header
        if 2 > len(entries) or 'AMSL' in entries[0]:
            return table

        # parse the wind data
        windData = entries[1].strip().split(' ')[0].split('/')
        if 2 != len(windData):
            return table

        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100
        else:
            altitude = int(altitude.replace('FT', ''))

        # variable wind has no direction and is stored as 0
        direction = 0 if 'VRB' == windData[0] else int(windData[0])
        speed = int(windData[1].replace('KT', ''))

        # extend the table
        table.append(( altitude, direction, speed ))
        return table

    @staticmethod
    def parseNextUpdateTime(line : str):
        """Parses the announced next update time ('HH.MM') of a report line.

        The time token is expected directly in front of the token that
        contains 'UTC'; if no 'UTC' token is found in the last two tokens,
        the third token of the line is used.

        Returns:
            A timezone-aware UTC datetime of the next update, or None if the
            line consists of fewer than four space-separated tokens.

        Raises:
            ValueError: if the selected token is not of the form 'HH.MM'.
        """
        entries = line.split(' ')
        if 4 > len(entries):
            return None

        utcIndex = 2
        if 'UTC' in entries[-2]:
            utcIndex = len(entries) - 3
        elif 'UTC' in entries[-1]:
            utcIndex = len(entries) - 2

        currentUtc = dt.now(datetime.timezone.utc)

        # check if we have a day overlap: an update hour that is already
        # in the past refers to the next day
        if currentUtc.hour > int(entries[utcIndex].split('.')[0]):
            currentUtc += datetime.timedelta(days=1)
        date = currentUtc.strftime('%Y-%m-%d')

        # create the new UTC update time
        return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')

    @staticmethod
    def _parseGaforContent(content : str):
        """Extracts the update time and all wind tables of a report text.

        Returns:
            ``(nextUpdate, windInformation)`` or ``(None, None)`` if either
            the update time or the wind tables are missing.
        """
        windInformation = []
        nextUpdate = None
        windTable = []
        areaIds = None

        # find all relevant information
        for line in content.splitlines():
            if '' == line:
                # an empty line terminates the current wind table
                if windTable:
                    windInformation.append(( areaIds, windTable ))
                areaIds = None
                windTable = []
            elif line.startswith('GAFOR-Gebiete'):
                areaIds = DwdCrawler.parseGaforAreas(line)
                windTable = []
            elif areaIds is not None:
                windTable = DwdCrawler.parseWindTableRow(line, windTable)
            elif 'Aktualisierung erfolgt um ' in line:
                nextUpdate = DwdCrawler.parseNextUpdateTime(line)

        # keep a table that runs until the end of the text without a
        # terminating empty line
        if windTable:
            windInformation.append(( areaIds, windTable ))

        # return the collected information
        if not windInformation or nextUpdate is None:
            return None, None
        return nextUpdate, windInformation

    def parseGaforPage(self, url : str):
        """Downloads one GAFOR report page and parses its wind information.

        Only the text of the last <pre> element of the page is analyzed.

        Returns:
            ``(nextUpdate, windInformation)`` or ``(None, None)`` if the page
            contains no <pre> element, no update time or no wind table.
        """
        with urllib.request.urlopen(url) as site:
            data = site.read().decode('utf-8')

        parsed = BeautifulSoup(data, features='lxml')

        # search the info about the GAFOR areas
        content = None
        for element in parsed.body.find_all('pre'):
            content = element.text

        # a page without report text must not break the caller's unpacking
        if content is None:
            return None, None

        # analyze the received data
        return DwdCrawler._parseGaforContent(content)

    def receiveWindData(self):
        """Downloads all GAFOR report pages and updates UpdateTime and WindData.

        UpdateTime is set to the earliest next-update time of all parsed
        pages and WindData to the concatenated wind information.

        Returns:
            True if wind data of at least one page is available, else False.
        """
        self.UpdateTime = None
        self.WindData = None

        with urllib.request.urlopen(self._OVERVIEW_URL) as site:
            data = site.read().decode('utf-8')

        # find the pages of the GAFOR reports
        parsed = BeautifulSoup(data, features='lxml')
        pages = [
            # remove the jsession from the link
            self._BASE_URL + link['href'].split(';')[0]
            for link in parsed.body.find_all('a', title=True, href=True)
            if 'node' in link['href'] and 'Flugwetterprognose' in link['title']
        ]

        # receive the wind data
        self.WindData = []
        for page in pages:
            nextUpdate, wind = self.parseGaforPage(page)
            if nextUpdate is not None:
                if self.UpdateTime is None or self.UpdateTime > nextUpdate:
                    self.UpdateTime = nextUpdate
                self.WindData.extend(wind)

        # indicate that new wind data is available
        return self.UpdateTime is not None