#!/usr/bin/env python

import datetime
import time
import urllib.request

from bs4 import BeautifulSoup
from datetime import datetime as dt

# @brief Checks the DWD pages for wind information
# Format:
#   Provides the next update time (updateTime) of the DWD page in UTC
#   Provides a list of wind information (windData)
#       - organized as a list of tuples
#           - first element of tuple: GAFOR-IDs for the following wind information
#           - second element of tuple: list of tuples of wind data
#               - first element of wind data tuple: minimum altitude AMSL for this wind information
#               - second element of wind data tuple: wind direction
#               - third element of wind data tuple: wind speed (KT)
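#
# Example of the resulting structure (illustrative values only):
#   windData = [([10, 11, 12], [(2000, 270, 15), (5000, 250, 20)])]
#   i.e. GAFOR areas 10-12: wind 270 at 15KT from 2000ft AMSL, 250 at 20KT from 5000ft AMSL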
class DwdCrawler():
    def __init__(self):
        self.UpdateTime = None
        self.WindData = None

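    # A hypothetical input line and its expected result, assumed from the
    # parsing logic below (the real DWD wording may differ):
    #   parseGaforAreas('GAFOR-Gebiete: 10-12,15') -> [10, 11, 12, 15]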
    @staticmethod
    def parseGaforAreas(areas : str):
        areas = areas.replace(':', '')
        areas = areas.split(' ')[1]
        areaIds = []

        # some IDs are given as ranges (e.g. 10-14)
        for segment in areas.split(','):
            # check if we have range definitions or single IDs
            borders = segment.split('-')
            if 2 == len(borders):
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))

        return areaIds

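    # Hypothetical table rows and their expected results, assumed from the
    # parsing logic below:
    #   parseWindTableRow('FL050 | 270/10 KT', []) -> [(5000, 270, 10)]
    #   parseWindTableRow('2000FT | VRB/05 KT', []) -> [(2000, 0, 5)]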
    @staticmethod
    def parseWindTableRow(row : str, table):
        # get the columns
        entries = row.split('|')

        # check if the line is invalid or we have the header
        if 2 > len(entries) or 'AMSL' in entries[0]:
            return table

        # parse the wind data
        windData = entries[1].strip().split(' ')[0].split('/')
        if 2 != len(windData):
            return table

        # parse the altitude (flight levels are converted to feet)
        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100
        else:
            altitude = int(altitude.replace('FT', ''))

        # extend the table (variable winds are stored with direction 0)
        if 'VRB' == windData[0]:
            entry = ( altitude, 0, int(windData[1].replace('KT', '')) )
        else:
            entry = ( altitude, int(windData[0]), int(windData[1].replace('KT', '')) )
        table.append(entry)

        return table

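    # A hypothetical input line and its expected result, assumed from the
    # parsing logic below:
    #   parseNextUpdateTime('Aktualisierung erfolgt um 18.00 UTC')
    #   -> datetime for today (or tomorrow, after a day overlap) at 18:00 UTC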
    @staticmethod
    def parseNextUpdateTime(line : str):
        entries = line.split(' ')

        # locate the time token: fall back to the third token, otherwise
        # take the token in front of 'UTC'
        utcIndex = 2
        if 'UTC' in entries[len(entries) - 2]:
            utcIndex = len(entries) - 3
        elif 'UTC' in entries[len(entries) - 1]:
            utcIndex = len(entries) - 2

        currentUtc = dt.utcfromtimestamp(int(time.time()))
        currentHour = int(currentUtc.strftime('%H'))

        # check if we have a day overlap
        if currentHour > int(entries[utcIndex].split('.')[0]):
            nextDay = currentUtc + datetime.timedelta(days=1)
            date = nextDay.strftime('%Y-%m-%d')
        else:
            date = currentUtc.strftime('%Y-%m-%d')

        # create the new UTC update time
        return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')

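    # Sketch of the kind of <pre> content this parser expects, assumed from the
    # parsing logic (the real DWD page layout may differ):
    #   GAFOR-Gebiete: 10-12,15
    #   AMSL       | Wind
    #   2000FT     | 270/10 KT
    #   FL050      | 280/15 KT
    #
    #   Aktualisierung erfolgt um 18.00 UTC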
    def parseGaforPage(self, url : str):
        with urllib.request.urlopen(url) as site:
            data = site.read().decode('utf-8')

            parsed = BeautifulSoup(data, features='lxml')

            # search the info about the GAFOR areas
            content = None
            for element in parsed.body.find_all('pre'):
                content = element.text

            # analyze the received data
            if None != content:
                windInformation = []
                nextUpdate = None
                windTable = []
                areaIds = None

                # find all relevant information
                for line in content.splitlines():
                    if '' == line:
                        if 0 != len(windTable):
                            windInformation.append(( areaIds, windTable ))
                        areaIds = None
                        windTable = []
                    elif line.startswith('GAFOR-Gebiete'):
                        areaIds = DwdCrawler.parseGaforAreas(line)
                        windTable = []
                    elif None != areaIds:
                        windTable = DwdCrawler.parseWindTableRow(line, windTable)
                    elif 'Aktualisierung erfolgt um ' in line:
                        nextUpdate = DwdCrawler.parseNextUpdateTime(line)

                # return the collected information
                if 0 == len(windInformation) or None == nextUpdate:
                    return None, None
                else:
                    return nextUpdate, windInformation

        # no parsable content found on the page
        return None, None

    def receiveWindData(self):
        self.UpdateTime = None
        self.WindData = None

        with urllib.request.urlopen('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html') as site:
            data = site.read().decode('utf-8')

            # find the pages of the GAFOR reports
            pages = []
            parsed = BeautifulSoup(data, features='lxml')
            for link in parsed.body.find_all('a', title=True):
                if 'node' in link['href'] and 'Flugwetterprognose' in link['title']:
                    # remove the jsession from the link
                    pages.append('https://www.dwd.de/' + link['href'].split(';')[0])

            # receive the wind data
            self.UpdateTime = None
            self.WindData = []
            for page in pages:
                nextUpdate, wind = self.parseGaforPage(page)
                if None != nextUpdate:
                    # keep the earliest upcoming update time of all pages
                    if None == self.UpdateTime or self.UpdateTime > nextUpdate:
                        self.UpdateTime = nextUpdate
                    self.WindData.extend(wind)

        # indicate that new wind data is available
        return None != self.UpdateTime
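
# Minimal usage sketch (illustrative; requires network access to the DWD pages):
if __name__ == '__main__':
    crawler = DwdCrawler()
    if crawler.receiveWindData():
        print('Next update (UTC):', crawler.UpdateTime)
        for areaIds, table in crawler.WindData:
            print('GAFOR areas:', areaIds)
            for altitude, direction, speed in table:
                print('  ', altitude, 'ft AMSL:', direction, '/', speed, 'KT')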