- import datetime
- import time
- import urllib.request
- from bs4 import BeautifulSoup
- from datetime import datetime as dt
- from threading import Thread
class DwdCrawler(Thread):
    """Crawls DWD (Deutscher Wetterdienst) GAFOR luftsport pages for wind data.

    The crawler starts itself as a background thread on construction.
    When it finishes, ``executing`` is False; if parsing succeeded,
    ``dataAvailable`` is True, ``updateTime`` holds the earliest next
    publication time (aware UTC datetime) and ``windData`` a list of
    ``(areaIds, windTable)`` tuples.
    """

    def __init__(self):
        """Initialize state flags and immediately start the crawler thread."""
        Thread.__init__(self)
        self.dataAvailable = False  # becomes True once wind data was parsed
        self.executing = True       # becomes False when run() finishes
        self.start()

    @staticmethod
    def parseGaforAreas(areas: str):
        """Parse a 'GAFOR-Gebiete: ...' line into a flat list of area IDs.

        Supports comma-separated entries and inclusive dash ranges, e.g.
        'GAFOR-Gebiete: 10-12,15' -> [10, 11, 12, 15].
        """
        areas = areas.replace(':', '')
        areas = areas.split(' ')[1]
        areaIds = []

        for segment in areas.split(','):
            borders = segment.split('-')
            if 2 == len(borders):
                # inclusive range, e.g. '10-12' -> 10, 11, 12
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))
        return areaIds

    @staticmethod
    def parseWindTableRow(row: str, table):
        """Parse one '<altitude> | <dir>/<speed>KT ...' wind-table row.

        Appends ``(altitude_ft, direction_deg, speed_kt)`` to ``table`` and
        returns it. Header rows ('AMSL'), separator rows and rows without a
        'dir/speed' first column leave ``table`` unchanged.
        """
        entries = row.split('|')

        # skip rows without columns and the 'AMSL' header row
        if 2 > len(entries) or 'AMSL' in entries[0]:
            return table

        windData = entries[1].strip().split(' ')[0].split('/')
        if 2 != len(windData):
            return table

        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100  # flight level -> feet
        else:
            altitude = int(altitude.replace('FT', ''))
        table.append((altitude, int(windData[0]), int(windData[1].replace('KT', ''))))
        return table

    @staticmethod
    def parseNextUpdateTime(line: str):
        """Parse the next-update announcement line into an aware UTC datetime.

        Expects a '%H.%M' token followed (directly or one token later) by
        'UTC'. If that time of day already passed, the returned datetime is
        on the following day. Returns None for lines that are too short.
        """
        entries = line.split(' ')
        if 4 <= len(entries):
            utcIndex = 2
            if 'UTC' in entries[len(entries) - 2]:
                utcIndex = len(entries) - 3
            elif 'UTC' in entries[len(entries) - 1]:
                # BUGFIX: was len(entries - 2) -> TypeError (list minus int)
                utcIndex = len(entries) - 2

            # aware replacement for the deprecated dt.utcfromtimestamp(...)
            currentUtc = dt.now(datetime.timezone.utc)
            currentHour = int(currentUtc.strftime('%H'))

            if currentHour > int(entries[utcIndex].split('.')[0]):
                # announced hour already passed today -> next occurrence is tomorrow
                nextDay = currentUtc + datetime.timedelta(days=1)
                date = nextDay.strftime('%Y-%m-%d')
            else:
                date = currentUtc.strftime('%Y-%m-%d')

            return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')
        return None

    def parseGaforPage(self, url: str):
        """Download and parse a single GAFOR page.

        Returns ``(updateTime, windInformation)`` where ``windInformation``
        is a list of ``(areaIds, windTable)`` tuples, or ``(None, None)``
        when the page yields no usable data.
        BUGFIX: previously returned a bare None on failure, which broke the
        two-value unpacking in run().
        """
        with urllib.request.urlopen(url) as site:
            data = site.read().decode('utf-8')
        parsed = BeautifulSoup(data, features='lxml')

        # use the last <pre> element on the page
        content = None
        for element in parsed.body.find_all('pre'):
            content = element.text

        if content is None:
            return None, None

        windInformation = []
        updateTime = None  # BUGFIX: was misspelled 'udpdateTime' -> NameError at the end
        windTable = []
        areaIds = None

        for line in content.splitlines():
            if '' == line:
                # blank line terminates the current area block
                if 0 != len(windTable):
                    windInformation.append((areaIds, windTable))
                areaIds = None
                windTable = []
            elif line.startswith('GAFOR-Gebiete'):
                areaIds = DwdCrawler.parseGaforAreas(line)
                windTable = []
            elif areaIds is not None:
                windTable = DwdCrawler.parseWindTableRow(line, windTable)
            elif 'Aktualisierung erfolgt um ' in line:
                updateTime = DwdCrawler.parseNextUpdateTime(line)

        # BUGFIX: flush a trailing table when content does not end with a blank line
        if 0 != len(windTable) and areaIds is not None:
            windInformation.append((areaIds, windTable))

        if 0 != len(windInformation) and updateTime is not None:
            return updateTime, windInformation
        return None, None

    def run(self):
        """Fetch the overview page, collect GAFOR subpage links and parse them."""
        with urllib.request.urlopen('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html') as site:
            data = site.read().decode('utf-8')

        pages = []
        parsed = BeautifulSoup(data, features='lxml')
        for link in parsed.body.find_all('a', title=True):
            if 'node' in link['href'] and 'Flugwetterprognose' in link['title']:
                # strip the jsessionid part after ';'
                pages.append('https://www.dwd.de/' + link['href'].split(';')[0])

        self.updateTime = None
        self.windData = []
        for page in pages:
            # BUGFIX: renamed 'next' (shadowed builtin); tolerate (None, None)
            nextUpdate, wind = self.parseGaforPage(page)
            if nextUpdate is not None:
                # keep the earliest upcoming update time across all pages
                if self.updateTime is None or self.updateTime > nextUpdate:
                    self.updateTime = nextUpdate
                self.windData.extend(wind)

        if self.updateTime is not None:
            self.dataAvailable = True
        self.executing = False
|