DwdCrawler.py

#!/usr/bin/env python
import datetime
import time
import urllib.request
from bs4 import BeautifulSoup
from datetime import datetime as dt

# @brief Checks the DWD pages for wind information
# Format:
# Provides the next update time (updateTime) of the DWD page in UTC
# Provides a list of wind information (windData)
# - organized as a list of tuples
#   - first element of tuple: GAFOR-IDs for the following wind information
#   - second element of tuple: list of tuples of wind data
#     - first element of wind data tuple: minimum altitude AMSL for this wind information
#     - second element of wind data tuple: wind direction
#     - third element of wind data tuple: wind speed (KT)
class DwdCrawler():
    def __init__(self):
        self.updateTime = None
        self.windData = None

    @staticmethod
    def parseGaforAreas(areas : str):
        areas = areas.replace(':', '')
        areas = areas.split(' ')[1]
        areaIds = []
        # some IDs are lists
        for segment in areas.split(','):
            # check if we have range definitions or single IDs
            borders = segment.split('-')
            if 2 == len(borders):
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))
        return areaIds
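
    # A hedged example of the line shape parseGaforAreas assumes (the exact
    # DWD wording may differ; the IDs here are illustrative):
    #   DwdCrawler.parseGaforAreas('GAFOR-Gebiete 10-12,24:') -> [10, 11, 12, 24]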

    @staticmethod
    def parseWindTableRow(row : str, table):
        # get the columns
        entries = row.split('|')
        # check if the line is invalid or we have the header
        if 2 > len(entries) or 'AMSL' in entries[0]:
            return table
        # parse the wind data
        windData = entries[1].strip().split(' ')[0].split('/')
        if 2 != len(windData):
            return table
        # extend the table
        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100
        else:
            altitude = int(altitude.replace('FT', ''))
        row = ( altitude, int(windData[0]), int(windData[1].replace('KT', '')) )
        table.append(row)
        return table
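
    # Sketches of rows this parser accepts, assuming the 'ALT | DIR/SPEEDKT'
    # layout (illustrative values, not taken from a real report):
    #   DwdCrawler.parseWindTableRow('FL50   | 270/15KT', []) -> [(5000, 270, 15)]
    #   DwdCrawler.parseWindTableRow('2000FT | 180/05KT', []) -> [(2000, 180, 5)]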

    @staticmethod
    def parseNextUpdateTime(line : str):
        entries = line.split(' ')
        if 4 <= len(entries):
            utcIndex = 2
            if 'UTC' in entries[len(entries) - 2]:
                utcIndex = len(entries) - 3
            elif 'UTC' in entries[len(entries) - 1]:
                utcIndex = len(entries) - 2
            currentUtc = dt.utcfromtimestamp(int(time.time()))
            currentHour = int(currentUtc.strftime('%H'))
            # check if we have a day overlap
            if currentHour > int(entries[utcIndex].split('.')[0]):
                nextDay = currentUtc + datetime.timedelta(days=1)
                date = nextDay.strftime('%Y-%m-%d')
            else:
                date = currentUtc.strftime('%Y-%m-%d')
            # create the new UTC update time
            return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')
        # not enough tokens to extract a time
        return None
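
    # A hedged example, assuming an announcement line such as
    # 'Die Aktualisierung erfolgt um 18.00 UTC': '18.00' is resolved against
    # the current UTC date, rolling over to the next day if that hour has
    # already passed, and returned as a timezone-aware datetime.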

    def parseGaforPage(self, url : str):
        with urllib.request.urlopen(url) as site:
            data = site.read().decode('utf-8')
        parsed = BeautifulSoup(data, features='lxml')
        # search the info about the GAFOR areas
        content = None
        for element in parsed.body.find_all('pre'):
            content = element.text
        # analyze the received data
        if content is not None:
            windInformation = []
            nextUpdate = None
            windTable = []
            areaIds = None
            # find all relevant information
            for line in content.splitlines():
                if '' == line:
                    if 0 != len(windTable):
                        windInformation.append(( areaIds, windTable ))
                        areaIds = None
                        windTable = []
                elif line.startswith('GAFOR-Gebiete'):
                    areaIds = DwdCrawler.parseGaforAreas(line)
                    windTable = []
                elif areaIds is not None:
                    windTable = DwdCrawler.parseWindTableRow(line, windTable)
                elif 'Aktualisierung erfolgt um ' in line:
                    nextUpdate = DwdCrawler.parseNextUpdateTime(line)
            # return the collected information
            if 0 == len(windInformation) or nextUpdate is None:
                return None, None
            else:
                return nextUpdate, windInformation
        # no <pre> block found on the page
        return None, None
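
    # The return shape, sketched with made-up values:
    #   (<aware datetime of next update>, [([10, 11, 12], [(5000, 270, 15), ...]), ...])
    # or (None, None) when the page held no usable report.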

    def receiveWindData(self):
        self.updateTime = None
        self.windData = None
        with urllib.request.urlopen('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html') as site:
            data = site.read().decode('utf-8')
        # find the pages of the GAFOR reports
        pages = []
        parsed = BeautifulSoup(data, features='lxml')
        for link in parsed.body.find_all('a', title=True):
            if 'node' in link['href'] and 'Flugwetterprognose' in link['title']:
                # remove the jsession from the link
                pages.append('https://www.dwd.de/' + link['href'].split(';')[0])
        # receive the wind data
        self.windData = []
        for page in pages:
            nextUpdate, wind = self.parseGaforPage(page)
            if nextUpdate is not None:
                # keep the earliest announced update time of all pages
                if self.updateTime is None or self.updateTime > nextUpdate:
                    self.updateTime = nextUpdate
                self.windData.extend(wind)
        # indicate whether new wind data is available
        return self.updateTime is not None
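
# A minimal usage sketch, not part of the original module: it assumes network
# access and the current DWD page layout, and simply prints what was collected.
if __name__ == '__main__':
    crawler = DwdCrawler()
    if crawler.receiveWindData():
        print('next update:', crawler.updateTime)
        for areaIds, winds in crawler.windData:
            print('areas:', areaIds)
            for altitude, direction, speed in winds:
                print('  {0} ft AMSL: {1:03d}/{2:02d}KT'.format(altitude, direction, speed))
    else:
        print('no wind data received')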