DwdCrawler.py

#!/usr/bin/env python
import datetime
import time
import urllib.request
from bs4 import BeautifulSoup
from datetime import datetime as dt
# @brief Checks the DWD pages for wind information
# Format:
# Provides the next update time (UpdateTime) of the DWD page in UTC
# Provides a list of wind information (WindData)
# - organized as a list of tuples
# - first element of tuple: GAFOR-ID for the following wind information
# - second element of tuple: list of tuples of wind data
#   - first element of wind data tuple: minimum altitude AMSL for this wind information
#   - second element of wind data tuple: wind direction
#   - third element of wind data tuple: wind speed (KT)
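# Example (hypothetical values): the entry for GAFOR area 10 with two layers
# could look like
#   [ 10, [ (2000, 270, 10), (5000, 290, 25) ] ]
# i.e. wind at 2000 ft AMSL from 270 degrees at 10 KT, at 5000 ft from 290 degrees at 25 KT.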
class DwdCrawler():
    def __init__(self):
        self.UpdateTime = None
        self.WindData = None
    @staticmethod
    def parseGaforAreas(areas : str):
        areas = areas.replace(':', '')
        areas = areas.split(' ')[1]
        areaIds = []
        # some IDs are defined as ranges, others as single IDs
        for segment in areas.split(','):
            # check if we have range definitions or single IDs
            borders = segment.split('-')
            if 2 == len(borders):
                areaIds.extend(range(int(borders[0]), int(borders[1]) + 1))
            else:
                areaIds.append(int(borders[0]))
        return areaIds
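    # e.g. parseGaforAreas('GAFOR-Gebiete: 10-12,15') returns [10, 11, 12, 15]
    # (illustrative call; the exact wording on the DWD page may differ)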
    @staticmethod
    def parseWindTableRow(row : str, table):
        # get the columns
        entries = row.split('|')
        # check if the line is invalid or we have the header
        if 2 > len(entries) or 'AMSL' in entries[0]:
            return table
        # parse the wind data (format: DIR/SPEEDKT)
        windData = entries[1].strip().split(' ')[0].split('/')
        if 2 != len(windData):
            return table
        # convert the altitude column (flight levels to feet)
        altitude = entries[0].strip()
        if 'FL' in altitude:
            altitude = int(altitude.replace('FL', '')) * 100
        else:
            altitude = int(altitude.replace('FT', ''))
        # variable winds are stored with direction 0
        if 'VRB' == windData[0]:
            row = ( altitude, 0, int(windData[1].replace('KT', '')) )
        else:
            row = ( altitude, int(windData[0]), int(windData[1].replace('KT', '')) )
        table.append(row)
        return table
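    # e.g. parseWindTableRow('FL050 | 270/15KT', []) returns [(5000, 270, 15)],
    # and 'VRB' directions are mapped to 0
    # (illustrative row; real DWD tables carry additional columns)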
    @staticmethod
    def parseNextUpdateTime(line : str):
        entries = line.split(' ')
        if 4 > len(entries):
            return None
        # find the column that contains the HH.MM timestamp
        utcIndex = 2
        if 'UTC' in entries[len(entries) - 2]:
            utcIndex = len(entries) - 3
        elif 'UTC' in entries[len(entries) - 1]:
            utcIndex = len(entries) - 2
        currentUtc = dt.utcfromtimestamp(int(time.time()))
        currentHour = int(currentUtc.strftime('%H'))
        # check if we have a day overlap
        if currentHour > int(entries[utcIndex].split('.')[0]):
            nextDay = currentUtc + datetime.timedelta(days=1)
            date = nextDay.strftime('%Y-%m-%d')
        else:
            date = currentUtc.strftime('%Y-%m-%d')
        # create the new UTC update time
        return dt.strptime(date + ' ' + entries[utcIndex] + '+0000', '%Y-%m-%d %H.%M%z')
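    # e.g. parseNextUpdateTime('Aktualisierung erfolgt um 14.30 UTC') returns a
    # timezone-aware datetime for 14:30 UTC of today, or of tomorrow if the
    # current UTC hour is already past 14
    # (illustrative line; the real page may carry additional words)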
    def parseGaforPage(self, url : str):
        with urllib.request.urlopen(url) as site:
            data = site.read().decode('utf-8')
        parsed = BeautifulSoup(data, features='lxml')
        # search the info about the GAFOR areas (kept in the last <pre> block)
        content = None
        for element in parsed.body.find_all('pre'):
            content = element.text
        if content is None:
            return None, None
        # analyze the received data
        windInformation = []
        nextUpdate = None
        windTable = []
        areaIds = None
        # find all relevant information
        for line in content.splitlines():
            if '' == line:
                # a blank line closes the current wind table
                if 0 != len(windTable):
                    for areaId in areaIds:
                        windInformation.append([ areaId, windTable ])
                    areaIds = None
                    windTable = []
            elif line.startswith('GAFOR-Gebiete'):
                areaIds = DwdCrawler.parseGaforAreas(line)
                windTable = []
            elif areaIds is not None:
                windTable = DwdCrawler.parseWindTableRow(line, windTable)
            elif 'Aktualisierung erfolgt um ' in line:
                nextUpdate = DwdCrawler.parseNextUpdateTime(line)
        # flush the last table in case the page does not end with a blank line
        if areaIds is not None and 0 != len(windTable):
            for areaId in areaIds:
                windInformation.append([ areaId, windTable ])
        # return the collected information
        if 0 == len(windInformation) or nextUpdate is None:
            return None, None
        return nextUpdate, windInformation
    def receiveWindData(self):
        self.UpdateTime = None
        self.WindData = {}
        with urllib.request.urlopen('https://www.dwd.de/DE/fachnutzer/luftfahrt/teaser/luftsportberichte/luftsportberichte_node.html') as site:
            data = site.read().decode('utf-8')
        # find the pages of the GAFOR reports
        pages = []
        parsed = BeautifulSoup(data, features='lxml')
        for link in parsed.body.find_all('a', title=True):
            if 'node' in link['href'] and 'Flugwetterprognose' in link['title']:
                # remove the jsessionid part from the link
                pages.append('https://www.dwd.de/' + link['href'].split(';')[0])
        # receive the wind data of every report page
        for page in pages:
            nextUpdate, wind = self.parseGaforPage(page)
            if nextUpdate is not None:
                # keep the earliest of the announced update times
                if self.UpdateTime is None or self.UpdateTime > nextUpdate:
                    self.UpdateTime = nextUpdate
                for gafor in wind:
                    self.WindData[gafor[0]] = gafor[1]
        # indicate whether new wind data is available
        return self.UpdateTime is not None
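
# A minimal usage sketch, assuming network access and installed bs4/lxml
# (this __main__ block is illustrative and not part of the crawler itself):
if __name__ == '__main__':
    crawler = DwdCrawler()
    if crawler.receiveWindData():
        print('next update expected at', crawler.UpdateTime)
        for areaId, table in crawler.WindData.items():
            print('GAFOR area', areaId, '->', table)
    else:
        print('no wind data available')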