|
@@ -0,0 +1,151 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+
|
|
|
+import argparse
|
|
|
+import configparser
|
|
|
+import os
|
|
|
+import urllib.request
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+from aman.types.PerformanceData import PerformanceData
|
|
|
+
|
|
|
def findAircraftPages(rooturl : str, suburl : str, visited = None):
    """Collect the URLs of all aircraft pages reachable from a category page.

    The page at ``rooturl + suburl`` is downloaded and scanned for anchors
    whose ``href`` consists of exactly three '/'-separated parts, whose last
    part equals the anchor's ``title``, and that are neither 'Category' nor
    'Special' links. Anchors titled 'Category:Aircraft' whose text does not
    contain 'previous' are followed recursively.

    Args:
        rooturl: Scheme and host prefix that is prepended to every href.
        suburl: Path (and query) of the category page to scan.
        visited: Optional set of sub-URLs that were already scanned. It is
            shared across the recursion so that a page which is linked more
            than once (or links back to itself) is downloaded only once.
            Callers normally omit it.

    Returns:
        A list with the absolute URLs of the aircraft pages that were found.
    """
    if visited is None:
        visited = set()
    if suburl in visited:
        return []
    visited.add(suburl)

    # the context manager closes the connection, no explicit close() needed
    with urllib.request.urlopen(rooturl + suburl) as site:
        data = site.read().decode('utf-8')

    body = BeautifulSoup(data, features='lxml').body
    if body is None:
        return []

    aircrafts = []

    # href=True skips anchors without a target, which would raise KeyError
    for link in body.find_all('a', title=True, href=True):
        split = link['href'].split('/')
        if (3 == len(split) and split[2] == link['title']
                and 'Category' not in link['title']
                and 'Special' not in link['href']):
            aircrafts.append(rooturl + link['href'])

    # follow the pagination links, but never the one that leads backwards
    for link in body.find_all('a', attrs={ 'title': 'Category:Aircraft', 'href': True }):
        if 'previous' not in link.text:
            aircrafts.extend(findAircraftPages(rooturl, link['href'], visited))

    return aircrafts
|
|
|
+
|
|
|
def findAndParseEntry(tableRow, startIdx, substring, default):
    """Search a table row backwards for a cell that contains ``substring``.

    The search starts at ``startIdx`` and walks towards the front of the
    row; index 0 is never inspected. The text of the first matching cell is
    split at single spaces and its first token is parsed as an integer.

    Args:
        tableRow: Indexable sequence of cells exposing a ``text`` attribute.
        startIdx: Index of the first cell to inspect.
        substring: Marker (e.g. a unit) that identifies a matching cell.
        default: Value used when the matching cell carries no separate
            value token or when that token is not a valid integer.

    Returns:
        A tuple ``(value, nextIdx)``. For a match ``nextIdx`` is the index
        of the matching cell minus two. If no cell matches, ``(0, -1)`` is
        returned.
    """
    while 0 < startIdx:
        text = tableRow[startIdx].text
        if substring not in text:
            startIdx -= 1
            continue

        split = text.split(' ')
        if 1 >= len(split):
            # only the marker itself is present, there is no value token
            return default, startIdx - 2

        try:
            return int(split[0]), startIdx - 2
        except ValueError:
            # a non-numeric placeholder is handled like a missing value
            # instead of aborting the caller
            return default, startIdx - 2

    return 0, -1
|
|
|
+
|
|
|
def findAndParseSpeedEntry(tableRow, startIdx, default):
    """Look up the next speed entry in a table row.

    Delegates to findAndParseEntry with 'kts' as the marker that identifies
    a matching cell and returns its result unchanged.
    """
    speedMarker = 'kts'
    return findAndParseEntry(tableRow, startIdx, speedMarker, default)
|
|
|
+
|
|
|
def findAndParseRodEntry(tableRow, startIdx, default):
    """Look up the next ROD entry in a table row.

    Delegates to findAndParseEntry with 'ft/min' as the marker that
    identifies a matching cell and returns its result unchanged.
    """
    rodMarker = 'ft/min'
    return findAndParseEntry(tableRow, startIdx, rodMarker, default)
|
|
|
+
|
|
|
def parsePerformanceEntries(tableRowSpeeds, tableRowRODs):
    """Extract all speed and ROD values from two table rows.

    Both rows are processed from their last cell towards the front, so the
    returned lists are ordered from the last column to the first. Every
    value handed back by the lookup helpers is appended, including the one
    of the lookup that terminates the loop.

    Args:
        tableRowSpeeds: Cells of the row that contains the speed values.
        tableRowRODs: Cells of the row that contains the ROD values.

    Returns:
        A tuple ``(speeds, rods)`` of two lists.
    """
    # collect the speeds; the very first lookup uses 140 as fallback,
    # all later ones use 250
    speeds = []
    cursor = len(tableRowSpeeds) - 1
    while cursor > 0:
        fallback = 250 if speeds else 140
        value, cursor = findAndParseSpeedEntry(tableRowSpeeds, cursor, fallback)
        speeds.append(value)

    # collect the RODs with a fixed fallback of 2000
    rods = []
    cursor = len(tableRowRODs) - 1
    while cursor > 0:
        value, cursor = findAndParseRodEntry(tableRowRODs, cursor, 2000)
        rods.append(value)

    return speeds, rods
|
|
|
+
|
|
|
def parsePerformanceData(url : str):
    """Download an aircraft page and extract its performance data.

    The ICAO code is read from the ``h5`` element with id 'siteSub' and
    class 'subtitle'. The speeds and RODs are read from the second and
    third row of the 'wikitable' table.

    Args:
        url: Absolute URL of the aircraft page.

    Returns:
        A tuple ``(valid, aircraft)``. ``aircraft`` is ``None`` if the ICAO
        code or a performance table with at least three rows is missing;
        otherwise it is a PerformanceData instance. ``valid`` is True only
        if at least four speeds and three RODs were found.
    """
    # the context manager closes the connection, no explicit close() needed
    with urllib.request.urlopen(url) as site:
        data = site.read().decode('utf-8')

    body = BeautifulSoup(data, features='lxml').body
    if body is None:
        return False, None

    # check if we find the ICAO code
    icao = body.find('h5', attrs={ 'id' : 'siteSub', 'class' : 'subtitle'})
    if icao is None or '' == icao.text:
        return False, None

    aircraft = PerformanceData(icao.text)
    performanceTable = body.find('table', attrs={ 'class' : 'wikitable', 'style' : 'font-size: 90%;' })
    if performanceTable is None:
        return False, None

    # indexing a too short row list raises IndexError, so check the length
    rows = performanceTable.find_all('tr')
    if 3 > len(rows):
        return False, None

    speeds, rods = parsePerformanceEntries(rows[1].find_all('td'), rows[2].find_all('td'))

    # short speed lists get their second entry duplicated; this requires
    # that a second entry exists
    if 2 <= len(speeds) < 10:
        speeds.insert(1, speeds[1])

    # create the speed data
    if len(speeds) >= 4:
        aircraft.speedApproach = speeds[0]
        aircraft.speedBelowFL100 = speeds[1]
        aircraft.speedAboveFL100 = speeds[2]
        aircraft.speedAboveFL240 = speeds[3]
    # create the ROD data
    if len(rods) >= 3:
        aircraft.rodBelowFL100 = rods[0]
        aircraft.rodAboveFL100 = rods[1]
        aircraft.rodAboveFL240 = rods[2]

    return len(speeds) >= 4 and len(rods) >= 3, aircraft
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: crawl the aircraft category, parse every aircraft
    # page and store the results as PerformanceData.ini in the directory
    # given on the command line.

    # create the commandline parser
    parser = argparse.ArgumentParser(description='Extract the aircraft performance data')
    parser.add_argument('directory', help='Directory where to store the performance data configuration')
    args = parser.parse_args()

    # create the directory if it does not exist; exist_ok avoids the race
    # between a separate existence check and the creation
    os.makedirs(args.directory, exist_ok=True)

    # parse the aircrafts
    links = findAircraftPages('https://www.skybrary.aero', '/index.php?title=Category:Aircraft')
    print('Found ' + str(len(links)) + ' aircrafts')

    aircrafts = []
    for parsed, link in enumerate(links, start=1):
        valid, aircraft = parsePerformanceData(link)

        # '\r' keeps the progress output on a single line
        print('Parsed ' + str(parsed) + ' of ' + str(len(links)), end='\r')

        if not valid:
            print('Unable to find performance data for ' + link)
            continue

        aircrafts.append(aircraft)

    print('Successfully parsed ' + str(len(aircrafts)) + ' of ' + str(len(links)) + ' aircrafts')

    # create the configuration file with one section per ICAO code
    config = configparser.ConfigParser()
    for aircraft in aircrafts:
        config[aircraft.icao] = {
            'speedAboveFL240' : aircraft.speedAboveFL240,
            'rodAboveFL240' : aircraft.rodAboveFL240,
            'speedAboveFL100' : aircraft.speedAboveFL100,
            'rodAboveFL100' : aircraft.rodAboveFL100,
            'speedBelowFL100' : aircraft.speedBelowFL100,
            'rodBelowFL100' : aircraft.rodBelowFL100,
            'speedApproach' : aircraft.speedApproach
        }

    # write the configuration data
    with open(os.path.join(args.directory, 'PerformanceData.ini'), 'w') as file:
        config.write(file)
|