#!/usr/bin/env python
"""Scrape aircraft performance figures from SKYbrary into an INI file.

The script walks the ``Category:Aircraft`` listing, opens every aircraft
page it finds, reads the speed ('kts') and ROD ('ft/min') rows of the
performance table and writes one configuration section per aircraft to
``PerformanceData.ini`` in the directory given on the command line.
"""

import argparse
import configparser
import os
import urllib.error
import urllib.request

from bs4 import BeautifulSoup

from aman.types.PerformanceData import PerformanceData


def findAircraftPages(rooturl : str, suburl : str):
    """Collect the URLs of all aircraft pages reachable from a category page.

    Args:
        rooturl: Scheme and host that every relative link is appended to.
        suburl: Path (relative to ``rooturl``) of the category page to read.

    Returns:
        A list of absolute aircraft page URLs, without duplicates, in the
        order in which they were first encountered.
    """
    with urllib.request.urlopen(rooturl + suburl) as site:
        data = site.read().decode('utf-8')

    parsed = BeautifulSoup(data, features='lxml')
    aircrafts = []

    # aircraft entries are links of the form '/<x>/<title>' whose last path
    # element equals the link title; category and special pages are skipped
    for link in parsed.body.find_all('a', title=True, href=True):
        href = link['href']
        title = link['title']
        split = href.split('/')
        if 3 != len(split) or split[2] != title:
            continue
        if 'Category' in title or 'Special' in href:
            continue
        aircrafts.append(rooturl + href)

    # follow the pagination links, but never the one that goes backwards;
    # a page may carry the same pagination link more than once, so every
    # target is followed only a single time
    followed = set()
    for link in parsed.body.find_all('a', attrs={ 'title': 'Category:Aircraft', 'href': True }):
        href = link['href']
        if 'previous' in link.text or href in followed:
            continue
        followed.add(href)
        aircrafts.extend(findAircraftPages(rooturl, href))

    # drop duplicates but keep the order of first appearance
    return list(dict.fromkeys(aircrafts))


def findAndParseEntry(tableRow, startIdx, substring, default):
    """Search a table row backwards for a cell that contains ``substring``.

    The cells are inspected from ``startIdx`` down to index 1; index 0 is
    never inspected.

    Args:
        tableRow: Sequence of cells, each exposing a ``text`` attribute.
        startIdx: Index of the first cell to inspect.
        substring: Marker (e.g. a unit) that identifies a value cell.
        default: Value used if the matching cell carries no usable number.

    Returns:
        A tuple ``(value, nextIdx)``. For a match, ``value`` is the integer
        in front of the first blank of the cell text (or ``default`` if the
        text has no blank or the token is not an integer) and ``nextIdx``
        is the index of the match minus two. Without a match the tuple is
        ``(0, -1)``.
    """
    while 0 < startIdx:
        text = tableRow[startIdx].text
        if substring not in text:
            startIdx -= 1
            continue

        split = text.split(' ')
        if 1 >= len(split):
            return default, startIdx - 2
        try:
            return int(split[0]), startIdx - 2
        except ValueError:
            # a non-numeric token must not abort the complete scraping run
            return default, startIdx - 2

    return 0, -1


def findAndParseSpeedEntry(tableRow, startIdx, default):
    """Run findAndParseEntry() for cells that are marked with 'kts'."""
    return findAndParseEntry(tableRow, startIdx, 'kts', default)


def findAndParseRodEntry(tableRow, startIdx, default):
    """Run findAndParseEntry() for cells that are marked with 'ft/min'."""
    return findAndParseEntry(tableRow, startIdx, 'ft/min', default)


def parsePerformanceEntries(tableRowSpeeds, tableRowRODs):
    """Extract all speed and ROD values of the two performance table rows.

    Both rows are processed from their last cell towards the first one, so
    the returned lists are ordered from the last table column to the first.

    Args:
        tableRowSpeeds: Cells of the row that holds the 'kts' values.
        tableRowRODs: Cells of the row that holds the 'ft/min' values.

    Returns:
        A tuple ``(speeds, rods)`` of two integer lists.
    """
    speeds = []
    rods = []

    # NOTE(review): if the remaining cells contain no further value the
    # finder returns (0, -1) and that 0 is appended as well - this matches
    # the previous behaviour, confirm whether it is intended

    # parse the speed data
    idx = len(tableRowSpeeds) - 1
    while 0 < idx:
        # the first parsed entry falls back to 140, all other ones to 250
        fallback = 250 if speeds else 140
        value, idx = findAndParseSpeedEntry(tableRowSpeeds, idx, fallback)
        speeds.append(value)

    # parse the ROD data
    idx = len(tableRowRODs) - 1
    while 0 < idx:
        value, idx = findAndParseRodEntry(tableRowRODs, idx, 2000)
        rods.append(value)

    return speeds, rods


def parsePerformanceData(url : str):
    """Download one aircraft page and extract its performance data.

    Args:
        url: Absolute URL of the aircraft page.

    Returns:
        A tuple ``(valid, aircraft)``. ``aircraft`` is ``None`` if the page
        has no ICAO code heading or no usable performance table, otherwise
        it is a PerformanceData instance. ``valid`` is True only if at least
        four speed and three ROD values were available.
    """
    with urllib.request.urlopen(url) as site:
        data = site.read().decode('utf-8')

    parsed = BeautifulSoup(data, features='lxml')

    # check if we find the ICAO code
    icao = parsed.body.find('h5', attrs={ 'id' : 'siteSub', 'class' : 'subtitle'})
    if icao is None or '' == icao.text:
        return False, None
    aircraft = PerformanceData(icao.text)

    # check if we find the table with the performance values
    performanceTable = parsed.body.find('table', attrs={ 'class' : 'wikitable', 'style' : 'font-size: 90%;' })
    if performanceTable is None:
        return False, None

    # the second and the third row are required
    rows = performanceTable.find_all('tr')
    if 3 > len(rows):
        return False, None

    speeds, rods = parsePerformanceEntries(rows[1].find_all('td'), rows[2].find_all('td'))

    # duplicate the second speed entry; this needs at least two entries
    # NOTE(review): the reason for the duplication and for the limit of 10
    # is not visible in this file - confirm
    if 2 <= len(speeds) < 10:
        speeds.insert(1, speeds[1])

    # create the speed data
    if len(speeds) >= 4:
        aircraft.speedApproach = speeds[0]
        aircraft.speedBelowFL100 = speeds[1]
        aircraft.speedAboveFL100 = speeds[2]
        aircraft.speedAboveFL240 = speeds[3]

    # create the ROD data
    if len(rods) >= 3:
        aircraft.rodBelowFL100 = rods[0]
        aircraft.rodAboveFL100 = rods[1]
        aircraft.rodAboveFL240 = rods[2]

    return len(speeds) >= 4 and len(rods) >= 3, aircraft


def main():
    """Scrape all aircraft pages and write the performance configuration."""
    # create the commandline parser
    parser = argparse.ArgumentParser(description='Extract the aircraft performance data')
    parser.add_argument('directory', help='Directory where to store the performance data configuration')
    args = parser.parse_args()

    # create the directory if it does not exist
    os.makedirs(args.directory, exist_ok=True)

    # parse the aircrafts
    links = findAircraftPages('https://www.skybrary.aero', '/index.php?title=Category:Aircraft')
    print('Found ' + str(len(links)) + ' aircrafts')

    aircrafts = []
    for parsed, link in enumerate(links, start=1):
        try:
            valid, aircraft = parsePerformanceData(link)
            failure = 'Unable to find performance data for ' + link
        except urllib.error.URLError as error:
            # one unreachable page must not discard all other results
            valid, aircraft = False, None
            failure = 'Unable to download ' + link + ' (' + str(error) + ')'

        print('Parsed ' + str(parsed) + ' of ' + str(len(links)), end='\r')

        if not valid:
            print(failure)
            continue

        aircrafts.append(aircraft)

    print('Successfully parsed ' + str(len(aircrafts)) + ' of ' + str(len(links)) + ' aircrafts')

    # create the configuration file with one section per ICAO code
    config = configparser.ConfigParser()
    for aircraft in aircrafts:
        config[aircraft.icao] = {
            'speedAboveFL240' : aircraft.speedAboveFL240,
            'rodAboveFL240' : aircraft.rodAboveFL240,
            'speedAboveFL100' : aircraft.speedAboveFL100,
            'rodAboveFL100' : aircraft.rodAboveFL100,
            'speedBelowFL100' : aircraft.speedBelowFL100,
            'rodBelowFL100' : aircraft.rodBelowFL100,
            'speedApproach' : aircraft.speedApproach
        }

    # write the configuration data
    with open(os.path.join(args.directory, 'PerformanceData.ini'), 'w') as file:
        config.write(file)


if __name__ == '__main__':
    main()