123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- #!/usr/bin/env python
- import argparse
- import configparser
- import os
- import urllib.request
- from bs4 import BeautifulSoup
- from aman.types.PerformanceData import PerformanceData
def findAircraftPages(rooturl : str, suburl : str, visited = None):
    """Collect the URLs of the aircraft pages linked from a category page.

    The page at ``rooturl + suburl`` is downloaded and parsed. Every anchor
    that has a title and whose href consists of exactly three '/'-separated
    parts, with the third part equal to the title, is treated as an aircraft
    page (category and special pages are excluded). Anchors titled
    'Category:Aircraft' whose text does not contain 'previous' are followed
    recursively.

    Args:
        rooturl: Scheme and host that is prepended to every href.
        suburl: Path (and query) of the category page to scan.
        visited: Set of category-page hrefs that were already scanned. It is
            shared between the recursive calls so that a link which occurs
            more than once is fetched only a single time and link cycles
            terminate. Callers normally leave it at None.

    Returns:
        List of absolute aircraft page URLs without duplicates, in the order
        in which they were found.
    """
    if visited is None:
        visited = set()
    visited.add(suburl)

    # the context manager closes the connection, no explicit close is needed
    with urllib.request.urlopen(rooturl + suburl) as site:
        data = site.read().decode('utf-8')

    parsed = BeautifulSoup(data, features='lxml')
    aircrafts = []

    # collect the aircraft pages, anchors without a href cannot be followed
    for link in parsed.body.find_all('a', title=True, href=True):
        href = link['href']
        split = href.split('/')
        if 3 != len(split) or split[2] != link['title']:
            continue
        if 'Category' in link['title'] or 'Special' in href:
            continue
        aircrafts.append(rooturl + href)

    # follow the links to further category pages, but never walk backwards
    # and never scan the same page twice
    for link in parsed.body.find_all('a', attrs={ 'title': 'Category:Aircraft', 'href': True }):
        href = link['href']
        if 'previous' in link.text or href in visited:
            continue
        aircrafts.extend(findAircraftPages(rooturl, href, visited))

    # remove duplicates but keep the order of discovery
    unique = []
    seen = set()
    for url in aircrafts:
        if url not in seen:
            seen.add(url)
            unique.append(url)

    return unique
def findAndParseEntry(tableRow, startIdx, substring, default):
    """Search a table row backwards for a cell that contains substring.

    The search starts at startIdx and walks towards the front of the row.
    Index 0 is never inspected. The first cell whose text contains substring
    is split at single spaces and its first token is parsed as an integer.

    Args:
        tableRow: Indexable sequence of cells, each providing a ``text``
            attribute.
        startIdx: Index of the first cell to inspect.
        substring: Marker that identifies a matching cell (e.g. a unit).
        default: Value to use if the matching cell holds no parsable number.

    Returns:
        Tuple ``(value, nextIdx)``. For a match, nextIdx is the index of the
        matching cell minus two. If no cell matches, ``(0, -1)`` is returned.
    """
    while 0 < startIdx:
        text = tableRow[startIdx].text
        if substring not in text:
            startIdx -= 1
            continue

        nextIdx = startIdx - 2
        split = text.split(' ')

        # only the marker itself is in the cell, no value to parse
        if 1 >= len(split):
            return default, nextIdx

        # a non-numeric first token (e.g. ' kts' or 'n/a kts') must not
        # abort the run, it is handled like a missing value
        try:
            return int(split[0]), nextIdx
        except ValueError:
            return default, nextIdx

    return 0, -1
def findAndParseSpeedEntry(tableRow, startIdx, default):
    """Search tableRow backwards from startIdx for a cell marked with 'kts'.

    Delegates to findAndParseEntry and returns its ``(value, nextIdx)`` tuple
    unchanged.
    """
    speedMarker = 'kts'
    return findAndParseEntry(tableRow, startIdx, speedMarker, default)
def findAndParseRodEntry(tableRow, startIdx, default):
    """Search tableRow backwards from startIdx for a cell marked with 'ft/min'.

    Delegates to findAndParseEntry and returns its ``(value, nextIdx)`` tuple
    unchanged.
    """
    rodMarker = 'ft/min'
    return findAndParseEntry(tableRow, startIdx, rodMarker, default)
def parsePerformanceEntries(tableRowSpeeds, tableRowRODs):
    """Extract the speed and rate-of-descent values of two table rows.

    Both rows are scanned from their last cell towards the front. Every
    result of the entry helpers is appended, including the value that they
    report when no further matching cell is found.

    Args:
        tableRowSpeeds: Cells of the row that contains the speeds.
        tableRowRODs: Cells of the row that contains the rates of descent.

    Returns:
        Tuple ``(speeds, rods)`` of two lists in back-to-front cell order.
    """
    # walk backwards through the speed cells, the first parsed entry falls
    # back to 140 and all following ones to 250
    speeds = []
    cursor = len(tableRowSpeeds) - 1
    while cursor > 0:
        fallback = 250 if speeds else 140
        value, cursor = findAndParseSpeedEntry(tableRowSpeeds, cursor, fallback)
        speeds.append(value)

    # walk backwards through the ROD cells, the fallback is always 2000
    rods = []
    cursor = len(tableRowRODs) - 1
    while cursor > 0:
        value, cursor = findAndParseRodEntry(tableRowRODs, cursor, 2000)
        rods.append(value)

    return speeds, rods
def parsePerformanceData(url : str):
    """Download an aircraft page and extract its performance data.

    Args:
        url: Absolute URL of the aircraft page.

    Returns:
        Tuple ``(valid, aircraft)``. ``(False, None)`` is returned if the
        page has no ICAO code heading or no performance table with at least
        three rows. Otherwise aircraft is a PerformanceData instance and
        valid states whether at least four speeds and three rates of descent
        were found, i.e. whether all attributes of aircraft were set.
    """
    # the context manager closes the connection, no explicit close is needed
    with urllib.request.urlopen(url) as site:
        data = site.read().decode('utf-8')

    # check if we find the ICAO code
    parsed = BeautifulSoup(data, features='lxml')
    icao = parsed.body.find('h5', attrs={ 'id' : 'siteSub', 'class' : 'subtitle'})
    if icao is None or '' == icao.text:
        return False, None

    aircraft = PerformanceData(icao.text)

    performanceTable = parsed.body.find('table', attrs={ 'class' : 'wikitable', 'style' : 'font-size: 90%;' })
    if performanceTable is None:
        return False, None

    # rows 1 and 2 are required, indexing a shorter table would raise
    rows = performanceTable.find_all('tr')
    if 3 > len(rows):
        return False, None

    speeds, rods = parsePerformanceEntries(rows[1].find_all('td'), rows[2].find_all('td'))

    # NOTE(review): presumably compensates for a column that is missing in
    # shorter tables by duplicating the second entry - confirm the intent.
    # The duplication is only possible if a second entry exists.
    if 2 <= len(speeds) < 10:
        speeds.insert(1, speeds[1])

    # create the speed data
    speedsValid = len(speeds) >= 4
    if speedsValid:
        aircraft.speedApproach = speeds[0]
        aircraft.speedBelowFL100 = speeds[1]
        aircraft.speedAboveFL100 = speeds[2]
        aircraft.speedAboveFL240 = speeds[3]

    # create the ROD data
    rodsValid = len(rods) >= 3
    if rodsValid:
        aircraft.rodBelowFL100 = rods[0]
        aircraft.rodAboveFL100 = rods[1]
        aircraft.rodAboveFL240 = rods[2]

    return speedsValid and rodsValid, aircraft
if __name__ == '__main__':
    # Script entry point: crawl the aircraft category, parse every aircraft
    # page and store the collected performance data as an INI file in the
    # directory given on the command line.

    # create the commandline parser
    parser = argparse.ArgumentParser(description='Extract the aircraft performace data')
    parser.add_argument('directory', help='Directory where to store the performance data configuration')
    args = parser.parse_args()

    # create the directory if it does not exist
    # (exist_ok avoids the race between checking and creating)
    os.makedirs(args.directory, exist_ok=True)

    # parse the aircrafts
    links = findAircraftPages('https://www.skybrary.aero', '/index.php?title=Category:Aircraft')
    print('Found ' + str(len(links)) + ' aircrafts')

    aircrafts = []
    for parsed, link in enumerate(links, start=1):
        # a single unreachable page must not discard everything that has
        # been collected so far; urllib reports network failures as OSError
        try:
            valid, aircraft = parsePerformanceData(link)
        except OSError as error:
            print('Unable to fetch ' + link + ': ' + str(error))
            continue

        # the carriage return keeps the progress output on one line
        print('Parsed ' + str(parsed) + ' of ' + str(len(links)), end='\r')

        if not valid:
            print('Unable to find performance data for ' + link)
            continue

        aircrafts.append(aircraft)

    print('Successfully parsed ' + str(len(aircrafts)) + ' of ' + str(len(links)) + ' aircrafts')

    # create the configuration file, one section per ICAO code
    config = configparser.ConfigParser()
    for aircraft in aircrafts:
        config[aircraft.icao] = {
            'speedAboveFL240' : aircraft.speedAboveFL240,
            'rodAboveFL240' : aircraft.rodAboveFL240,
            'speedAboveFL100' : aircraft.speedAboveFL100,
            'rodAboveFL100' : aircraft.rodAboveFL100,
            'speedBelowFL100' : aircraft.speedBelowFL100,
            'rodBelowFL100' : aircraft.rodBelowFL100,
            'speedApproach' : aircraft.speedApproach
        }

    # write the configuration data
    with open(os.path.join(args.directory, 'PerformanceData.ini'), 'w') as file:
        config.write(file)
|