diff --git a/wikipedia b/wikipedia
index 2a4b030..5866e59 100755
--- a/wikipedia
+++ b/wikipedia
@@ -3,26 +3,46 @@ import requests
 import sys
 import textwrap
+import pprint
 
-def connection_error():
-    print("Could not contact wikipedia servers.")
-    sys.exit(1)
+# Fetch JSON from url and run it through transform, pretty printing errors
+# and the data worked on as exhaustively as possible.
+# params is an optional dict of query-string parameters. On any failure an
+# error report is printed and the program exits with status 1.
+def json_query(url, transform, params=None):
+    try:
+        result = requests.get(url, params)
+    # requests raises its own ConnectionError, which is unrelated to the builtin.
+    except requests.exceptions.ConnectionError:
+        print("Network connection error.")
+        sys.exit(1)
+    try:
+        data = result.json()
+    # result.json() signals an undecodable body with a ValueError subclass.
+    except ValueError as err:
+        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n' + str(err) + '\n' + result.text + '\n')
+        sys.exit(1)
+    try:
+        return transform(data)
+    except (IndexError, KeyError) as err:
+        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + repr(err))
+        pprint.PrettyPrinter(indent=2).pprint(data)
+        sys.exit(1)
 
-def page_search(string):
+# Search wikipedia for string, returning at most max_results results
+# or the empty list if no matches were returned.
+def page_search(string, max_results):
     params = {
         'q' : string,
-        'limit' : 1
+        'limit' : max_results
     }
-    try:
-        return requests.get('https://en.wikipedia.org/w/rest.php/v1/search/page', params).json()['pages']
-    except ConnectionError:
-        connection_error()
+    return json_query('https://en.wikipedia.org/w/rest.php/v1/search/page', lambda data: data['pages'], params)
 
+# Get a JSON object for the titled page, containing page metadata and a text summary.
 def get_page_with_summary(title):
-    try:
-        return requests.get('https://en.wikipedia.org/api/rest_v1/page/summary/' + title).json()
-    except ConnectionError:
-        connection_error()
+    return json_query('https://en.wikipedia.org/api/rest_v1/page/summary/' + title, lambda data: data)
 
+# Get a list of the links from a page. For a disambiguation page, this means
+# a list of the links to individual pages.
 def get_page_links(title):
     params = {
         'action' : 'query',
@@ -30,19 +50,17 @@ def get_page_links(title):
         'prop' : 'links',
         'format' : 'json'
     }
-    try:
-        return list(requests.get('https://en.wikipedia.org/w/api.php', params).json()['query']['pages'].values())[0]['links']
-    except ConnectionError:
-        connection_error()
+    return json_query('https://en.wikipedia.org/w/api.php', lambda data: list(data['query']['pages'].values())[0]['links'], params)
 
 def main():
-    if not sys.argv[1:]:
+    arg = ' '.join(sys.argv[1:])
+    if not arg:
         print("Usage: wikipedia ")
         sys.exit(1)
     else:
-        result = page_search(' '.join(sys.argv[1:]))
-        if result:
-            page = get_page_with_summary(result[0]['title'])
+        results = page_search(arg, 1)
+        if results:
+            page = get_page_with_summary(results[0]['title'])
             if page['type'] == 'disambiguation':
                 print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])]))
             else: