#!/usr/bin/python3
import requests
import sys
import textwrap
import pprint

# requests decodes JSON with simplejson when it is installed, so the matching
# JSONDecodeError must be caught; fall back to the stdlib one otherwise.
try:
    from simplejson.errors import JSONDecodeError
except ImportError:
    from json.decoder import JSONDecodeError


# Fetch JSON from url and run it through transform, pretty-printing errors
# and the data worked on as exhaustively as possible.
def json_query(url, transform, params=None):
    try:
        result = requests.get(url, params=params)
    except requests.exceptions.ConnectionError:
        print("Network connection error.")
        sys.exit(1)
    try:
        data = result.json()
    except JSONDecodeError as err:
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n'
              + str(err) + '\n' + str(result) + '\n')
        sys.exit(1)
    try:
        return transform(data)
    except (IndexError, KeyError) as err:
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + str(err))
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)


# Search Wikipedia for string, returning at most max_results results,
# or the empty list if no matches were returned.
def page_search(string, max_results):
    params = {'q': string, 'limit': max_results}
    return json_query('https://en.wikipedia.org/w/rest.php/v1/search/page',
                      lambda data: data['pages'],
                      params)


# Get a JSON object for the titled page, containing page metadata and a
# text summary.
def get_page_with_summary(title):
    return json_query('https://en.wikipedia.org/api/rest_v1/page/summary/' + title,
                      lambda data: data)


# Get a list of the links from a page. For a disambiguation page, this means
# a list of the links to the individual pages. pllimit raises the number of
# links returned from the API's default of 10 to the maximum it allows.
def get_page_links(title):
    params = {'action': 'query', 'titles': title, 'prop': 'links',
              'pllimit': 'max', 'format': 'json'}
    return json_query('https://en.wikipedia.org/w/api.php',
                      lambda data: list(data['query']['pages'].values())[0]['links'],
                      params)


def main():
    arg = ' '.join(sys.argv[1:])
    if not arg:
        print("Usage: wikipedia <search term>")
        sys.exit(1)
    else:
        results = page_search(arg, 1)
        if results:
            page = get_page_with_summary(results[0]['title'])
            if page['type'] == 'disambiguation':
                print('Ambiguous result, please clarify:\n '
                      + '\n '.join([link['title']
                                    for link in get_page_links(page['title'])]))
            else:
                print(page['title'] + ':\n\n'
                      + textwrap.fill(page['extract'], width=80))
        else:
            print('No result found.')
            sys.exit(1)


if __name__ == '__main__':
    main()
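
# Example usage (a hypothetical session sketched from the code paths above;
# the actual titles, summaries, and disambiguation options depend on live
# Wikipedia data, so real output will differ):
#
#   $ ./wikipedia <some unambiguous term>
#   <Page title>:
#
#   <page summary, wrapped to 80 columns>
#
#   $ ./wikipedia <some ambiguous term>
#   Ambiguous result, please clarify:
#    <first linked title>
#    <second linked title>
#    ...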