|
- #!/usr/bin/python3
- import requests
- import sys
- import textwrap
-
def json_query(url, transform, params=None):
    """Fetch JSON from url and run it through transform.

    Pretty-prints errors and the data worked on as exhaustively as
    possible, exiting with status 1 on any failure.

    url       -- endpoint to GET
    transform -- callable applied to the decoded JSON document
    params    -- optional dict of query parameters (default: none)
    """
    # None sentinel instead of a mutable default argument ({} would be
    # shared across calls); requests treats None and {} the same.
    if params is None:
        params = {}
    try:
        result = requests.get(url, params)
    except requests.exceptions.ConnectionError:
        # requests' ConnectionError does NOT subclass the builtin
        # ConnectionError; catching the builtin lets network failures
        # escape as tracebacks.
        print("Network connection error.")
        sys.exit(1)
    try:
        data = result.json()
    except ValueError as err:
        # Response.json() raises requests.exceptions.JSONDecodeError,
        # a ValueError subclass in every requests version.  str() the
        # exception and use the response text: concatenating the raw
        # objects to a str raises TypeError.
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n'
              + str(err) + '\n' + result.text + '\n')
        sys.exit(1)
    try:
        return transform(data)
    except (IndexError, KeyError) as err:
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n'
              + str(err))
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)
-
def page_search(string, max_results):
    """Search Wikipedia for string.

    Returns at most max_results results, or the empty list when no
    matches were returned.
    """
    query = {'q': string, 'limit': max_results}
    return json_query(
        'https://en.wikipedia.org/w/rest.php/v1/search/page',
        lambda data: data['pages'],
        query,
    )
-
def get_page_with_summary(title):
    """Return a JSON object for the titled page, containing page
    metadata and a text summary."""
    endpoint = 'https://en.wikipedia.org/api/rest_v1/page/summary/' + title
    return json_query(endpoint, lambda data: data)
-
def get_page_links(title):
    """Return the list of links from a page.

    For a disambiguation page, this means the links to the individual
    pages the title could refer to.
    """
    query = {
        'action': 'query',
        'titles': title,
        'prop': 'links',
        'format': 'json',
    }

    def extract(data):
        # The pages map is keyed by page id; indexing a materialized
        # list keeps the IndexError that json_query handles for us.
        page = list(data['query']['pages'].values())[0]
        return page['links']

    return json_query('https://en.wikipedia.org/w/api.php', extract, query)
-
def main():
    """Entry point: look up the command-line search terms on Wikipedia
    and print the best match's summary (or its disambiguation links)."""
    query = ' '.join(sys.argv[1:])
    if not query:
        print("Usage: wikipedia <list of search terms>")
        sys.exit(1)

    matches = page_search(query, 1)
    if not matches:
        print('No result found.')
        sys.exit(1)

    page = get_page_with_summary(matches[0]['title'])
    if page['type'] == 'disambiguation':
        # A disambiguation page has no useful extract; show its links
        # so the user can re-run with a more specific term.
        titles = [link['title'] for link in get_page_links(page['title'])]
        print('Ambiguous result, please clarify:\n ' + '\n '.join(titles))
    else:
        print(page['title'] + ':\n\n'
              + textwrap.fill(page['extract'], width=80))
-
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
|