Scripts/wikipedia

74 lines
2.6 KiB
Plaintext
Raw Permalink Normal View History

2022-03-30 17:20:58 -04:00
#!/usr/bin/python3
import requests
import sys
import textwrap
2022-04-08 18:12:13 -04:00
import pprint
2022-04-08 18:10:21 -04:00
try:
from simplejson.errors import JSONDecodeError
except ImportError:
from json.decoder import JSONDecodeError
2022-03-30 17:20:58 -04:00
2022-03-31 17:03:08 -04:00
# Fetch JSON from url and run it through transform, pretty printing errors
# and the data worked on as exhaustively as possible.
def json_query(url, transform, params=None):
    """Fetch JSON from ``url`` and return ``transform`` applied to it.

    Args:
        url: Endpoint the GET request is sent to.
        transform: Callable that receives the decoded JSON document and
            returns the part of it the caller is interested in.
        params: Optional mapping of query-string parameters. Defaults to
            ``None`` (no parameters) rather than a shared mutable dict.

    Returns:
        Whatever ``transform`` returns for the decoded response body.

    This function does not raise on the failures it handles: a request
    error, an undecodable body, or an unexpected response shape while
    running ``transform`` each print a diagnostic and terminate the
    process with exit status 1.
    """
    try:
        # Without a timeout a stalled connection would block forever.
        # RequestException is the base class of ConnectionError and of the
        # Timeout errors the timeout can now produce.
        result = requests.get(url, params, timeout=30)
    except requests.exceptions.RequestException:
        print("Network connection error.")
        sys.exit(1)
    try:
        data = result.json()
    except JSONDecodeError as err:
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n' + str(err) + '\n' + str(result) + '\n')
        sys.exit(1)
    try:
        return transform(data)
    except (IndexError, KeyError, TypeError) as err:
        # TypeError covers a response whose container type differs from
        # what transform indexes into (e.g. a list where a dict is expected).
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + str(err))
        # Dump the whole document so the unexpected shape can be inspected.
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)
2022-03-30 17:20:58 -04:00
2022-03-31 17:03:08 -04:00
# Search wikipedia for string, returning at most max_results results
# or the empty list if no matches were returned.
def page_search(string, max_results):
    """Query the Wikipedia search endpoint for ``string``.

    ``max_results`` is sent as the ``limit`` query parameter. The value
    returned is the ``pages`` entry of the decoded search response.
    """
    search_url = 'https://en.wikipedia.org/w/rest.php/v1/search/page'
    query = dict(q=string, limit=max_results)

    def pick_pages(data):
        return data['pages']

    return json_query(search_url, pick_pages, query)
2022-03-30 17:20:58 -04:00
2022-03-31 17:03:08 -04:00
# Get a JSON object for the titled page, containing page metadata and a text summary.
2022-03-30 17:20:58 -04:00
def get_page_with_summary(title):
    """Return the decoded page-summary JSON document for ``title``.

    Args:
        title: Page title; it is percent-encoded before being placed in
            the URL path.

    Returns:
        The decoded response of the page summary endpoint, unmodified.
    """
    # Imported locally so this function carries its own dependency.
    from urllib.parse import quote

    # safe='' also escapes '/', so titles such as "AC/DC" stay a single
    # path segment; '?' and '#' are escaped instead of starting a query
    # string or fragment.
    endpoint = 'https://en.wikipedia.org/api/rest_v1/page/summary/' + quote(title, safe='')
    return json_query(endpoint, lambda data: data)
2022-03-30 17:20:58 -04:00
2022-03-31 17:03:08 -04:00
# Get a list of the links from a page. For a disambiguation page, this means
# a list of the links to individual pages.
2022-03-30 17:20:58 -04:00
def get_page_links(title):
    """Return the list of link objects found on the page named ``title``.

    Args:
        title: Title of the page whose links are requested.

    Returns:
        The ``links`` list of the first page entry in the API response.
    """
    params = {
        'action': 'query',
        'titles': title,
        'prop': 'links',
        # The API returns only 10 links unless a limit is given, which
        # truncates the list; 'max' asks for the largest batch allowed.
        # NOTE(review): pages with more links than one batch would still
        # need continuation handling.
        'pllimit': 'max',
        'format': 'json',
    }

    def first_page_links(data):
        # 'pages' is keyed by a page id that is not known in advance; a
        # single title was requested, so take the first entry.
        return list(data['query']['pages'].values())[0]['links']

    return json_query('https://en.wikipedia.org/w/api.php', first_page_links, params)
2022-03-30 17:20:58 -04:00
def main():
    """Look up the command-line search terms and print the top result.

    The arguments are joined with spaces into one search string. With no
    arguments a usage line is printed and the process exits with status 1;
    the same exit status is used when the search returns nothing. For a
    disambiguation page the titles of its links are listed, otherwise the
    page title and its extract wrapped to 80 columns are printed.
    """
    query = ' '.join(sys.argv[1:])
    if not query:
        print("Usage: wikipedia <list of search terms>")
        sys.exit(1)

    matches = page_search(query, 1)
    if not matches:
        print('No result found.')
        sys.exit(1)

    page = get_page_with_summary(matches[0]['title'])
    if page['type'] != 'disambiguation':
        print(page['title'] + ':\n\n' + textwrap.fill(page['extract'], width=80))
        return

    link_titles = [link['title'] for link in get_page_links(page['title'])]
    print('Ambiguous result, please clarify:\n ' + '\n '.join(link_titles))


# Run the lookup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()