Scripts/wikipedia

#!/usr/bin/python3
import requests
import sys
import textwrap

# Fetch JSON from url and run it through transform, pretty printing errors
# and the data worked on as exhaustively as possible.
def json_query(url, transform, params={}):
    try:
        result = requests.get(url, params)
    except ConnectionError:
        print("Network connection error.")
        sys.exit(1)
    try:
        data = result.json()
    except JSONDecodeError as err:
        print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n' + err + '\n' + result + '\n')
        sys.exit(1)
    try:
        return transform(data)
    except (IndexError, KeyError) as err:
        print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + err)
        pprint.PrettyPrinter(indent=2).pprint(data)
        sys.exit(1)

# Search wikipedia for string, returning at most max_results results
# or the empty list if no matches where returned.
def page_search(string, max_results):
    params = {
            'q' : string,
            'limit' : max_results
            }
    return json_query('https://en.wikipedia.org/w/rest.php/v1/search/page', lambda data: data['pages'], params)

# Get a JSON object for the titled page, containing page metadata and a text summary.
def get_page_with_summary(title):
    return json_query('https://en.wikipedia.org/api/rest_v1/page/summary/' + title, lambda data: data)

# Get a list of the links from a page. For a disambiguation page, this means
# a list of the links to individual pages.
def get_page_links(title):
    params = {
            'action' : 'query',
            'titles' : title,
            'prop' : 'links',
            'format' : 'json'
            }
    return json_query('https://en.wikipedia.org/w/api.php', lambda data: list(data['query']['pages'].values())[0]['links'], params)

def main():
    arg = ' '.join(sys.argv[1:])
    if not arg:
        print("Usage: wikipedia <list of search terms>")
        sys.exit(1)
    else:
        results = page_search(arg, 1)
        if results:
            page = get_page_with_summary(results[0]['title'])
            if page['type'] == 'disambiguation':
                print('Ambiguous result, please clarify:\n    ' + '\n    '.join([link['title'] for link in get_page_links(page['title'])]))
            else:
                print(page['title'] + ':\n\n' + textwrap.fill(page['extract'], width=80))
        else:
            print('No result found.')
            sys.exit(1)

if __name__ == '__main__':
    main()
initial commit 2022-03-30 17:20:58 -04:00			`#!/usr/bin/python3`
			`import requests`
			`import sys`
			`import textwrap`

wikipedia 2022-03-31 17:03:08 -04:00			`# Fetch JSON from url and run it through transform, pretty printing errors`
			`# and the data worked on as exhaustively as possible.`
			`def json_query(url, transform, params={}):`
			`try:`
			`result = requests.get(url, params)`
			`except ConnectionError:`
			`print("Network connection error.")`
			`sys.exit(1)`
			`try:`
			`data = result.json()`
			`except JSONDecodeError as err:`
			`print('Error when decoding JSON:\nFrom endpoint ' + url + ':\n' + err + '\n' + result + '\n')`
			`sys.exit(1)`
			`try:`
			`return transform(data)`
			`except (IndexError, KeyError) as err:`
			`print('Error when traversing JSON:\nFrom endpoint ' + url + ':\n' + err)`
			`pprint.PrettyPrinter(indent=2).pprint(data)`
			`sys.exit(1)`
initial commit 2022-03-30 17:20:58 -04:00
wikipedia 2022-03-31 17:03:08 -04:00			`# Search wikipedia for string, returning at most max_results results`
			`# or the empty list if no matches where returned.`
			`def page_search(string, max_results):`
initial commit 2022-03-30 17:20:58 -04:00			`params = {`
			`'q' : string,`
wikipedia 2022-03-31 17:03:08 -04:00			`'limit' : max_results`
initial commit 2022-03-30 17:20:58 -04:00			`}`
wikipedia 2022-03-31 17:03:08 -04:00			`return json_query('https://en.wikipedia.org/w/rest.php/v1/search/page', lambda data: data['pages'], params)`
initial commit 2022-03-30 17:20:58 -04:00
wikipedia 2022-03-31 17:03:08 -04:00			`# Get a JSON object for the titled page, containing page metadata and a text summary.`
initial commit 2022-03-30 17:20:58 -04:00			`def get_page_with_summary(title):`
wikipedia 2022-03-31 17:03:08 -04:00			`return json_query('https://en.wikipedia.org/api/rest_v1/page/summary/' + title, lambda data: data)`
initial commit 2022-03-30 17:20:58 -04:00
wikipedia 2022-03-31 17:03:08 -04:00			`# Get a list of the links from a page. For a disambiguation page, this means`
			`# a list of the links to individual pages.`
initial commit 2022-03-30 17:20:58 -04:00			`def get_page_links(title):`
			`params = {`
			`'action' : 'query',`
			`'titles' : title,`
			`'prop' : 'links',`
			`'format' : 'json'`
			`}`
wikipedia 2022-03-31 17:03:08 -04:00			`return json_query('https://en.wikipedia.org/w/api.php', lambda data: list(data['query']['pages'].values())[0]['links'], params)`
initial commit 2022-03-30 17:20:58 -04:00
			`def main():`
wikipedia 2022-03-31 17:03:08 -04:00			`arg = ' '.join(sys.argv[1:])`
			`if not arg:`
initial commit 2022-03-30 17:20:58 -04:00			`print("Usage: wikipedia <list of search terms>")`
			`sys.exit(1)`
			`else:`
wikipedia 2022-03-31 17:03:08 -04:00			`results = page_search(arg, 1)`
			`if results:`
			`page = get_page_with_summary(results[0]['title'])`
initial commit 2022-03-30 17:20:58 -04:00			`if page['type'] == 'disambiguation':`
			`print('Ambiguous result, please clarify:\n ' + '\n '.join([link['title'] for link in get_page_links(page['title'])]))`
			`else:`
			`print(page['title'] + ':\n\n' + textwrap.fill(page['extract'], width=80))`
			`else:`
			`print('No result found.')`
			`sys.exit(1)`

			`if __name__ == '__main__':`
			`main()`