|
|
@@ -0,0 +1,30 @@ |
|
|
|
#!/usr/bin/env python3 |
|
|
|
from pathlib import Path

from playwright.sync_api import sync_playwright
|
|
|
|
|
|
|
root = 'https://giantessbooru.com/'

# cookies.txt holds one "name=value" pair per line. Each pair becomes a cookie
# dict scoped to `root`, the form handed to add_cookies() later in the script.
cookies = []
with open('cookies.txt', encoding='utf-8') as f:
    for lineno, raw in enumerate(f, start=1):
        entry = raw.rstrip()
        if not entry:
            # Blank lines (e.g. a stray trailing newline) carry no cookie.
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        name, sep, value = entry.partition('=')
        if not sep:
            raise ValueError(
                f"cookies.txt:{lineno}: expected name=value, got {entry!r}")
        cookies.append({'name': name, 'value': value, 'url': root})
|
|
|
|
|
|
|
# Walk every listing page under post/list and write one
# data/<md5>.tags.txt file per thumbnail, containing one tag per line.
with sync_playwright() as p:
    # launch() yields the Browser itself; cookies and pages belong to a
    # BrowserContext created from it, so keep the two handles separate.
    browser = p.chromium.launch()
    try:
        context = browser.new_context(**p.devices['Desktop Chrome'])
        context.add_cookies(cookies)
        page = context.new_page()

        # Tag files are written under data/; create it up front so the first
        # write cannot fail on a missing directory.
        out_dir = Path('data')
        out_dir.mkdir(parents=True, exist_ok=True)

        # The text of the second-to-last paginator link is read as the
        # number of listing pages.
        page.goto(root + "post/list")
        pages = int(page.locator('#paginator a').nth(-2).inner_text())

        # NOTE(review): the range starts at 0, so post/list/0 is requested
        # before post/list/1 -- confirm whether the site treats 0 as a
        # distinct page or as a duplicate of the first one.
        for page_no in range(pages + 1):
            page.goto(root + f"post/list/{page_no}")
            print(page_no, page.title())

            # NOTE(review): '#mai' may be a typo for '#main' -- verify the
            # selector against the page markup.
            thumbs = page.locator('#mai > .thumb > a > img')
            for idx in range(thumbs.count()):
                thumb = thumbs.nth(idx)
                title = thumb.get_attribute('title')
                src = thumb.get_attribute('src')
                if title is None or src is None:
                    # get_attribute() returns None for a missing attribute;
                    # without both there is nothing to write for this thumb.
                    continue

                # Tags are the space-separated words before the first '//'
                # in the title attribute.
                tags = title.split('//')[0].rstrip().split(' ')
                # The third '/'-separated piece of src names the output file
                # (presumably the image's md5 -- confirm against the site).
                md5 = src.split('/')[2]
                (out_dir / f"{md5}.tags.txt").write_text(
                    '\n'.join(tags) + '\n', encoding="utf-8")

            # Pause one second between listing pages.
            page.wait_for_timeout(1000)
    finally:
        # Closing the browser also closes its contexts and pages, and now
        # happens even when a navigation or parse step raises.
        browser.close()