// Hentai-Downloader/source/sites/hentaicafe.d
// 101 lines, 2.7 KiB, D
// Last change: 2020-04-13 13:19:14 -04:00
module sites.hentaicafe;
import config.downloaderconfig;
import sites.basesite;
/++
 + This class handles downloads for the site `hentai.cafe`.
 +
 + A short `hc.fyi` link is resolved to the reader page, and the image urls
 + are extracted from the `var pages = [...]` JSON embedded in that page.
 +/
class HentaiCafe : BaseSite
{
protected:
    import std.net.curl : get;
    import std.conv : to;
    import std.regex : regex, match;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    // Imported explicitly so this class does not depend on what other
    // modules happen to re-export.
    import std.array : replace;
    import std.json : JSONValue, parseJSON;
    import std.stdio : writefln;
    import std.string : indexOf;

    /++
     + Gets the image urls from the supplied json.
     +
     + The json is expected to be an array of objects, each carrying a
     + string field named `url`. Backslashes are stripped from every url.
     +
     + NOTE(review): this method is declared without `override`; confirm
     + whether `BaseSite` provides a method with the same signature.
     +
     + Params:
     +     json = JSON text containing an array of page objects
     + Returns: the value of the `url` field of every array element, in order
     + Throws: `JSONException` if the text is not valid JSON, is not an
     +         array, or an element has no string `url` field
     +/
    string[] getUrlsFromJson(string json)
    {
        string[] urls;

        // Parse the json
        JSONValue parsedJson = parseJSON(json);

        // Extract the urls of the images
        foreach (JSONValue val; parsedJson.array)
            urls ~= replace(val["url"].str, "\\", "");

        return urls;
    }

    /++
     + This function gets the name of the manga by the url.
     +
     + The name is taken from the first `<h3>` element of the page.
     +
     + Params:
     +     url = address of the page to download
     + Returns: the text between the first `<h3>` and `</h3>` tags
     + Throws: `Exception` if the page contains no `<h3>` element
     +/
    override string getNameFromUrl(string url)
    {
        // Get the site html as a string
        string siteContent = to!string(get(url));

        // Find the name of the manga; non-greedy so that a second <h3> on
        // the same line does not get swallowed into the name
        auto nameRegex = regex(`<h3>(.*?)</h3>`);
        auto nameMatch = match(siteContent, nameRegex);
        if (nameMatch.empty)
            throw new Exception("Could not find the manga name (<h3>) in " ~ url);

        // Return only the name not the html tags
        return nameMatch.captures[1];
    }

    /++
     + This function returns a list of all the image urls by
     + parsing the html.
     +
     + Params:
     +     url = short link; must contain `/hc.fyi/`, otherwise the process
     +           is terminated with `EXIT_FAILURE`
     + Returns: the image urls listed in the reader page's `pages` array
     + Throws: `Exception` if the reader link or the `pages` array cannot be
     +         found in the downloaded html
     +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a hentai.cafe comic url
        if (indexOf(url, "/hc.fyi/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
            // FIXME: no! :<
            exit(EXIT_FAILURE);
        }

        // regex patterns for finding urls; the wanted part is captured in
        // group 1 so no string splitting is needed afterwards
        auto comicRegex = regex(`"(https://hentai\.cafe/manga/read/[^"\s]*)" title`);
        auto jsonInfoRegex = regex(`var pages = (\[.*\])`);

        // Get page html
        string comicHTML = to!string(get(url));

        // Find the reader url in the html mess
        auto comicUrlMatch = match(comicHTML, comicRegex);
        if (comicUrlMatch.empty)
            throw new Exception("Could not find the reader link in " ~ url);
        string comicURL = comicUrlMatch.captures[1];

        // Get the first manga page to extract the json with the page infos
        string mangaPageHTML = to!string(get(comicURL));

        // Get the json data of the page. The array is taken straight from
        // the capture group: splitting the match on "=" would truncate the
        // json as soon as one of the urls contains a "=".
        auto jsonMatch = match(mangaPageHTML, jsonInfoRegex);
        if (jsonMatch.empty)
            throw new Exception("Could not find the page list in " ~ comicURL);
        string jsonData = jsonMatch.captures[1];

        return getUrlsFromJson(jsonData);
    }

    /++
     + This constructor just calls the inherited constructor.
     +
     + Params:
     +     config = configuration object forwarded to `BaseSite`
     +/
    public this(Config config)
    {
        super(config);
    }
}