2020-04-13 13:19:14 -04:00
|
|
|
module sites.hentaicafe;
|
|
|
|
|
|
|
|
import config.downloaderconfig;
|
|
|
|
import sites.basesite;
|
|
|
|
|
|
|
|
/++
|
|
|
|
+ This class handles downloads for the site `hentai.cafe`
|
|
|
|
+/
|
|
|
|
class HentaiCafe : BaseSite
{
protected:
    import std.net.curl : get;
    import std.conv : to;
    import std.regex : regex, match;
    import core.stdc.stdlib : exit, EXIT_FAILURE;

    /++
     + Returns the text of the first capture group of `pattern` inside
     + `haystack`.
     +
     + Params:
     +   haystack = the text to search (here: downloaded html)
     +   pattern  = a regex containing at least one capture group
     +   what     = human readable description of what is being searched,
     +              only used to build the error message
     +
     + Throws: `Exception` if the pattern doesn't match, so that a changed
     +         page layout produces a readable error instead of an
     +         out-of-range access on an empty match
     +/
    private static string firstCapture(string haystack, string pattern, string what)
    {
        auto found = match(haystack, regex(pattern));

        if (found.empty)
            throw new Exception("[!] Could not find " ~ what ~ " in the downloaded page");

        return found.captures[1];
    }

    /++
     + Gets the image urls from the supplied json
     +
     + The json is expected to be an array of objects which each carry
     + a string field named `url`.
     +
     + NOTE(review): the previous comment on this method talked about
     + derived classes overriding it, which suggests `BaseSite` may declare
     + a method with the same name. If it does, this method needs the
     + `override` attribute - confirm against `sites.basesite`.
     +
     + Params:
     +   json = the json text holding the page descriptions
     +
     + Returns: the value of every `url` field, in document order
     +
     + Throws: `JSONException` if `json` is not valid json, is not an array
     +         or an element has no string `url` field
     +/
    string[] getUrlsFromJson(string json)
    {
        // Imported locally so this method doesn't depend on whatever
        // the other project modules happen to re-export
        import std.array : replace;
        import std.json : JSONValue, parseJSON;

        string[] urls;

        // Parse the json
        JSONValue parsedJson = parseJSON(json);

        // Extract the urls of the images and drop any backslashes
        // that are still left in them
        foreach (JSONValue val; parsedJson.array)
            urls ~= val["url"].str.replace("\\", "");

        return urls;
    }

    /++
     + This function gets the name of the manga by the url
     +
     + The page behind `url` is downloaded and the content of its first
     + `<h3>` element is used as the name.
     +
     + Params:
     +   url = the url of the manga's overview page
     +
     + Returns: the text between `<h3>` and `</h3>`
     +
     + Throws: `Exception` if the page contains no `<h3>` element
     +/
    override string getNameFromUrl(string url)
    {
        // Get the site html as a string
        string siteContent = to!string(get(url));

        // Non-greedy, so two headings on the same line don't get
        // merged into a single name
        return firstCapture(siteContent, `<h3>(.*?)</h3>`, "the manga name (<h3>)");
    }

    /++
     + This function returns a list of all the image urls by
     + parsing the html
     +
     + Steps: download the page behind `url`, find the link to the reader
     + (`https://hentai.cafe/manga/read/...`), download the reader page and
     + hand the json array assigned to `var pages` to `getUrlsFromJson`.
     +
     + Params:
     +   url = the url of the comic, has to contain `/hc.fyi/`
     +
     + Returns: the image urls returned by `getUrlsFromJson`
     +
     + Throws: `Exception` if the reader link or the `var pages` array
     +         can't be found in the downloaded html
     +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Imported locally so this method doesn't depend on whatever
        // the other project modules happen to re-export
        import std.stdio : writefln;
        import std.string : indexOf;

        // Check if the url is a hentai.cafe comic url
        if (indexOf(url, "/hc.fyi/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
            // FIXME: no! :<
            // Terminating the whole process from inside a site class is
            // heavy handed; kept as is because callers may rely on it.
            exit(EXIT_FAILURE);
        }

        // Get page html
        string comicHTML = to!string(get(url));

        // Find the reader url in the html mess. The capture group stops
        // at the closing quote, so no further sanitizing is needed.
        string comicURL = firstCapture(comicHTML,
                `"(https://hentai\.cafe/manga/read/[^"]*)" title`,
                "the reader link");

        // Get the first manga page to extract the json with the page infos
        string mangaPageHTML = to!string(get(comicURL));

        // Capture the json array itself instead of splitting the match
        // on "=", which cut the json short when an url contained a "="
        string jsonData = firstCapture(mangaPageHTML,
                `var pages = (\[.*\])`,
                "the page list (var pages)");

        return getUrlsFromJson(jsonData);
    }

    /++
     + This constructor just calls the inherited constructor
     +
     + Params:
     +   config = the configuration that is passed on to `BaseSite`
     +/
    public this(Config config)
    {
        super(config);
    }
}
|