module sites.hentaicafe;

import config.downloaderconfig;
import sites.basesite;

/++
 + This class handles downloads for the site `hentai.cafe`
 +/
class HentaiCafe : BaseSite
{
protected:
    import std.net.curl : get;
    import std.conv : to;
    import std.regex : regex, match;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    // Helpers used below, imported explicitly so this class does not rely on
    // another module happening to re-export them.
    import std.array : replace, split;
    import std.exception : enforce;
    import std.stdio : writefln;
    import std.string : indexOf;

    /++
     + Gets the name of the manga from its url.
     +
     + Downloads the page at `url` and returns the first capture group of
     + the name pattern applied to the page content.
     +
     + Params:
     +     url = address of the page to download
     +
     + Returns: the text captured by the name pattern
     +
     + Throws: Exception if the name pattern does not match the page.
     +         Anything thrown by `get` propagates unchanged.
     +/
    override string getNameFromUrl(string url)
    {
        // Get the site html as a string
        string siteContent = to!string(get(url));

        // Find the name of the manga
        // NOTE(review): this pattern only contains line breaks around the
        // capture group; the "html tags" comment below suggests it was meant
        // to include surrounding markup - confirm the literal is intact.
        auto nameRegex = `

(.*)

`.regex;
        auto nameMatch = match(siteContent, nameRegex);
        enforce(!nameMatch.empty,
            "Could not find the manga name on page: " ~ url);

        // Return only the name not the html tags
        return nameMatch.captures[1];
    }

    /++
     + Collects the image urls of a comic, starting from its base url.
     +
     + The base page is downloaded and searched for the link to the reader
     + (`https://hentai.cafe/manga/read/...`). The reader page is then
     + downloaded and its `var pages = [...]` assignment is located; the text
     + after the `=` is handed to `getUrlsFromJson` (not defined in this
     + file, presumably inherited from `BaseSite`).
     +
     + If `url` does not contain "/hc.fyi/" a message is printed and the
     + process exits with `EXIT_FAILURE`.
     +
     + Params:
     +     url = base url of the comic
     +
     + Returns: whatever `getUrlsFromJson` returns for the extracted data
     +
     + Throws: Exception if the reader link or the page list cannot be found.
     +         Anything thrown by `get` propagates unchanged.
     +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a hentai.cafe comic url
        if(indexOf(url, "/hc.fyi/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
            // FIXME: terminating the whole process here is heavy-handed
            // (the message even says "ignored"); kept because callers may
            // rely on the current behaviour.
            exit(EXIT_FAILURE);
        }

        // regex patterns for finding urls
        auto comicRegex = `\"(https://hentai.cafe/manga/read/.*)\" title`.regex;
        auto jsonInfoRegex = `var pages = \[(.*)\]`.regex;

        // Get page html
        string comicHTML = to!string(get(url));

        // Find the url in the html mess
        auto comicUrlMatch = match(comicHTML, comicRegex);
        enforce(!comicUrlMatch.empty,
            "Could not find the reader link on page: " ~ url);

        // Sanitize the url: the group is greedy, so take the whole match,
        // keep only the first space-delimited token and drop the quotes.
        string comicURL = comicUrlMatch.captures[0];
        comicURL = split(comicURL, " ")[0].replace("\"", "");

        // Get the first manga page to extract the json with the page infos
        string mangaPageHTML = to!string(get(comicURL));

        // Get the json data of the page
        auto jsonMatch = match(mangaPageHTML, jsonInfoRegex);
        enforce(!jsonMatch.empty,
            "Could not find the page list on reader page: " ~ comicURL);

        // Keep everything after the first '=' of `var pages = [...]`.
        // Splitting on "=" and taking element 1 would cut the data short
        // whenever the json itself contains a '='. The pattern guarantees
        // that a '=' is present in the match.
        string pagesDeclaration = jsonMatch.captures[0];
        string jsonData =
            pagesDeclaration[indexOf(pagesDeclaration, '=') + 1 .. $];

        return getUrlsFromJson(jsonData);
    }

    /++
     + Creates the site handler and forwards `config` to the `BaseSite`
     + constructor.
     +
     + Params:
     +     config = downloader configuration passed on to the base class
     +/
    public this(Config config)
    {
        super(config);
    }
}