Hentai-Downloader/source/sites/hentaicafe.d

module sites.hentaicafe;

import config.downloaderconfig;
import sites.basesite;

/++
+ This class handles downloads for the site `hentai.cafe`
+/
class HentaiCafe : BaseSite
{
protected:
    import std.net.curl : get;
    import std.conv     : to;
    import std.regex    : regex, match;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    // Explicit imports for every std symbol used below, so this class does
    // not depend on these names being re-exported by another module.
    import std.array    : replace;
    import std.json     : JSONValue, parseJSON;
    import std.stdio    : writefln;
    import std.string   : indexOf;

    /++
    + Runs `pattern` against `text` and returns the first capture group
    + of the first match.
    +
    + Params:
    +   text    = the text to search (usually downloaded html)
    +   pattern = a regex pattern containing at least one capture group
    +   what    = human readable description of the searched item,
    +             only used for the error message
    +
    + Throws: Exception if the pattern doesn't match anywhere in `text`
    +/
    private static string firstCapture(string text, string pattern, string what)
    {
        auto found = match(text, regex(pattern));

        // Fail with a readable message instead of an out of range access
        // on an empty match when the page layout isn't what we expect
        if(found.empty)
            throw new Exception("[!] Could not find " ~ what ~ " in the downloaded page!");

        return found.captures[1];
    }

    /++
    + Extracts the image urls from the json array that the reader
    + page stores in its `pages` variable
    +
    + Params:
    +   json = a json array of objects, each of which has a string
    +          member called `url`
    +
    + Returns: the value of every `url` member with all backslashes removed
    +
    + Throws: JSONException if `json` isn't valid json, isn't an array,
    +         or if one of the entries has no string member `url`
    +/
    string[] getUrlsFromJson(string json)
    {
        string[] urls;
        // Parse the json
        JSONValue parsedJson = parseJSON(json);

        // Extract the urls of the images
        foreach(JSONValue val; parsedJson.array)
            urls ~= val["url"].str.replace("\\", "");

        return urls;
    }

    /++
    + This function gets the name of the manga by the url
    +
    + Params:
    +   url = the url of the page to download
    +
    + Returns: the text between the first `<h3>` and the closest `</h3>`
    +
    + Throws: Exception if the page doesn't contain a `<h3>` heading
    +/
    override string getNameFromUrl(string url)
    {
        // Get the site html as a string
        string siteContent = to!string(get(url));

        // Lazy quantifier: stop at the first closing tag so a second
        // heading on the same line doesn't end up in the name
        return firstCapture(siteContent, `<h3>(.*?)</h3>`, "the manga name");
    }

    /++
    + This function returns a list of all the image urls by
    + parsing the html
    +
    + Params:
    +   url = the url of the comic, has to contain "/hc.fyi/"
    +
    + Returns: the image urls listed by the reader page
    +
    + Throws: Exception if the reader link or the page list can't be
    +         found in the downloaded html
    +
    + Note: terminates the whole process with EXIT_FAILURE if `url`
    +       doesn't contain "/hc.fyi/"
    +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a hentai.cafe comic url
        if(indexOf(url, "/hc.fyi/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
            // FIXME: no! :<
            exit(EXIT_FAILURE);
        }

        // Get page html
        string comicHTML = to!string(get(url));

        // The capture group holds exactly the url between the quotes,
        // so no splitting or quote stripping is needed afterwards
        string comicURL = firstCapture(
            comicHTML,
            `\"(https://hentai\.cafe/manga/read/[^\"]*)\" title`,
            "the reader link");

        // Get the first manga page to extract the json with the page infos
        string mangaPageHTML = to!string(get(comicURL));

        // Capture the json array including its brackets. Taking the group
        // instead of splitting the match at "=" keeps urls that contain
        // a "=" (e.g. in a query string) intact.
        string jsonData = firstCapture(
            mangaPageHTML,
            `var pages = (\[.*\])`,
            "the page list");

        return getUrlsFromJson(jsonData);
    }

    /++
    + This constructor just calls the inherited constructor
    +
    + Params:
    +   config = the configuration that is handed to the base class
    +/
    public this(Config config)
    {
        super(config);
    }
}