Hentai-Downloader/source/sites/nhentai.d

module sites.nhentai;

import config.downloaderconfig;
import sites.basesite;

/++
+ This class handles downloads for the site `nhentai.net`.
+
+ It resolves a gallery page (a url containing "/g/") into the gallery
+ title and into the list of direct image urls below `imageUrl`.
+/
class NHentai : BaseSite
{
    import std.conv         : to, ConvException;
    import std.regex        : regex, match, matchFirst;
    import std.net.curl     : get, byChunk, HTTPStatusException;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    import std.parallelism  : parallel;
    import std.exception    : enforce;
    // These names are used below; import them here so the class does not
    // depend on whatever `sites.basesite` happens to re-export.
    import std.stdio        : writeln, writefln;
    import std.string       : indexOf;
    import std.array        : replace;

    /++
    + This is the base url for all images
    +/
    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
    + Tests whether the jpg behind `url` actually exists on the server.
    +
    + Params:
    +   url = full url of the image to probe
    +
    + Returns: `true` if the GET request succeeded, `false` if the server
    +          answered with an HTTP error status (e.g. a 404).
    +
    + NOTE(review): this issues a full GET, so the response body is fetched
    + just to learn whether the resource exists.
    +/
    bool isJPGValid(string url)
    {
        try
        {
            if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
            // Only the outcome of the request matters, the body is discarded
            get(url);
            return true;
        }
        catch(HTTPStatusException ex)
        {
            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
            return false;
        }
    }

    /++
    + Gets the name of the manga from the gallery page at `url`.
    +
    + Params:
    +   url = url of the gallery page
    +
    + Returns: the text between the first `<h1>` and the nearest `</h1>`.
    +
    + Throws: `Exception` if the page contains no `<h1>` element.
    +/
    override string getNameFromUrl(string url)
    {
        // Get the site content
        string siteContent = to!string(get(url));

        // Lazy quantifier: stop at the first closing tag instead of the last
        // one on the line
        auto nameCaptures = matchFirst(siteContent, regex(`<h1>(.*?)</h1>`));
        enforce(!nameCaptures.empty, "[!] Could not find the title in the page at " ~ url);

        // Return only the name, not the html tags
        return nameCaptures[1];
    }

    /++
    + Builds the list of image urls for the gallery page at `url`.
    +
    + The content id and the page count are scraped from the gallery html.
    + For every page the `.jpg` url is probed with `isJPGValid`; if that
    + fails the extension is switched to `.png`.
    +
    + Params:
    +   url = url of the gallery page, must contain "/g/"
    +
    + Returns: one url per page in page order, or an empty list if `url`
    +          is not a gallery url.
    +
    + Throws: `Exception` if the content id or the page count cannot be
    +         found in the downloaded html.
    +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a nhentai url
        if(indexOf(url, "/g/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/g/" it was ignored!`);
            // Ignore the url as the message says instead of killing the
            // whole process from inside a site handler
            return null;
        }

        // Regex patterns for finding the content id and the page count.
        // The captures only accept digits so they cannot run past the number.
        auto contentIDRegex = regex(`https://t\.nhentai\.net/galleries/([0-9]+)/cover\.jpg`);
        auto pageCountRegex = regex(`<div>([0-9]+) pages</div>`);

        // Download the html
        auto coverHtml = to!string(get(url));

        // Find the content id
        auto contentIDCaptures = matchFirst(coverHtml, contentIDRegex);
        enforce(!contentIDCaptures.empty, "[!] Could not find the content id in the page at " ~ url);
        string contentID = contentIDCaptures[1];

        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentID);

        // Find the number of pages
        auto pageCountCaptures = matchFirst(coverHtml, pageCountRegex);
        enforce(!pageCountCaptures.empty, "[!] Could not find the page count in the page at " ~ url);

        // Convert the page number to an integer
        immutable int pageNumber = to!int(pageCountCaptures[1]);

        // One slot per page. Every worker writes only to its own slot, so no
        // synchronisation is needed and the page order is preserved.
        auto urls = new string[pageNumber];

        // Probe the pages in parallel to make it faster
        foreach(index, ref pageUrl; parallel(urls))
        {
            // Craft the url with all parameters; `index` is 0-based while
            // the image files are numbered starting at 1
            string extractedUrl = imageUrl ~ contentID ~ "/" ~ to!string(index + 1) ~ ".jpg";

            if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);

            // See if the url is a valid jpg and if not change the extension to png
            if(!isJPGValid(extractedUrl))
            {
                if(_config.enable_debug_output) writefln("[i] %s is not a valid jpg changing to png!!", extractedUrl);
                extractedUrl = replace(extractedUrl, ".jpg", ".png");
            }

            // Store the url in the slot that belongs to this page
            pageUrl = extractedUrl;
        }

        return urls;
    }

    /++
    + This constructor just calls the inherited constructor
    +
    + Params:
    +   config = configuration handed on to `BaseSite`
    +/
    public this(Config config)
    {
        super(config);
    }
}