Hentai-Downloader/source/sites/nhentai.d

module sites.nhentai;

import config.downloaderconfig;
import sites.basesite;

/++
+ Site handler for galleries hosted on `nhentai.net`.
+
+ Scrapes a gallery page for its title, content id and page count and turns
+ those into a list of direct image urls.
+/
class NHentai : BaseSite
{
    import std.conv         : to, ConvException;
    import std.regex        : regex, match;
    import std.net.curl     : get, byChunk, HTTPStatusException;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    import std.parallelism  : parallel;

    /++
    + Base url under which all gallery images are requested
    +/
    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
    + Tests whether the jpg at `url` can be fetched from the server.
    +
    + Params:
    +   url = full url of the jpg to probe
    +
    + Returns: `true` if the GET request succeeds, `false` if it raises a
    +          `HTTPStatusException` (for example on a 404).
    +
    + Note: any other exception raised by `get` is not caught here and
    +       propagates to the caller.
    +/
    bool isJPGValid(string url)
    {
        try
        {
            if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
            // Issue the GET request; only success/failure matters, the body is discarded
            get(url);
            return true;
        }
        catch(HTTPStatusException ex)
        {
            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
            return false;
        }
    }

    /++
    + Gets the name of the manga from the gallery page at `url`.
    +
    + Params:
    +   url = url of the gallery page
    +
    + Returns: the text between the first `<h1>` and its closing `</h1>`
    +
    + Throws: `Exception` if the page contains no `<h1>...</h1>` element
    +/
    override string getNameFromUrl(string url)
    {
        // Get the site content
        string siteContent = to!string(get(url));

        // Non-greedy so a second </h1> on the same line cannot extend the capture
        auto nameMatch = match(siteContent, regex(`<h1>(.*?)</h1>`));

        // Reading captures of a failed match is invalid, so fail loudly instead
        if(nameMatch.empty)
        {
            throw new Exception("Could not find a <h1> title in the page at " ~ url);
        }

        // Return only the name, not the html tags
        return nameMatch.captures[1];
    }

    /++
    + Builds the list of image urls for the gallery page at `url`.
    +
    + The page is downloaded once to extract the content id and the page
    + count. Every page is then probed in parallel: if the `.jpg` variant
    + cannot be fetched the `.png` variant is used instead.
    +
    + Params:
    +   url = url of the gallery page; must contain `/g/`, otherwise the
    +         process is terminated with `EXIT_FAILURE`
    +
    + Returns: one image url per page, in page order
    +
    + Throws: `Exception` if the content id or the page count cannot be found
    +         in the downloaded html
    +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a nhentai url
        if(indexOf(url, "/g/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/g/" it was ignored!`);
            // FIXME: terminating the whole process here is heavy handed, but
            // callers may rely on it so it is kept for now
            exit(EXIT_FAILURE);
        }

        // Digit-only capture groups: a greedy `.*` could run past the value
        // and capture unrelated markup
        auto contentIDRegex = regex(`https://t\.nhentai\.net/galleries/([0-9]+)/cover\.jpg`);
        auto pageCountRegex = regex(`<div>([0-9]+) pages</div>`);

        // Download the html
        auto coverHtml = to!string(get(url));

        // Find the content id
        auto contentIDResult = match(coverHtml, contentIDRegex);
        if(contentIDResult.empty)
        {
            throw new Exception("Could not extract the content id from " ~ url);
        }
        immutable string contentID = contentIDResult.captures[1];

        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentID);

        // Find the number of pages
        auto pageCountResult = match(coverHtml, pageCountRegex);
        if(pageCountResult.empty)
        {
            throw new Exception("Could not extract the page count from " ~ url);
        }

        // Convert the page number to an integer
        immutable int pageNumber = to!int(pageCountResult.captures[1]);

        // One pre-allocated slot per page. Each parallel iteration writes only
        // its own slot, so there is no shared append (which would be a data
        // race) and the result stays in page order.
        auto urls = new string[pageNumber];

        foreach(i, ref pageUrl; parallel(urls))
        {
            // Pages are numbered starting at 1, the loop index starts at 0.
            // NOTE(review): 1-based numbering is taken from how the site serves
            // images, it is not visible in this file - confirm against a live gallery.
            immutable string pageBase = imageUrl ~ contentID ~ "/" ~ to!string(i + 1);
            string extractedUrl = pageBase ~ ".jpg";

            if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);

            // See if the url is a valid jpg and if not fall back to the png extension
            if(!isJPGValid(extractedUrl))
            {
                if(_config.enable_debug_output) writefln("[i] %s is not a valid jpg changing to png!!", extractedUrl);
                extractedUrl = pageBase ~ ".png";
            }

            // Store the url in the slot that belongs to this page
            pageUrl = extractedUrl;
        }

        return urls;
    }

    /++
    + Forwards the given configuration to the inherited constructor.
    +
    + Params:
    +   config = downloader configuration passed on to `BaseSite`
    +/
    public this(Config config)
    {
        super(config);
    }
}
Initial commit 2020-04-13 13:19:14 -04:00			`module sites.nhentai;`

			`import config.downloaderconfig;`
			`import sites.basesite;`

			`/++`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			+ This class handles downloads for the site `nhentai.net`
Initial commit 2020-04-13 13:19:14 -04:00			`+/`
			`class NHentai : BaseSite`
			`{`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`import std.conv : to, ConvException;`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`import std.regex : regex, match;`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`import std.net.curl : get, byChunk, HTTPStatusException;`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`import core.stdc.stdlib : exit, EXIT_FAILURE;`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`import std.parallelism : parallel;`
Initial commit 2020-04-13 13:19:14 -04:00
			`/++`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`+ This is the base url for all images`
Initial commit 2020-04-13 13:19:14 -04:00			`+/`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`immutable string imageUrl = "https://i.nhentai.net/galleries/";`
Initial commit 2020-04-13 13:19:14 -04:00
			`/++`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`+ This function tests if the jpg supplied in the url actually`
			`+ exists on the server or if its a 404`
			`+/`
			`bool isJPGValid(string url)`
			`{`
			`try`
			`{`
			`if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);`
			`// Make the get request`
			`get(url);`
			`return true;`
			`}`
			`catch(HTTPStatusException ex)`
			`{`
			`if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);`
			`return false;`
			`}`
			`}`

			`/++`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`+ This function gets the name of the the manga by the url`
Initial commit 2020-04-13 13:19:14 -04:00			`+/`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`override string getNameFromUrl(string url)`
Initial commit 2020-04-13 13:19:14 -04:00			`{`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Get the site content`
			`string siteContent = to!string(get(url));`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Find the name of the manga`
			auto nameRegex = `<h1>(.*)</h1>`.regex;
			`auto nameMatch = match(siteContent, nameRegex);`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Return only the name not the hmtl tags`
			`return nameMatch.captures[1];`
Initial commit 2020-04-13 13:19:14 -04:00			`}`

Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`override string[] getImageUrlsFromBase(string url)`
Initial commit 2020-04-13 13:19:14 -04:00			`{`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Check if the url is a nhentai url`
			`if(indexOf(url, "/g/") == -1)`
Initial commit 2020-04-13 13:19:14 -04:00			`{`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			writefln(`[!] The given url doesn't contain "/g/" it was ignored!`);
			`// FIXME: no! :<`
			`exit(EXIT_FAILURE);`
Initial commit 2020-04-13 13:19:14 -04:00			`}`

Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Regex patterns for finding ulrs and stuff`
			`auto contentIDRegex = "https://t.nhentai.net/galleries/([0-9].*)/cover.jpg";`
			`auto pageCountRegex = "<div>([0-9].*) pages</div>";`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Download the hmtl`
			`auto coverHtml = to!string(get(url));`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Find the content id`
			`auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];`
Initial commit 2020-04-13 13:19:14 -04:00
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Find the number of pages`
			`auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Convert the page number to an integer`
			`immutable int pageNumber = to!int(pageNumberMatch);`
Initial commit 2020-04-13 13:19:14 -04:00
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`// Generate a list of all the images`
			`string[] urls;`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00
			`// Loop over the range in parallel to make it faster`
			`auto range = new int[pageNumber];`
Fixed parrallel loop 2020-04-13 16:37:02 -04:00			`foreach(i, ref element; parallel(range))`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`{`
			`// Craft the url with all parameters`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";`

			`if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);`

			`// See if the url is a valid jpg and if not change the extension to png`
			`if(!isJPGValid(extractedUrl))`
			`{`
Typo lol 2020-04-13 16:28:35 -04:00			`if(_config.enable_debug_output) writefln("[i] %s is not a valid jpg changing to png!!", extractedUrl);`
Added a check to see if the image is a jpg or png. Made the download loop parallel so its faster 2020-04-13 16:12:54 -04:00			`extractedUrl = extractedUrl.replace(".jpg", ".png");`
			`}`

			`// Add the url to the list`
			`urls ~= extractedUrl;`
Fixed nhentai class. Was using an api that just died 2020-04-13 14:42:57 -04:00			`}`
Initial commit 2020-04-13 13:19:14 -04:00
			`return urls;`
			`}`

			`/++`
			`+ This constructor just calls the inherited constructor`
			`+/`
			`public this(Config config)`
			`{`
			`super(config);`
			`}`
			`}`