Added a check to see whether the image is a JPG or a PNG. Made the download loop parallel so it's faster.

2020-04-13 22:12:54 +02:00 · 2020-04-13 22:12:54 +02:00 · f823681dbf
commit f823681dbf
parent 73363e0f1a
2 changed files with 45 additions and 9 deletions
--- a/source/sites/basesite.d
+++ b/source/sites/basesite.d
@ -9,9 +9,6 @@ import sites.basesiteintf;
 +/
 class BaseSite : BaseSiteIntf
 {
-private:
-    Config _config;
-
 protected:
    import std.stdio    : writeln, writefln;
    import std.file     : exists, rmdirRecurse, mkdir;
@ -19,6 +16,9 @@ protected:
    import std.array    : replace, split;
    import std.string   : indexOf;
    import std.net.curl : download;
+    import std.parallelism : parallel;
+
+    Config _config;

    // This function needs to be implemented by each derived site class
    abstract string getNameFromUrl(string url);
@ -59,7 +59,7 @@ protected:
    +/
    void downloadImages(string[] imageUrls, string outputPath)
    {
-        foreach(string url; imageUrls)
+        foreach(string url; parallel(imageUrls))
        {
            // Extract the filename from the url
            string filepath = outputPath ~ extractFileNameFromUrl(url);
--- a/source/sites/nhentai.d
+++ b/source/sites/nhentai.d
@ -8,10 +8,11 @@ import sites.basesite;
 +/
 class NHentai : BaseSite
 {
-    import std.conv         : to;
+    import std.conv         : to, ConvException;
    import std.regex        : regex, match;
-    import std.net.curl     : get;
+    import std.net.curl     : get, byChunk, HTTPStatusException;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
+    import std.parallelism  : parallel;

    /++
    + This is the base url for all images
@ -19,6 +20,26 @@ class NHentai : BaseSite
    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
+    + This function tests whether the jpg at the supplied url actually
+    + exists on the server or whether the request fails with an HTTP error status (e.g. a 404)
+    +/
+    bool isJPGValid(string url) // true if the GET below completes without an HTTPStatusException
+    {
+        try
+        {
+            if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
+            // Issue the GET request; the response is discarded, only success or failure matters
+            get(url);
+            return true; // reached only when get() did not throw
+        }
+        catch(HTTPStatusException ex) // only this exception type is handled; anything else propagates
+        {
+            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
+            return false; // the request failed with an HTTP status error
+        }
+    }
+
+    /++
    + This function gets the name of the manga from the url
    +/
    override string getNameFromUrl(string url)
@ -54,7 +75,7 @@ class NHentai : BaseSite
        // Find the content id
        auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];

-        writeln(contentIDMatch);
+        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);

        // Find the number of pages
        auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];
@ -64,10 +85,25 @@ class NHentai : BaseSite

        // Generate a list of all the images
        string[] urls;
-        for(int i = 1; i < pageNumber; i++)
+
+        // Loop over the range in parallel to make it faster
+        auto range = new int[pageNumber];
+        foreach(i; parallel(range))
        {
            // Craft the url with all parameters
-            urls ~= imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
+            string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
+
+            if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);
+
+            // See if the url is a valid jpg and if not change the extension to png
+            if(!isJPGValid(extractedUrl))
+            {
+                if(_config.enable_debug_output) writefln("[i] %s is not a valdi jpg changing to png!!", extractedUrl);
+                extractedUrl = extractedUrl.replace(".jpg", ".png");
+            }
+
+            // Add the url to the list
+            urls ~= extractedUrl;
        }

        return urls;