diff --git a/source/sites/basesite.d b/source/sites/basesite.d
index 422a858..3748ba1 100644
--- a/source/sites/basesite.d
+++ b/source/sites/basesite.d
@@ -9,9 +9,6 @@ import sites.basesiteintf;
  +/
 class BaseSite : BaseSiteIntf
 {
-private:
-    Config _config;
-
 protected:
     import std.stdio : writeln, writefln;
     import std.file : exists, rmdirRecurse, mkdir;
@@ -19,6 +16,9 @@ protected:
     import std.array : replace, split;
     import std.string : indexOf;
     import std.net.curl : download;
+    import std.parallelism : parallel;
+
+    Config _config;
 
     // This function needs to be implemented by each derived site class
     abstract string getNameFromUrl(string url);
@@ -59,7 +59,7 @@ protected:
      +/
     void downloadImages(string[] imageUrls, string outputPath)
     {
-        foreach(string url; imageUrls)
+        foreach(string url; parallel(imageUrls))
         {
             // Extract the filename from the url
             string filepath = outputPath ~ extractFileNameFromUrl(url);
diff --git a/source/sites/nhentai.d b/source/sites/nhentai.d
index a9a48cb..d55824b 100644
--- a/source/sites/nhentai.d
+++ b/source/sites/nhentai.d
@@ -8,10 +8,11 @@ import sites.basesite;
  +/
 class NHentai : BaseSite
 {
-    import std.conv : to;
+    import std.conv : to, ConvException;
     import std.regex : regex, match;
-    import std.net.curl : get;
+    import std.net.curl : get, byChunk, HTTPStatusException;
     import core.stdc.stdlib : exit, EXIT_FAILURE;
+    import std.parallelism : parallel;
 
     /++
      + This is the base url for all images
@@ -19,6 +20,31 @@ class NHentai : BaseSite
     immutable string imageUrl = "https://i.nhentai.net/galleries/";
 
     /++
+     + This function tests if the jpg supplied in the url actually
+     + exists on the server or if it's a 404
+     +
+     + Params:
+     +     url = the full url of the image to probe
+     + Returns: true if the get request succeeded, false if it raised an
+     +          HTTPStatusException; any other exception reaches the caller
+     +/
+    bool isJPGValid(string url)
+    {
+        try
+        {
+            if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
+            // Make the get request, the returned content is not used
+            get(url);
+            return true;
+        }
+        catch(HTTPStatusException ex)
+        {
+            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
+            return false;
+        }
+    }
+
+    /++
      + This function gets the name of the the manga by the url
      +/
     override string getNameFromUrl(string url)
@@ -54,7 +80,7 @@ class NHentai : BaseSite
 
         // Find the content id
         auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];
-        writeln(contentIDMatch);
+        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);
 
         // Find the number of pages
         auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];
@@ -64,10 +90,28 @@ class NHentai : BaseSite
 
         // Generate a list of all the images
         string[] urls;
-        for(int i = 1; i < pageNumber; i++)
+
+        // Size the list up front so every worker only writes to its own slot
+        // NOTE(review): assumes pageNumber is the page count of the gallery - confirm
+        urls.length = pageNumber;
+
+        // Loop over the pages in parallel to make it faster
+        foreach(index, ref pageUrl; parallel(urls))
         {
             // Craft the url with all parameters
-            urls ~= imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
+            // (index counts from 0, so index + 1 is used as the page number)
+            string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(index + 1) ~ ".jpg";
+
+            if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);
+
+            // See if the url is a valid jpg and if not change the extension to png
+            if(!isJPGValid(extractedUrl))
+            {
+                if(_config.enable_debug_output) writefln("[i] %s is not a valdi jpg changing to png!!", extractedUrl);
+                extractedUrl = extractedUrl.replace(".jpg", ".png");
+            }
+
+            // Store the url in the slot of its page, this keeps the list in page order
+            pageUrl = extractedUrl;
         }
 
         return urls;