Removed parallel loop from download. Double checking the urls of the images

This commit is contained in:
Beepboop Belong the 3rd 2020-04-13 23:02:49 +02:00
parent b2d68b2140
commit ec9f619f38
Signed by: beepboopbelong
GPG Key ID: B873A12869A7BD29
2 changed files with 23 additions and 11 deletions

View File

@ -16,7 +16,7 @@ protected:
import std.array : replace, split; import std.array : replace, split;
import std.string : indexOf; import std.string : indexOf;
import std.net.curl : download; import std.net.curl : download;
import std.parallelism : parallel; /* import std.parallelism : parallel; */
Config _config; Config _config;
@ -59,8 +59,9 @@ protected:
+/ +/
void downloadImages(string[] imageUrls, string outputPath) void downloadImages(string[] imageUrls, string outputPath)
{ {
foreach(string url; parallel(imageUrls)) foreach(string url; imageUrls)
{ {
// Extract the filename from the url // Extract the filename from the url
string filepath = outputPath ~ extractFileNameFromUrl(url); string filepath = outputPath ~ extractFileNameFromUrl(url);

View File

@ -23,7 +23,7 @@ class NHentai : BaseSite
+ This function tests if the jpg supplied in the url actually + This function tests if the jpg supplied in the url actually
+ exists on the server or if its a 404 + exists on the server or if its a 404
+/ +/
bool isJPGValid(string url) bool isUrlValid(string url)
{ {
try try
{ {
@ -88,21 +88,32 @@ class NHentai : BaseSite
// Loop over the range in parallel to make it faster // Loop over the range in parallel to make it faster
auto range = new int[pageNumber]; auto range = new int[pageNumber];
foreach(i, ref element; parallel(range)) foreach(i, ref elment; parallel(range))
{ {
// Craft the url with all parameters // Craft the url with all parameters
string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg"; string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i+1);// ~ ".jpg";
if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl); if(isUrlValid(extractedUrl ~ ".jpg"))
// See if the url is a valid jpg and if not change the extension to png
if(!isJPGValid(extractedUrl))
{ {
if(_config.enable_debug_output) writefln("[i] %s is not a valid jpg changing to png!!", extractedUrl); // The url with a jpg at the end didn't return a
extractedUrl = extractedUrl.replace(".jpg", ".png"); // 404 so the image is assumed to be valid
extractedUrl ~= ".jpg";
}
else if(isUrlValid(extractedUrl ~ ".png"))
{
// The url with a png at the end didn't return a
// 404 so the image is assumed to be valid
extractedUrl ~= ".png";
}
else
{
// Both requests failed so we are going to skip this image
writeln("[!] Failed to get image for url : ", extractedUrl);
continue;
} }
// Add the url to the list // Add the url to the list
if(_config.enable_debug_output) writefln("[i] The image url %s is assumed to be valid", extractedUrl);
urls ~= extractedUrl; urls ~= extractedUrl;
} }