Removed parallel loop from download. Double checking the urls of the images

This commit is contained in:
Beepboop Belong the 3rd 2020-04-13 23:02:49 +02:00
parent b2d68b2140
commit ec9f619f38
Signed by: beepboopbelong
GPG Key ID: B873A12869A7BD29
2 changed files with 23 additions and 11 deletions

View File

@ -16,7 +16,7 @@ protected:
import std.array : replace, split;
import std.string : indexOf;
import std.net.curl : download;
import std.parallelism : parallel;
/* import std.parallelism : parallel; */
Config _config;
@ -59,8 +59,9 @@ protected:
+/
void downloadImages(string[] imageUrls, string outputPath)
{
foreach(string url; parallel(imageUrls))
foreach(string url; imageUrls)
{
// Extract the filename from the url
string filepath = outputPath ~ extractFileNameFromUrl(url);

View File

@ -23,7 +23,7 @@ class NHentai : BaseSite
+ This function tests if the jpg supplied in the url actually
+ exists on the server or if it's a 404
+/
bool isJPGValid(string url)
bool isUrlValid(string url)
{
try
{
@ -88,21 +88,32 @@ class NHentai : BaseSite
// Loop over the range in parallel to make it faster
auto range = new int[pageNumber];
foreach(i, ref element; parallel(range))
foreach(i, ref elment; parallel(range))
{
// Craft the url with all parameters
string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i+1);// ~ ".jpg";
if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);
// See if the url is a valid jpg and if not change the extension to png
if(!isJPGValid(extractedUrl))
if(isUrlValid(extractedUrl ~ ".jpg"))
{
if(_config.enable_debug_output) writefln("[i] %s is not a valid jpg changing to png!!", extractedUrl);
extractedUrl = extractedUrl.replace(".jpg", ".png");
// The url with a jpg at the end didn't return a
// 404 so the image is assumed to be valid
extractedUrl ~= ".jpg";
}
else if(isUrlValid(extractedUrl ~ ".png"))
{
// The url with a png at the end didn't return a
// 404 so the image is assumed to be valid
extractedUrl ~= ".png";
}
else
{
// Both requests failed so we are going to skip this image
writeln("[!] Failed to get image for url : ", extractedUrl);
continue;
}
// Add the url to the list
if(_config.enable_debug_output) writefln("[i] The image url %s is assumed to be valid", extractedUrl);
urls ~= extractedUrl;
}