Added a check to see if the image is a jpg or png. Made the download loop parallel so it's faster.

This commit is contained in:
Beepboop Belong the 3rd 2020-04-13 22:12:54 +02:00
parent 73363e0f1a
commit f823681dbf
Signed by: beepboopbelong
GPG Key ID: B873A12869A7BD29
2 changed files with 45 additions and 9 deletions

View File

@ -9,9 +9,6 @@ import sites.basesiteintf;
+/ +/
class BaseSite : BaseSiteIntf class BaseSite : BaseSiteIntf
{ {
private:
Config _config;
protected: protected:
import std.stdio : writeln, writefln; import std.stdio : writeln, writefln;
import std.file : exists, rmdirRecurse, mkdir; import std.file : exists, rmdirRecurse, mkdir;
@ -19,6 +16,9 @@ protected:
import std.array : replace, split; import std.array : replace, split;
import std.string : indexOf; import std.string : indexOf;
import std.net.curl : download; import std.net.curl : download;
import std.parallelism : parallel;
Config _config;
// This function needs to be implemented by each derived site class // This function needs to be implemented by each derived site class
abstract string getNameFromUrl(string url); abstract string getNameFromUrl(string url);
@ -59,7 +59,7 @@ protected:
+/ +/
void downloadImages(string[] imageUrls, string outputPath) void downloadImages(string[] imageUrls, string outputPath)
{ {
foreach(string url; imageUrls) foreach(string url; parallel(imageUrls))
{ {
// Extract the filename from the url // Extract the filename from the url
string filepath = outputPath ~ extractFileNameFromUrl(url); string filepath = outputPath ~ extractFileNameFromUrl(url);

View File

@ -8,10 +8,11 @@ import sites.basesite;
+/ +/
class NHentai : BaseSite class NHentai : BaseSite
{ {
import std.conv : to; import std.conv : to, ConvException;
import std.regex : regex, match; import std.regex : regex, match;
import std.net.curl : get; import std.net.curl : get, byChunk, HTTPStatusException;
import core.stdc.stdlib : exit, EXIT_FAILURE; import core.stdc.stdlib : exit, EXIT_FAILURE;
import std.parallelism : parallel;
/++ /++
+ This is the base url for all images + This is the base url for all images
@ -19,6 +20,26 @@ class NHentai : BaseSite
immutable string imageUrl = "https://i.nhentai.net/galleries/"; immutable string imageUrl = "https://i.nhentai.net/galleries/";
/++ /++
+ This function tests whether the jpg supplied in the url actually
+ exists on the server or whether the request fails (e.g. with a 404)
+/
bool isJPGValid(string url)
{
    // Params:
    //   url = absolute url of the image to probe
    // Returns:
    //   true when the server answers the probe with a 2xx status,
    //   false for any other HTTP status.
    // Throws:
    //   CurlException on transport-level failures (same as the plain
    //   `get` call this probe replaces).
    import std.net.curl : HTTP;

    if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);

    // Only the existence of the resource matters here, so ask for the
    // headers with HEAD instead of transferring the whole image body
    // just to throw it away again.
    auto probe = HTTP(url);
    probe.method = HTTP.Method.head;
    probe.perform();

    immutable status = probe.statusLine.code;
    if(status / 100 == 2)
    {
        return true;
    }

    // Some servers refuse HEAD (405 Method Not Allowed / 501 Not
    // Implemented). Fall back to a regular GET in that case so the
    // result matches what a full request would have reported.
    if(status == 405 || status == 501)
    {
        try
        {
            get(url);
            return true;
        }
        catch(HTTPStatusException ex)
        {
            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
            return false;
        }
    }

    if(_config.enable_debug_output) writeln("[i] Get request returned status: ", status);
    return false;
}
/++
+ This function gets the name of the the manga by the url + This function gets the name of the the manga by the url
+/ +/
override string getNameFromUrl(string url) override string getNameFromUrl(string url)
@ -54,7 +75,7 @@ class NHentai : BaseSite
// Find the content id // Find the content id
auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1]; auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];
writeln(contentIDMatch); if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);
// Find the number of pages // Find the number of pages
auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1]; auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];
@ -64,10 +85,25 @@ class NHentai : BaseSite
// Generate a list of all the images // Generate a list of all the images
string[] urls; string[] urls;
for(int i = 1; i < pageNumber; i++)
// Loop over the range in parallel to make it faster
auto range = new int[pageNumber];
foreach(i; parallel(range))
{ {
// Craft the url with all parameters // Craft the url with all parameters
urls ~= imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg"; string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);
// See if the url is a valid jpg and if not change the extension to png
if(!isJPGValid(extractedUrl))
{
if(_config.enable_debug_output) writefln("[i] %s is not a valdi jpg changing to png!!", extractedUrl);
extractedUrl = extractedUrl.replace(".jpg", ".png");
}
// Add the url to the list
urls ~= extractedUrl;
} }
return urls; return urls;