Added a check to see if the image is a jpg or a png. Made the download loop parallel so it's faster.
commit f823681dbf
parent 73363e0f1a
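The change combines two ideas: probing a URL with an HTTP GET to decide whether a page exists as .jpg or should fall back to .png, and handing the finished URL list to std.parallelism.parallel so the downloads run on worker threads. The following is a minimal, self-contained D sketch of those two ideas, not the repository's actual classes: the gallery id, page count and local file names are placeholder values invented for the example (the real code derives them from the gallery HTML and from extractFileNameFromUrl).

// Standalone sketch of the commit's two ideas. contentID, pageNumber and
// the "page_N.jpg" naming scheme are placeholders for this example only.
import std.stdio : writefln;
import std.net.curl : get, download, HTTPStatusException;
import std.parallelism : parallel;
import std.array : replace;
import std.conv : to;
import std.format : format;

// A GET that succeeds means the .jpg exists; an HTTP error status
// (404 included) raises HTTPStatusException, which we treat as "not a jpg".
bool isJPGValid(string url)
{
    try
    {
        get(url);
        return true;
    }
    catch (HTTPStatusException ex)
    {
        writefln("[i] GET returned status %s for %s", ex.status, url);
        return false;
    }
}

void main()
{
    immutable imageUrl = "https://i.nhentai.net/galleries/";
    immutable contentID = "123456"; // placeholder content id
    immutable pageNumber = 4;       // placeholder page count

    // Build the page urls sequentially, switching the extension when the
    // .jpg probe fails. Appending to the array from parallel workers would
    // race, so only the downloads below are parallelised in this sketch.
    string[] urls;
    foreach (i; 1 .. pageNumber + 1)
    {
        string url = imageUrl ~ contentID ~ "/" ~ to!string(i) ~ ".jpg";
        if (!isJPGValid(url))
            url = url.replace(".jpg", ".png");
        urls ~= url;
    }

    // parallel() spreads the iterations over the default task pool, so each
    // page downloads on its own worker thread.
    foreach (i, url; parallel(urls))
    {
        download(url, format("page_%s%s", i + 1, url[$ - 4 .. $]));
    }
}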
@@ -9,9 +9,6 @@ import sites.basesiteintf;
 +/
 class BaseSite : BaseSiteIntf
 {
-private:
-	Config _config;
-
 protected:
 	import std.stdio : writeln, writefln;
 	import std.file : exists, rmdirRecurse, mkdir;
@@ -19,6 +16,9 @@ protected:
 	import std.array : replace, split;
 	import std.string : indexOf;
 	import std.net.curl : download;
+	import std.parallelism : parallel;
+
+	Config _config;
 
 	// This function needs to be implemented by each derived site class
 	abstract string getNameFromUrl(string url);
@@ -59,7 +59,7 @@ protected:
 	+/
 	void downloadImages(string[] imageUrls, string outputPath)
 	{
-		foreach(string url; imageUrls)
+		foreach(string url; parallel(imageUrls))
 		{
 			// Extract the filename from the url
 			string filepath = outputPath ~ extractFileNameFromUrl(url);
@@ -8,10 +8,11 @@ import sites.basesite;
 +/
 class NHentai : BaseSite
 {
-	import std.conv : to;
+	import std.conv : to, ConvException;
 	import std.regex : regex, match;
-	import std.net.curl : get;
+	import std.net.curl : get, byChunk, HTTPStatusException;
 	import core.stdc.stdlib : exit, EXIT_FAILURE;
+	import std.parallelism : parallel;
 
 	/++
 	+ This is the base url for all images
@@ -19,6 +20,26 @@ class NHentai : BaseSite
 	immutable string imageUrl = "https://i.nhentai.net/galleries/";
 
 	/++
+	+ This function tests if the jpg supplied in the url actually
+	+ exists on the server or if its a 404
+	+/
+	bool isJPGValid(string url)
+	{
+		try
+		{
+			if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
+			// Make the get request
+			get(url);
+			return true;
+		}
+		catch(HTTPStatusException ex)
+		{
+			if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
+			return false;
+		}
+	}
+
+	/++
 	+ This function gets the name of the the manga by the url
 	+/
 	override string getNameFromUrl(string url)
@@ -54,7 +75,7 @@ class NHentai : BaseSite
 		// Find the content id
 		auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];
 
-		writeln(contentIDMatch);
+		if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);
 
 		// Find the number of pages
 		auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];
@@ -64,10 +85,25 @@ class NHentai : BaseSite
 
 		// Generate a list of all the images
 		string[] urls;
-		for(int i = 1; i < pageNumber; i++)
+
+		// Loop over the range in parallel to make it faster
+		auto range = new int[pageNumber];
+		foreach(i; parallel(range))
 		{
 			// Craft the url with all parameters
-			urls ~= imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
+			string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
+
+			if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);
+
+			// See if the url is a valid jpg and if not change the extension to png
+			if(!isJPGValid(extractedUrl))
+			{
+				if(_config.enable_debug_output) writefln("[i] %s is not a valdi jpg changing to png!!", extractedUrl);
+				extractedUrl = extractedUrl.replace(".jpg", ".png");
+			}
+
+			// Add the url to the list
+			urls ~= extractedUrl;
 		}
 
 		return urls;