module sites.nhentai;

import config.downloaderconfig;
import sites.basesite;

/++
 + This class handles downloads for the site `nhentai.net`
 + (a usage sketch follows the class body)
 +/
class NHentai : BaseSite
{
    import std.conv : to, ConvException;
    import std.regex : regex, match;
    import std.net.curl : get, byChunk, HTTPStatusException;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    import std.parallelism : parallel;
    // writeln/writefln and indexOf are used below but were not imported
    import std.stdio : writeln, writefln;
    import std.string : indexOf;

    /++
     + This is the base url for all images
     +/
    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
     + This function tests if the image at the supplied url actually
     + exists on the server or if it's a 404
     +/
    bool isUrlValid(string url)
    {
        try
        {
            if(_config.enable_debug_output) writeln("[i] Trying to request ", url);
            // Make the get request
            get(url);
            return true;
        }
        catch(HTTPStatusException ex)
        {
            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
            return false;
        }
    }
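
    // A minimal sketch (not compiled by default) of how the probe above is
    // used: try the jpg first and fall back to png when the server answers
    // 404. `site` stands for an already constructed NHentai instance, the
    // gallery id is hypothetical and network access is assumed.
    version (none) unittest
    {
        NHentai site;
        string base = "https://i.nhentai.net/galleries/123456/1";
        string ext  = site.isUrlValid(base ~ ".jpg") ? ".jpg" : ".png";
    }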

    /++
     + This function gets the name of the manga from the url
     +/
    override string getNameFromUrl(string url)
    {
        // Get the site content
        string siteContent = to!string(get(url));

        // Find the name of the manga
        auto nameRegex = `<h1>(.*)</h1>`.regex;
        auto nameMatch = match(siteContent, nameRegex);

        // Return only the name, not the html tags
        return nameMatch.captures[1];
    }
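
    // A tiny self-contained check of the <h1> extraction used above, run
    // against a made-up html snippet instead of a live page.
    unittest
    {
        import std.regex : regex, match;

        auto html = `<body><h1>Example Title</h1></body>`;
        auto nameMatch = match(html, `<h1>(.*)</h1>`.regex);
        assert(nameMatch.captures[1] == "Example Title");
    }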
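
    /++
     + This function builds the list of direct image urls for a gallery url:
     + it extracts the content id and the page count from the gallery page
     + and then probes every page for a jpg or png image
     +/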
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a nhentai url
        if(indexOf(url, "/g/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/g/", it was ignored!`);
            // FIXME: no! :<
            exit(EXIT_FAILURE);
        }

        // Regex patterns for finding the content id and the page count
        auto contentIDRegex = "https://t.nhentai.net/galleries/([0-9].*)/cover.jpg";
        auto pageCountRegex = "<div>([0-9].*) pages</div>";

        // Download the html
        auto coverHtml = to!string(get(url));

        // Find the content id
        auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];

        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);

        // Find the number of pages
        auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];

        // Convert the page number to an integer
        immutable int pageNumber = to!int(pageNumberMatch);

        // Generate a list of all the images
        string[] urls;

        // Loop over the range in parallel to make it faster
        auto range = new int[pageNumber];
        foreach(i, ref element; parallel(range))
        {
            // Craft the url with all parameters
            string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i+1);// ~ ".jpg";

            if(isUrlValid(extractedUrl ~ ".jpg"))
            {
                // The url with a jpg at the end didn't return a
                // 404 so the image is assumed to be valid
                extractedUrl ~= ".jpg";
            }
            else if(isUrlValid(extractedUrl ~ ".png"))
            {
                // The url with a png at the end didn't return a
                // 404 so the image is assumed to be valid
                extractedUrl ~= ".png";
            }
            else
            {
                // Both requests failed so we are going to skip this image
                writeln("[!] Failed to get image for url : ", extractedUrl);
                continue;
            }

            // Add the url to the list
            if(_config.enable_debug_output) writefln("[i] The image url %s is assumed to be valid", extractedUrl);
            // Appending to the shared array must be serialised because this
            // loop body runs on multiple threads
            synchronized
            {
                urls ~= extractedUrl;
            }
        }

        return urls;
    }

    /++
     + This constructor just calls the inherited constructor
     +/
    public this(Config config)
    {
        super(config);
    }
}
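
// A rough end-to-end usage sketch, not compiled by default. How the Config
// instance is obtained is assumed here (its construction lives in
// config.downloaderconfig) and the gallery id below is hypothetical;
// network access is required.
version (none) unittest
{
    import std.stdio : writeln;

    Config config;                       // assume a properly initialised config
    auto site = new NHentai(config);
    auto galleryUrl = "https://nhentai.net/g/123456/";

    writeln("Name: ", site.getNameFromUrl(galleryUrl));
    foreach(imageUrl; site.getImageUrlsFromBase(galleryUrl))
        writeln(imageUrl);
}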