Hentai-Downloader/source/sites/nhentai.d

120 lines
3.6 KiB
D
Raw Normal View History

2020-04-13 13:19:14 -04:00
module sites.nhentai;
import config.downloaderconfig;
import sites.basesite;
/++
+ This class handles downloads for the site `nhentai.net`
+/
class NHentai : BaseSite
{
    import std.conv : to, ConvException;
    import std.regex : regex, match;
    import std.net.curl : get, byChunk, HTTPStatusException;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    import std.parallelism : parallel;

    /++
    + This is the base url for all images
    +/
    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
    + Tests whether the jpg at the given url actually exists on the
    + server by issuing a GET request for it.
    +
    + Params:
    +   url = full url of the image to probe
    +
    + Returns: `true` if the request succeeded, `false` if it raised an
    +          `HTTPStatusException` (e.g. a 404).
    +
    + Any other exception thrown by `get` is deliberately not caught, so a
    + transport failure is not mistaken for "this page is not a jpg".
    +/
    bool isJPGValid(string url)
    {
        try
        {
            if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
            // Make the get request; the body is discarded, only success matters
            get(url);
            return true;
        }
        catch(HTTPStatusException ex)
        {
            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
            return false;
        }
    }

    /++
    + Gets the name of the manga by downloading the page at `url` and
    + extracting the contents of its first `<h1>` element.
    +
    + Params:
    +   url = url of the gallery page
    +
    + Returns: the text between `<h1>` and `</h1>`, without the tags
    +
    + Throws: `Exception` if the page contains no `<h1>…</h1>` element
    +/
    override string getNameFromUrl(string url)
    {
        // Get the site content
        string siteContent = to!string(get(url));

        // Find the name of the manga; the lazy quantifier stops at the first
        // closing tag instead of running on to a later </h1> on the same line
        auto nameRegex = `<h1>(.*?)</h1>`.regex;
        auto nameMatch = match(siteContent, nameRegex);

        // A failed match has no capture groups to index into
        if(nameMatch.empty)
            throw new Exception("Could not find the gallery name (<h1>) in " ~ url);

        // Return only the name, not the html tags
        return nameMatch.captures[1];
    }

    /++
    + Builds the list of image urls for the gallery page at `url`.
    +
    + The gallery page is downloaded once to extract the content id (taken
    + from the cover image url) and the page count. Every page is then
    + probed in parallel: if the `.jpg` url is rejected by the server the
    + `.png` url is used instead.
    +
    + Params:
    +   url = url of the gallery page; must contain "/g/", otherwise the
    +         process is terminated with `EXIT_FAILURE`
    +
    + Returns: one url per page, in page order
    +
    + Throws: `Exception` if the content id or the page count cannot be
    +         found in the downloaded html
    +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a nhentai url
        if(indexOf(url, "/g/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/g/" it was ignored!`);
            // FIXME: no! :<
            exit(EXIT_FAILURE);
        }

        // Regex patterns for finding urls and stuff. The capture groups are
        // digits only, so they cannot run past the number, and the literal
        // dots are escaped.
        auto contentIDRegex = `https://t\.nhentai\.net/galleries/([0-9]+)/cover\.jpg`;
        auto pageCountRegex = `<div>([0-9]+) pages</div>`;

        // Download the html
        auto coverHtml = to!string(get(url));

        // Find the content id
        auto contentIDResult = match(coverHtml, contentIDRegex);
        if(contentIDResult.empty)
            throw new Exception("Could not find the content id in " ~ url);
        string contentIDMatch = contentIDResult.captures[1];

        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);

        // Find the number of pages
        auto pageCountResult = match(coverHtml, pageCountRegex);
        if(pageCountResult.empty)
            throw new Exception("Could not find the page count in " ~ url);

        // Convert the page number to an integer
        immutable int pageNumber = to!int(pageCountResult.captures[1]);

        // Nothing to probe for an empty gallery
        if(pageNumber == 0)
            return [];

        // One slot per page, allocated up front: each parallel iteration
        // writes only its own slot, so no two threads touch the same element
        // and the result stays in page order.
        auto urls = new string[pageNumber];

        // Loop over the slots in parallel to make it faster
        foreach(i, ref slot; parallel(urls))
        {
            // NOTE(review): image files are assumed to be numbered from 1,
            // hence i + 1 -- confirm against the site.
            string urlStem = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i + 1);
            string extractedUrl = urlStem ~ ".jpg";

            if(_config.enable_debug_output) writefln("[i] Checking if %s is an actual jpg", extractedUrl);

            // See if the url is a valid jpg and if not change the extension to png
            if(!isJPGValid(extractedUrl))
            {
                if(_config.enable_debug_output) writefln("[i] %s is not a valid jpg changing to png!!", extractedUrl);
                extractedUrl = urlStem ~ ".png";
            }

            // Store the url in this page's slot
            slot = extractedUrl;
        }

        return urls;
    }

    /++
    + This constructor just calls the inherited constructor
    +/
    public this(Config config)
    {
        super(config);
    }
}