module sites.nhentai;

import config.downloaderconfig;
import sites.basesite;

/++
 + This class handles downloads for the site `nhentai.net`.
 +
 + It resolves the gallery name and the list of image urls from a gallery
 + page url. `_config` is not declared in this file; it is presumably
 + provided by `BaseSite` (TODO confirm).
 +/
class NHentai : BaseSite
{
    import std.conv : to, ConvException;
    import std.regex : regex, match;
    import std.net.curl : get, byChunk, HTTPStatusException;
    import core.stdc.stdlib : exit, EXIT_FAILURE;
    import std.parallelism : parallel;
    // Imported explicitly so this class does not rely on another module
    // happening to re-export these symbols.
    import std.stdio : writeln, writefln;
    import std.string : indexOf;

    /++
     + This is the base url for all images
     +/
    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
     + Tests whether the resource behind `url` exists on the server.
     +
     + Params:
     +     url = full url of the image to probe
     +
     + Returns: `true` if the GET request succeeded, `false` if it raised
     +          an `HTTPStatusException` (e.g. a 404).
     +
     + Note: other exceptions thrown by `get` are not caught here and
     +       propagate to the caller.
     +/
    bool isUrlValid(string url)
    {
        try
        {
            if(_config.enable_debug_output) writeln("[i] Trying to reqest ", url);
            // Make the get request; the response body itself is discarded
            get(url);
            return true;
        }
        catch(HTTPStatusException ex)
        {
            if(_config.enable_debug_output) writeln("[i] Get request returned status: ", ex.status);
            return false;
        }
    }

    /++
     + Gets the name of the manga from the page behind `url`.
     +
     + Params:
     +     url = url of the gallery page
     +
     + Returns: the text captured by the first group of the name pattern.
     +
     + Throws: `Exception` if the name pattern does not match the page.
     +/
    override string getNameFromUrl(string url)
    {
        // Get the site content
        string siteContent = to!string(get(url));

        // Find the name of the manga
        // NOTE(review): the pattern below consists only of line breaks around
        // the capture group; it looks like surrounding html tags may have been
        // lost - confirm against the upstream source.
        auto nameRegex = `

(.*)

`.regex;
        auto nameMatch = match(siteContent, nameRegex);

        // Indexing the captures of a failed match is invalid, so fail loudly
        if(nameMatch.empty)
        {
            throw new Exception("[!] Could not extract the manga name from " ~ url);
        }

        // Return only the name, not the html tags
        return nameMatch.captures[1];
    }

    /++
     + Builds the list of image urls for the gallery behind `url`.
     +
     + Every page is probed first as `.jpg` and then as `.png`; pages for
     + which both probes fail are skipped. The probes run in parallel, but
     + the returned list is always in ascending page order.
     +
     + Params:
     +     url = url of the gallery page, must contain "/g/"
     +
     + Returns: the urls of all images that could be verified.
     +
     + Throws: `Exception` if the content id or the page count cannot be
     +         found in the page; `ConvException` if the captured page count
     +         is not a valid integer.
     +
     + Note: terminates the whole process with `EXIT_FAILURE` if `url`
     +       does not contain "/g/".
     +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a nhentai url
        if(indexOf(url, "/g/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/g/" it was ignored!`);
            // FIXME: no! :<
            exit(EXIT_FAILURE);
        }

        // Regex patterns for finding urls and stuff
        // NOTE(review): `[0-9].*` is greedy and accepts any characters after
        // the first digit - confirm whether `[0-9]+` was intended.
        auto contentIDRegex = "https://t.nhentai.net/galleries/([0-9].*)/cover.jpg";
        // NOTE(review): this pattern starts and ends with a line break; it
        // looks like surrounding html tags may have been lost - confirm.
        auto pageCountRegex = "
([0-9].*) pages
";

        // Download the html
        auto coverHtml = to!string(get(url));

        // Find the content id
        auto contentIDResult = match(coverHtml, contentIDRegex);
        if(contentIDResult.empty)
        {
            throw new Exception("[!] Could not extract the content-id from " ~ url);
        }
        auto contentIDMatch = contentIDResult.captures[1];
        if(_config.enable_debug_output) writefln("[i] Extracted content-id -> %s", contentIDMatch);

        // Find the number of pages
        auto pageCountResult = match(coverHtml, pageCountRegex);
        if(pageCountResult.empty)
        {
            throw new Exception("[!] Could not extract the page count from " ~ url);
        }

        // Convert the page number to an integer
        immutable int pageNumber = to!int(pageCountResult.captures[1]);

        // One result slot per page. Every parallel iteration writes only to
        // its own slot, so the worker threads never append to a shared array
        // and the page order is preserved.
        auto resolved = new string[pageNumber];

        // Loop over the slots in parallel to make it faster
        foreach(i, ref slot; parallel(resolved))
        {
            // Craft the url with all parameters
            string extractedUrl = imageUrl ~ contentIDMatch ~ "/" ~ to!string(i+1);

            if(isUrlValid(extractedUrl ~ ".jpg"))
            {
                // The url with a jpg at the end didn't return a
                // 404 so the image is assumed to be valid
                extractedUrl ~= ".jpg";
            }
            else if(isUrlValid(extractedUrl ~ ".png"))
            {
                // The url with a png at the end didn't return a
                // 404 so the image is assumed to be valid
                extractedUrl ~= ".png";
            }
            else
            {
                // Both requests failed so we are going to skip this image
                // (its slot stays empty and is dropped below)
                writeln("[!] Failed to get image for url : ", extractedUrl);
                continue;
            }

            if(_config.enable_debug_output) writefln("[i] The image url %s is assumed to be valid", extractedUrl);
            slot = extractedUrl;
        }

        // Collect the verified urls sequentially, dropping the skipped pages
        string[] urls;
        urls.reserve(resolved.length);
        foreach(candidate; resolved)
        {
            if(candidate.length != 0)
            {
                urls ~= candidate;
            }
        }

        return urls;
    }

    /++
     + This constructor just calls the inherited constructor
     +
     + Params:
     +     config = configuration object forwarded to `BaseSite`
     +/
    public this(Config config)
    {
        super(config);
    }
}