commit 00071051d3622f8a148c9489afc22fe583faf478
Author: beepboopbelong
Date:   Mon Apr 13 19:19:14 2020 +0200

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c4cc2c4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+.dub
+docs.json
+__dummy.html
+docs/
+/hentai_downloader
+hentai_downloader.so
+hentai_downloader.dylib
+hentai_downloader.dll
+hentai_downloader.a
+hentai_downloader.lib
+hentai_downloader-test-*
+*.exe
+*.o
+*.obj
+*.lst
+list.txt
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1a24667
--- /dev/null
+++ b/README.md
@@ -0,0 +1,24 @@
+# Hentai Downloader
+
+## Prerequisites
+* dub
+* dmd or ldc2
+
+## Building hentai_downloader
+
+`dub build`
+
+# Config file
+
+On the first run of the program a config.json file is created in
+`~/.config/hentai_downloader`
+
+## Sites:
+* [X] hentai.cafe
+* [X] nhentai.net
+* [ ] all the other ones
+
+## TODO:
+* [ ] Create a makefile that builds and installs the hentai_downloader into /usr/local/bin
+* [ ] Fix the FIXMEs in the code
+* [ ] Optional flag to compress the downloaded folders
diff --git a/dub.json b/dub.json
new file mode 100644
index 0000000..c08ebdc
--- /dev/null
+++ b/dub.json
@@ -0,0 +1,9 @@
+{
+    "authors": [
+        ""
+    ],
+    "copyright": "Copyright © 2020, ",
+    "description": "Download doujins/hentai from the commandline",
+    "license": "proprietary",
+    "name": "hentai_downloader"
+}
diff --git a/source/app.d b/source/app.d
new file mode 100644
index 0000000..3986870
--- /dev/null
+++ b/source/app.d
@@ -0,0 +1,68 @@
+import std.stdio;
+import std.string;
+
+// My classes
+import config.downloaderconfig;
+
+import sites.hentaicafe;
+import sites.nhentai;
+
+import inputhandler;
+
+/++
++ Prints the usage of the program
++/
+void printHelp()
+{
+    writeln(`
+    Usage:
+        -h          Display this help message
+        -b <file>   Batchmode -> Downloads all links in the given text file
+        <url>       Download only one manga`);
+}
+
+/++
++ Entry point, dispatches to the help, the single url mode
++ or the batchmode depending on the supplied arguments
++/
+void main(string[] args)
+{
+    // No arguments or an explicit request for the help message
+    if(args.length < 2 || args[1] == "-h")
+    {
+        printHelp();
+        return;
+    }
+
+    if(args.length == 2)
+    {
+        // Direct link was supplied
+        string url = args[1];
+
+        // Call the factory with the link that was supplied
+        siteFactory(url);
+    }
+    else
+    {
+        // Batchmode
+        import std.file : readText;
+
+        string filename = args[2];
+        // Read all the links into memory
+        string fileContents = readText(filename);
+
+        // Sanitize the urls, the variable of a foreach is only a copy
+        // so the cleaned up urls are collected into a new array
+        string[] urls;
+        foreach(string line; fileContents.split("\n"))
+        {
+            string url = strip(line);
+            // If the url is empty move on to the next
+            if(url == "") continue;
+            urls ~= url;
+        }
+
+        // Call the factory
+        siteFactory(urls);
+    }
+}
diff --git a/source/config/downloaderconfig.d b/source/config/downloaderconfig.d
new file mode 100644
index 0000000..a1cf6b9
--- /dev/null
+++ b/source/config/downloaderconfig.d
@@ -0,0 +1,185 @@
+module config.downloaderconfig;
+
+/++
++ This struct represents the config of this
++ program and is used by the other classes
++ to configure stuff
++/
+struct Config
+{
+    /++
+    + This variable holds the folder/path into which
+    + mangas are downloaded by default
+    +/
+    string standard_download_folder;
+
+    /++
+    + This variable determines if mangas should be downloaded
+    + again even if they already are downloaded
+    +/
+    bool redownload_mangas_regardless;
+
+    /++
+    + This variable determines if the downloader should output debug infos
+    +/
+    bool enable_debug_output;
+}
+
+/++
++ This class handles reading and creating the config files
++/
+class DownloaderConfig
+{
+static private:
+    import std.stdio : writeln;
+    import std.json : JSONValue, parseJSON;
+    import std.file : exists, mkdirRecurse, readText;
+    import std.stdio : toFile;
+    import std.path : dirName, expandTilde;
+
+    /++
+    + This specifies the default path to the config,
+    + relative to the home folder of the user
+    +/
+    immutable string default_config_path = "/.config/hentai_downloader/config.json";
+
+    /++
+    + This specifies the default path to the template config file
+    +/
+    immutable string default_config_template_path = "./default_config_template.json";
+
+    /++
+    + This holds the text of the `default_config_template.json` file in case it
+    + doesn't exist in the default path anymore
+    +/
+    immutable string default_config_template_text =
+    `
+    {
+        "standard_download_folder" : "~/Downloads/Lewds/",
+        "redownload_mangas_regardless" : false,
+        "enable_debug_output" : false
+    }
+    `;
+
+    /++
+    + This function checks if the config in `config_path` exists
+    +/
+    bool configExists(string config_path)
+    {
+        return exists(config_path);
+    }
+
+    /++
+    + This function creates a new default config at `config_path`
+    +/
+    void createNewConfig(string config_path)
+    {
+        writeln("[*] Creating new config file in ", config_path);
+
+        // The folder that holds the config file
+        immutable string config_dir = dirName(config_path);
+
+        // If the folder doesn't exist create it, including missing parents
+        if(!exists(config_dir))
+            mkdirRecurse(config_dir);
+
+        // Write the template into the config folder
+        if(!exists(config_path))
+            default_config_template_text.toFile(config_path);
+    }
+
+    /++
+    + Turns a path relative to the home folder e.g. "~/.config"
+    + into an absolute path e.g. "/home/user/.config"
+    +/
+    string makeRelativePathAbsolute(string path)
+    {
+        // Let the standard library resolve the home folder instead of
+        // assuming that it is "/home/<username>"
+        return expandTilde(path);
+    }
+
+    /++
+    + This function checks if the folder specified in the config file exists
+    + and if not creates it
+    +/
+    void checkStandardFolder(Config config)
+    {
+        // Replace the relative path
+        string absolutePath = makeRelativePathAbsolute(config.standard_download_folder);
+
+        // If the `standard_download_folder` doesn't exist create it,
+        // including missing parent folders
+        if(!exists(absolutePath))
+            mkdirRecurse(absolutePath);
+    }
+
+    /++
+    + Get the path to the config file of the current user
+    +/
+    string getUserConfigPath()
+    {
+        return expandTilde("~" ~ default_config_path);
+    }
+
+    /++
+    + This function tries to parse the config file
+    + given in `config_path`, and returns a Config struct
+    +/
+    Config parseConfig(string config_path)
+    {
+        // Read the config file and parse it as JSON
+        auto config_json = parseJSON(readText(config_path));
+
+        // Create a new config
+        Config _config;
+
+        // Assign the values that were parsed from the config file
+        _config.standard_download_folder = config_json["standard_download_folder"].str();
+        _config.redownload_mangas_regardless = config_json["redownload_mangas_regardless"].boolean();
+        _config.enable_debug_output = config_json["enable_debug_output"].boolean();
+
+        // Adjust the folder path
+        _config.standard_download_folder = makeRelativePathAbsolute(_config.standard_download_folder);
+
+        return _config;
+    }
+
+    /++
+    + Loads and checks the config at `config_path`,
+    + a default config is created there if none exists yet
+    +/
+    Config loadMyConfig(string config_path)
+    {
+        // Check if the given config exists
+        if(!configExists(config_path))
+            createNewConfig(config_path); // If it doesn't, create it!
+
+        // Parse the config
+        Config _config = parseConfig(config_path);
+
+        // Check the download folder
+        checkStandardFolder(_config);
+
+        return _config;
+    }
+
+public:
+    /++
+    + This loads and parses the default config of the current user
+    + into a `Config` struct
+    +/
+    static Config loadConfig()
+    {
+        return loadMyConfig(getUserConfigPath());
+    }
+
+    /++
+    + This loads and parses a custom config from `custom_config_path`
+    + into a `Config` struct
+    +/
+    static Config loadConfig(string custom_config_path)
+    {
+        return loadMyConfig(makeRelativePathAbsolute(custom_config_path));
+    }
+}
diff --git a/source/inputhandler.d b/source/inputhandler.d
new file mode 100644
index 0000000..e8f9cb5
--- /dev/null
+++ b/source/inputhandler.d
@@ -0,0 +1,86 @@
+import std.stdio;
+import std.string;
+import std.array;
+import core.stdc.stdlib : exit, EXIT_FAILURE;
+
+import config.downloaderconfig;
+
+import sites.basesite;
+import sites.hentaicafe;
+import sites.nhentai;
+
+/++
++ This function parses the url and creates the appropriate site object.
++ It returns `null` if the url doesn't belong to a supported site
++/
+private BaseSite createSite(string url, Config config)
+{
+    immutable string hentaicafe_indicator = "/hc.fyi/";
+    immutable string nhentai_indicator = "/g/";
+
+    // The supplied url is a hentaicafe url
+    if(indexOf(url, hentaicafe_indicator) != -1)
+        return new HentaiCafe(config);
+
+    // The supplied url is a nhentai url
+    if(indexOf(url, nhentai_indicator) != -1)
+        return new NHentai(config);
+
+    return null;
+}
+
+/++
++ This function creates the appropriate site object for the url
++ and then downloads the images.
++ The program exits with a failure if the url isn't supported
++/
+void siteFactory(string url)
+{
+    // Load the config file
+    Config config = DownloaderConfig.loadConfig();
+
+    BaseSite mangaSite = createSite(url, config);
+
+    if(mangaSite is null)
+    {
+        writeln("[!] The url you supplied isn't supported :(");
+
+        writeln(url);
+        // FIXME:
+        // Dont exit with a failure
+        exit(EXIT_FAILURE);
+    }
+
+    // Download the manga
+    mangaSite.downloadDoujin(url);
+}
+
+/++
++ This function parses each url in a list
++ and creates the appropriate site object to
++ download the manga. Unsupported urls are skipped
++/
+void siteFactory(string[] urls)
+{
+    // Nothing to do for an empty list
+    if(urls.length == 0) return;
+
+    // Load the config only once for the whole list
+    Config config = DownloaderConfig.loadConfig();
+
+    // Create the site object for each url that way you can have a list
+    // of mixed manga links
+    foreach(string url; urls)
+    {
+        BaseSite mangaSite = createSite(url, config);
+
+        if(mangaSite is null)
+        {
+            // Don't abort the whole batch because of one bad link
+            writeln("[!] Skipping unsupported url: ", url);
+            continue;
+        }
+
+        mangaSite.downloadDoujin(url);
+    }
+}
diff --git a/source/sites/basesite.d b/source/sites/basesite.d
new file mode 100644
index 0000000..69cacb2
--- /dev/null
+++ b/source/sites/basesite.d
@@ -0,0 +1,179 @@
+module sites.basesite;
+
+import config.downloaderconfig;
+import sites.basesiteintf;
+
+/++
++ This is the baseclass which all
++ other site classes inherit from
++/
+class BaseSite : BaseSiteIntf
+{
+private:
+    Config _config;
+
+protected:
+    import std.stdio : writeln, writefln;
+    import std.file : exists, rmdirRecurse, mkdir, mkdirRecurse;
+    import std.json : parseJSON, JSONValue;
+    import std.array : replace, split;
+    import std.string : indexOf;
+    import std.path : buildPath;
+    import std.net.curl : download;
+
+    // This function needs to be implemented by each derived site class
+    abstract string getNameFromUrl(string url);
+
+    // This function needs to be implemented by each derived site class
+    abstract string[] getImageUrlsFromBase(string url);
+
+    /++
+    + Gets the image urls from the supplied json
+    + each derived site class should override this method
+    + if the image urls aren't stored in the way this method
+    + expects them to be
+    +/
+    string[] getUrlsFromJson(string json)
+    {
+        string[] urls;
+        // Parse the json
+        JSONValue parsedJson = parseJSON(json);
+
+        // Extract the urls of the images
+        foreach(JSONValue val; parsedJson.array)
+            urls ~= val["url"].str.replace("\\", "");
+
+        return urls;
+    }
+
+    /++
+    + This function creates a folder with the supplied name.
+    + If the folder already exists the folder will get deleted!!
+    +/
+    void createOuputFolder(string foldername)
+    {
+        // Check if folder exists already
+        if(exists(foldername))
+        {
+            writefln(`[!] Folder with the name "%s" exists already...`, foldername);
+            writeln("[!] Deleting it now!");
+            rmdirRecurse(foldername);
+        }
+
+        writefln(`[*] Creating folder "%s"`, foldername);
+        // Also create missing parent folders
+        mkdirRecurse(foldername);
+    }
+
+    /++
+    + This function extracts the name of a file from the supplied url
+    +/
+    string extractFileNameFromUrl(string url)
+    {
+        string[] segments = url.split("/");
+        // An empty url has no segments at all
+        return segments.length > 0 ? segments[$ - 1] : "";
+    }
+
+    /++
+    + This function downloads the images over the
+    + url supplied in the `imageUrls` into the `outputPath`
+    +/
+    void downloadImages(string[] imageUrls, string outputPath)
+    {
+        foreach(string url; imageUrls)
+        {
+            // Extract the filename from the url
+            string filepath = outputPath ~ extractFileNameFromUrl(url);
+
+            if(_config.enable_debug_output) writefln("[i] Downloading from %s ==> %s", url, filepath);
+
+            // Download the image
+            download(url, filepath);
+        }
+    }
+
+    /++
+    + Downloads a doujin from `url` into the `outputPath`
+    +/
+    void downloadDoujinFromUrl(string url, string outputPath)
+    {
+        // Create a folder with the name of the manga
+        createOuputFolder(outputPath);
+
+        // Extract the urls of the manga images
+        string[] urls = getImageUrlsFromBase(url);
+
+        // Download the images over the extracted urls
+        downloadImages(urls, outputPath);
+
+        writeln("[*] Done downloading...");
+    }
+
+    /++
+    + Builds the path of the folder into which the doujin of `url` is downloaded.
+    + Returns an empty string if no usable name was found for the doujin
+    +/
+    string getOutputFolder(string url)
+    {
+        // The name comes from the remote site so make sure
+        // that it can't add path components of its own
+        string name = getNameFromUrl(url).replace("/", "_");
+
+        if(name == "" || name == "." || name == "..")
+            return "";
+
+        return buildPath(_config.standard_download_folder, name) ~ "/";
+    }
+
+public:
+
+    /++
+    + This constructor is to setup the site class with the
+    + supplied `Config`
+    +/
+    this(Config config)
+    {
+        // Set the config
+        _config = config;
+    }
+
+    /++
+    + This function downloads a doujin from the supplied url
+    +/
+    void downloadDoujin(string url)
+    {
+        // Get the folder for the doujin
+        string _foldername = getOutputFolder(url);
+
+        // Without a name the download folder itself would be used
+        // (and deleted on a redownload) so skip the doujin
+        if(_foldername == "")
+        {
+            writefln("[!] Couldn't find the name of the doujin at %s, skipping it", url);
+            return;
+        }
+
+        if(_config.enable_debug_output) writefln("[i] _foldername is ----> %s", _foldername);
+
+        // Check if the folder already exists and `redownload_mangas_regardless` is set to false
+        if(exists(_foldername) && !_config.redownload_mangas_regardless)
+        {
+            // Then stop downloading
+            return;
+        }
+
+        // Download the doujin into a folder with the name of the doujin
+        downloadDoujinFromUrl(url, _foldername);
+    }
+
+    /++
+    + This function downloads multiple doujins
+    +/
+    void downloadDoujin(string[] urls)
+    {
+        // The single url version already handles the naming and skipping
+        foreach(string url; urls)
+            downloadDoujin(url);
+    }
+}
diff --git a/source/sites/basesiteintf.d b/source/sites/basesiteintf.d
new file mode 100644
index 0000000..28a8b7f
--- /dev/null
+++ b/source/sites/basesiteintf.d
@@ -0,0 +1,54 @@
+module sites.basesiteintf;
+
+/++
++ This is the interface for the base class
++ from which all the other sites are inherited
++/
+interface BaseSiteIntf
+{
+protected:
+    /++
+    + This function returns the name of the manga by parsing the
+    + html from the url
+    +/
+    string getNameFromUrl(string url);
+
+    /++
+    + This function parses the json in `json`
+    + and returns a string array containing all
+    + the urls extracted from the `json` arg
+    +/
+    string[] getUrlsFromJson(string json);
+
+    /++
+    + This function extracts the urls of the images from the supplied manga base url
+    +/
+    string[] getImageUrlsFromBase(string url);
+
+    /++
+    + This function creates a folder with the given name
+    +/
+    void createOuputFolder(string foldername);
+
+    /++
+    + This function downloads the images over the
+    + url supplied in the `imageUrls` into the `outputPath`
+    +/
+    void downloadImages(string[] imageUrls, string outputPath);
+
+    /++
+    + Downloads a doujin from `url` into `outputPath`
+    +/
+    void downloadDoujinFromUrl(string url, string outputPath);
+
+public:
+    /++
+    + Downloads a doujin from `url` into the configured download folder
+    +/
+    void downloadDoujin(string url);
+
+    /* /++
+    + Download multiple doujins
+    +/
+    void downloadDojin(string[] urls); */
+}
diff --git a/source/sites/hentaicafe.d b/source/sites/hentaicafe.d
new file mode 100644
index 0000000..dceae9e
--- /dev/null
+++ b/source/sites/hentaicafe.d
@@ -0,0 +1,97 @@
+module sites.hentaicafe;
+
+import config.downloaderconfig;
+import sites.basesite;
+
+/++
++ This class handles downloads for the site `hentai.cafe`
++/
+class HentaiCafe : BaseSite
+{
+protected:
+    import std.net.curl : get;
+    import std.conv : to;
+    import std.regex : regex, match;
+
+    /++
+    + This function gets the name of the manga by the url.
+    + It returns an empty string if the name isn't found in the page
+    +/
+    override string getNameFromUrl(string url)
+    {
+        // Get the site html as a string
+        string siteContent = to!string(get(url));
+
+        // Find the name of the manga
+        // NOTE(review): the html tags of this pattern were lost when the
+        // commit was exported, `<h3>` is a reconstruction - confirm it
+        // against the markup of the site
+        auto nameRegex = `<h3>(.*)</h3>`.regex;
+        auto nameMatch = match(siteContent, nameRegex);
+
+        // No match means that the page doesn't look as expected
+        if(nameMatch.empty)
+            return "";
+
+        // Return only the name not the html tags
+        return nameMatch.captures[1];
+    }
+
+    /++
+    + This function extracts the urls of the images of the manga behind `url`.
+    + It returns an empty array if the needed data isn't found
+    +/
+    override string[] getImageUrlsFromBase(string url)
+    {
+        // Check if the url is a hentai.cafe comic url
+        if(indexOf(url, "/hc.fyi/") == -1)
+        {
+            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
+            return [];
+        }
+
+        // regex patterns for finding urls
+        auto comicRegex = `\"(https://hentai\.cafe/manga/read/.*)\" title`.regex;
+        auto jsonInfoRegex = `var pages = \[(.*)\]`.regex;
+
+        // Get page html
+        string comicHTML = to!string(get(url));
+
+        // Find the url in the html mess
+        auto comicUrlMatch = match(comicHTML, comicRegex);
+        if(comicUrlMatch.empty)
+        {
+            writefln("[!] Couldn't find the link to the reader on %s", url);
+            return [];
+        }
+
+        // Sanitize the url
+        string comicURL = comicUrlMatch.captures[0];
+        comicURL = split(comicURL, " ")[0].replace("\"", "");
+
+        // Get the first manga page to extract the json with the page infos
+        string mangaPageHTML = to!string(get(comicURL));
+
+        // Get the json data of the page
+        auto jsonMatch = match(mangaPageHTML, jsonInfoRegex);
+        if(jsonMatch.empty)
+        {
+            writefln("[!] Couldn't find the page infos on %s", comicURL);
+            return [];
+        }
+
+        // Rebuild the json array from the captured content, splitting
+        // the match on "=" would cut off urls that contain a "="
+        string jsonData = "[" ~ jsonMatch.captures[1] ~ "]";
+
+        return getUrlsFromJson(jsonData);
+    }
+
+    /++
+    + This constructor just calls the inherited constructor
+    +/
+    public this(Config config)
+    {
+        super(config);
+    }
+}
diff --git a/source/sites/nhentai.d b/source/sites/nhentai.d
new file mode 100644
index 0000000..f467af5
--- /dev/null
+++ b/source/sites/nhentai.d
@@ -0,0 +1,167 @@
+module sites.nhentai;
+
+import config.downloaderconfig;
+import sites.basesite;
+
+/++
++ This class handles downloads for the site `nhentai`
++/
+class NHentai : BaseSite
+{
+private:
+    import std.conv : to;
+    import std.net.curl : get;
+    import std.json : JSONValue, parseJSON;
+    import std.array : split;
+
+    /++
+    + This struct holds all the needed infos about the nhentai doujin
+    +/
+    struct NHentai_Doujin_Info
+    {
+        /++
+        + This is the number of the manga
+        +/
+        string number;
+
+        /++
+        + This is the title of the manga
+        +/
+        string title;
+
+        /++
+        + This array holds all the urls of the images
+        +/
+        string[] imageUrls;
+    }
+
+    /++
+    + This is the url of the nhentai api
+    + calls are made by the number of the manga
+    + for example "https://apis.nhent.ai/g/1"
+    +
+    + The returned json string contains all the info
+    + should be read into `NHentai_Doujin_Info`
+    +/
+    immutable string api_url = "https://apis.nhent.ai/g/";
+
+    /++
+    + This variable holds the class internal
+    + number of the manga
+    +/
+    string _number;
+
+    /++
+    + This struct contains all the needed infos
+    + to download the manga
+    +/
+    NHentai_Doujin_Info _nhentai_doujin_info;
+
+    /++
+    + This function extracts the number of the manga
+    + from the supplied url, it is the last non empty
+    + segment e.g. "123" for "https://nhentai.net/g/123/"
+    +/
+    string extractNumFromUrl(string url)
+    {
+        // Walk backwards so that a missing trailing "/" doesn't matter
+        // and no index has to be calculated from the unsigned length
+        foreach_reverse(string segment; url.split("/"))
+        {
+            if(segment.length > 0)
+                return segment;
+        }
+
+        return "";
+    }
+
+    /++
+    + This function gets the info of the doujin using the api
+    + it returns a struct with all the important info
+    +/
+    NHentai_Doujin_Info getDoujinInfo(string mangaNum)
+    {
+        NHentai_Doujin_Info _info;
+
+        // Craft the url
+        string requestUrl = api_url ~ mangaNum;
+
+        // Get the json data for the manga
+        string jsonData = to!string(get(requestUrl));
+
+        // Extract the image urls from the json string
+        _info.imageUrls = getUrlsFromJson(jsonData);
+
+        // Parse the data
+        auto parseData = parseJSON(jsonData);
+
+        // Get the title
+        _info.title = parseData["title"].str();
+
+        return _info;
+    }
+
+    /++
+    + If the class internal info struct is filled
+    + but the number is different `getDoujinInfo` gets
+    + called otherwise nothing happens
+    +/
+    void fetchInfoForManaga(string number)
+    {
+        // If the doujin info wasn't fetched fetch it now
+        if(_nhentai_doujin_info.number != number)
+        {
+            writeln("\nGetting info....");
+            // Fill the info
+            _nhentai_doujin_info = getDoujinInfo(number);
+            _nhentai_doujin_info.number = number;
+        }
+    }
+
+protected:
+    override string getNameFromUrl(string url)
+    {
+        // Extract the manga number
+        _number = extractNumFromUrl(url);
+
+        // Fetch manga infos
+        fetchInfoForManaga(_number);
+
+        // Return the name of the manga
+        return _nhentai_doujin_info.title;
+    }
+
+    override string[] getImageUrlsFromBase(string url)
+    {
+        // Use the number of the given url, `_number` is only
+        // set if `getNameFromUrl` was called before
+        _number = extractNumFromUrl(url);
+
+        // Fetch info if it wasn't already fetched
+        fetchInfoForManaga(_number);
+
+        return _nhentai_doujin_info.imageUrls;
+    }
+
+    override string[] getUrlsFromJson(string json)
+    {
+        // Extract url from json
+        string[] urls;
+
+        JSONValue parsedJson = parseJSON(json);
+
+        // Extract the urls for the images
+        foreach(JSONValue val; parsedJson["pages"].array())
+            urls ~= val.str().replace("i.bakaa.me", "i.nhentai.net");
+
+        return urls;
+    }
+
+    /++
+    + This constructor just calls the inherited constructor
+    +/
+    public this(Config config)
+    {
+        super(config);
+    }
+}