Initial commit

2020-04-13 19:19:14 +02:00 · 2020-04-13 19:19:14 +02:00 · 00071051d3
commit 00071051d3
10 changed files with 855 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,16 @@
+.dub
+docs.json
+__dummy.html
+docs/
+/hentai_downloader
+hentai_downloader.so
+hentai_downloader.dylib
+hentai_downloader.dll
+hentai_downloader.a
+hentai_downloader.lib
+hentai_downloader-test-*
+*.exe
+*.o
+*.obj
+*.lst
+list.txt
--- a/README.md
+++ b/README.md
@ -0,0 +1,24 @@
+# Hentai Downloader
+
+## Prerequisites
+* dub
+* dmd or ldc2
+
+## Building hentai_downloader
+
+`dub build`
+
+# Config file
+
+On the first run of the program, a `config.json` file is created in
+`~/.config/hentai_downloader`.
+
+## Sites:
+* [X]  hentai.cafe
+* [X]  nhentai.net
+* [ ]  all the other ones
+
+## TODO:
+* [ ]  Create a makefile that builds and installs the hentai_downloader into /usr/local/bin
+* [ ]  Fix the FIXMEs in the code
+* [ ]  Optional flag to compress the downloaded folders
--- a/dub.json
+++ b/dub.json
@ -0,0 +1,9 @@
+{
+	"authors": [
+		""
+	],
+	"copyright": "Copyright © 2020, ",
+	"description": "Download doujins/hentai from the commandline",
+	"license": "proprietary",
+	"name": "hentai_downloader"
+}
--- a/source/app.d
+++ b/source/app.d
@ -0,0 +1,63 @@
+import std.stdio;
+import std.string;
+
+// My classes
+import config.downloaderconfig;
+
+import sites.hentaicafe;
+import sites.nhentai;
+
+import inputhandler;
+
+/++
+ + Prints the command line usage text to stdout.
+ +/
+void printHelp()
+{
+	writeln(`
+		Usage:
+		-h	 			Display this help message
+		-b <text file>	Batchmode -> Downloads all links in the given text file
+		<link>			Download only one manga`);
+}
+
+/++
+ + Program entry point.
+ +
+ + Supported invocations:
+ +   no arguments or `-h`   -> print the usage text
+ +   <link>                 -> download the single manga behind the link
+ +   -b <text file>         -> download every link listed in the text file
+ +                             (one link per line, blank lines are ignored)
+ +
+ + Params:
+ +   args = the raw command line, `args[0]` being the program name
+ +/
+void main(string[] args)
+{
+	import std.algorithm : filter, map;
+	import std.array     : array;
+	import std.file      : readText;
+
+	// Nothing to do, or help was requested explicitly
+	if(args.length < 2 || args[1] == "-h")
+	{
+		printHelp();
+		return;
+	}
+
+	if(args.length == 2)
+	{
+		// `-b` needs a file name; don't mistake the flag itself for a link
+		if(args[1] == "-b")
+		{
+			printHelp();
+			return;
+		}
+
+		// Direct link was supplied, call the factory with it
+		siteFactory(args[1]);
+		return;
+	}
+
+	// Batchmode: args[2] names the text file holding the links.
+	// The lines are stripped and empty ones dropped *before* they reach the
+	// factory, otherwise a trailing newline in the file would be treated as
+	// an unsupported url and abort the whole batch.
+	string[] urls = readText(args[2])
+		.splitLines()
+		.map!(line => line.strip())
+		.filter!(line => line.length != 0)
+		.array;
+
+	// Call the factory
+	siteFactory(urls);
+}
--- a/source/config/downloaderconfig.d
+++ b/source/config/downloaderconfig.d
@ -0,0 +1,219 @@
+module config.downloaderconfig;
+
+/++
+ This struct represents the config of this
+ program and is used by the other classes
+ to configure stuff
++/
+struct Config
+{
+    /++
+    + This variable holds the folder/path into which
+    + mangas are downloaded by default
+    +/
+    string standard_download_folder;
+
+    /++
+    + This variable determines if mangas should be downloaded
+    + again even if they are already downloaded
+    +/
+    bool   redownload_mangas_regardless;
+
+    /++
+    + This variable determines if the downloader should output debug infos
+    +/
+    bool   enable_debug_output;
+}
+
+/++
+ This class handles reading and creating the config files
++/
+class DownloaderConfig
+{
+static private:
+    import std.stdio : writeln;
+    import std.json  : JSONValue, parseJSON;
+    import std.file  : exists, mkdir, readText, copy;
+    import std.stdio : toFile;
+    import std.string: strip;
+    import std.array : replace;
+
+    /++
+    + Path of the config file relative to the user's home folder;
+    + `getUserConfigPath` prepends the home folder to it
+    +/
+    string default_config_path = "/.config/hentai_downloader/config.json";
+
+    /++
+    + This specifies the default path to the config template file.
+    + NOTE(review): nothing in this class reads this path - confirm whether it is still needed
+    +/
+    immutable string default_config_template_path = "./default_config_template.json";
+
+    /++
+    + This holds the text of the `default_config_template.json` file;
+    + `createNewConfig` writes it out as the initial config file
+    +/
+    immutable string default_config_template_text =
+    `
+    {
+        "standard_download_folder"      : "~/Downloads/Lewds/",
+        "redownload_mangas_regardless"  : false,
+        "enable_debug_output"           : false
+    }
+    `;
+
+    /++
+    + Tells whether something exists at `config_path`
+    +
+    + Params:
+    +   config_path = path of the config file to look for
+    +
+    + Returns: `true` when the path exists, `false` otherwise
+    +/
+    bool configExists(string config_path)
+    {
+        immutable bool found = exists(config_path);
+        return found;
+    }
+
+    /++
+    + Creates a new default config file at `config_path`.
+    +
+    + The folder that is supposed to hold the file is created first, including
+    + missing parents such as `~/.config`. A file that already exists at
+    + `config_path` is never overwritten.
+    +
+    + Params:
+    +   config_path = path of the config file to create
+    +/
+    void createNewConfig(string config_path)
+    {
+        import std.file : mkdirRecurse;
+        import std.path : dirName;
+
+        writeln("[*] Creating new config file in ", config_path);
+
+        // The folder that holds the config file is derived from the path
+        // itself, so the file and its folder can never disagree
+        immutable string config_dir = dirName(config_path);
+
+        // Unlike `mkdir` this also creates missing parent folders and does
+        // nothing when the folder is already there
+        mkdirRecurse(config_dir);
+
+        // Write the template text into the config folder
+        if(!exists(config_path))
+        {
+            default_config_template_text.toFile(config_path);
+        }
+    }
+
+    /++
+    + Expands a home-relative path, e.g. turns "~/.config" into "/home/user/.config".
+    +
+    + Only a leading `~` is expanded; a tilde that is part of a file or folder
+    + name further down the path is left alone. Paths that don't start with `~`
+    + are returned unchanged.
+    +
+    + Params:
+    +   path = the path to expand
+    +
+    + Returns: the expanded path
+    +/
+    string makeRelativePathAbsolute(string path)
+    {
+        import std.path : expandTilde;
+
+        // `expandTilde` resolves the real home folder of the user instead of
+        // assuming that it lives under "/home/"
+        return expandTilde(path);
+    }
+
+    /++
+    + This function checks if the download folder specified in the config
+    + exists and creates it (including missing parent folders) if it doesn't.
+    +
+    + Params:
+    +   config = the config whose `standard_download_folder` is checked
+    +/
+    void checkStandardFolder(Config config)
+    {
+        import std.file : mkdirRecurse;
+
+        // Replace the relative path
+        string absolutePath = makeRelativePathAbsolute(config.standard_download_folder);
+
+        if(!exists(absolutePath))
+        {
+            // A plain `mkdir` throws when a parent folder (e.g. "~/Downloads")
+            // is missing, so create the whole chain
+            mkdirRecurse(absolutePath);
+        }
+    }
+
+    /++
+    + Asks the shell (`whoami`) for the name of the current user.
+    +
+    + When the command fails a message is printed and the whole
+    + program is terminated with `EXIT_FAILURE`.
+    +
+    + Returns: the username without surrounding whitespace
+    +/
+    string getUsername()
+    {
+        import core.stdc.stdlib : exit, EXIT_FAILURE;
+        import std.process : executeShell;
+
+        auto result = executeShell(`whoami`);
+
+        // A non-zero status means `whoami` could not be run successfully
+        if(result.status)
+        {
+            // FIXME: raise an exception or something
+            writeln("[!] Failed to get the current username");
+            exit(EXIT_FAILURE);
+        }
+
+        // The output ends with a newline which must not end up in paths
+        string username = result.output.strip();
+        return username;
+    }
+
+    /++
+    + Builds the path of the config file of the current user
+    +
+    + Returns: "/home/<username>" followed by `default_config_path`
+    +/
+    string getUserConfigPath()
+    {
+        // Home folder of the user that runs the program
+        immutable string home_dir = "/home/" ~ getUsername();
+
+        return home_dir ~ default_config_path;
+    }
+
+    /++
+    + Reads the JSON config file at `config_path` and turns it
+    + into a `Config` struct.
+    +
+    + The `standard_download_folder` entry is passed through
+    + `makeRelativePathAbsolute` before it is stored.
+    +
+    + Params:
+    +   config_path = path of the config file to read
+    +
+    + Returns: the filled `Config`
+    +/
+    Config parseConfig(string config_path)
+    {
+        // Read the file and parse it as JSON
+        JSONValue root = parseJSON(readText(config_path));
+
+        // Pull out the raw values first ...
+        string download_folder = root["standard_download_folder"].str();
+        bool   redownload      = root["redownload_mangas_regardless"].boolean();
+        bool   debug_output    = root["enable_debug_output"].boolean();
+
+        // ... then assemble the config, adjusting the folder path on the way
+        Config parsed;
+        parsed.standard_download_folder     = makeRelativePathAbsolute(download_folder);
+        parsed.redownload_mangas_regardless = redownload;
+        parsed.enable_debug_output          = debug_output;
+
+        return parsed;
+    }
+
+    /++
+    + Loads the config file at `config_path` and prepares everything it refers to.
+    +
+    + A missing config file is created from the built-in template first, and
+    + the download folder named in the config is created if it doesn't exist.
+    +
+    + Params:
+    +   config_path = path of the config file; an empty string selects the
+    +                 per-user default returned by `getUserConfigPath`
+    +
+    + Returns: the parsed `Config`
+    +/
+    Config loadMyConfig(string config_path)
+    {
+        // Honour the path the caller asked for; only fall back to the
+        // per-user default when none was given
+        if(config_path.length == 0)
+            config_path = getUserConfigPath();
+
+        // Check if the given config exists
+        if(!configExists(config_path))
+            createNewConfig(config_path); // If it doesn't, create it!
+
+        // Parse the config
+        Config _config = parseConfig(config_path);
+
+        // Check the download folder
+        checkStandardFolder(_config);
+
+        return _config;
+    }
+
+
+public:
+    /++
+    + This loads and parses the config of the current user
+    + (see `getUserConfigPath`) into a `Config` struct
+    +/
+    static Config loadConfig()
+    {
+        return loadMyConfig(getUserConfigPath());
+    }
+
+    /++
+    + This loads and parses a custom config from `custom_config_path`
+    + into a `Config` struct
+    +
+    + Params:
+    +   custom_config_path = path of the config file to use, a leading `~`
+    +                        is resolved via `makeRelativePathAbsolute`
+    +/
+    static Config loadConfig(string custom_config_path)
+    {
+        return loadMyConfig(makeRelativePathAbsolute(custom_config_path));
+    }
+}
--- a/source/inputhandler.d
+++ b/source/inputhandler.d
@ -0,0 +1,68 @@
+import std.stdio;
+import std.string;
+import std.array;
+import core.stdc.stdlib : exit, EXIT_FAILURE;
+
+import config.downloaderconfig;
+
+import sites.basesite;
+import sites.hentaicafe;
+import sites.nhentai;
+
+/++
+ + Picks the site class that matches `url`, creates it with the
+ + loaded config and lets it download the manga behind the url.
+ +
+ + Urls containing "/hc.fyi/" are handled by `HentaiCafe`, urls containing
+ + "/g/" by `NHentai`. For any other url a message is printed and the
+ + program is terminated with `EXIT_FAILURE`.
+ +
+ + Params:
+ +   url = the link of the manga to download
+ +/
+void siteFactory(string url)
+{
+    // The config is loaded before the url is looked at
+    Config settings = DownloaderConfig.loadConfig();
+
+    // Markers that identify the supported sites
+    immutable bool isHentaiCafe = indexOf(url, "/hc.fyi/") != -1;
+    immutable bool isNHentai    = indexOf(url, "/g/") != -1;
+
+    // The concrete site object is only ever used through its base class
+    BaseSite handler;
+
+    if(isHentaiCafe)
+    {
+        handler = new HentaiCafe(settings);
+    }
+    else if(isNHentai)
+    {
+        handler = new NHentai(settings);
+    }
+    else
+    {
+        writeln("[!] The url you supplied isn't supported :(");
+
+        writeln(url);
+        // FIXME:
+        // Dont exit with a failure
+        exit(EXIT_FAILURE);
+    }
+
+    // Download the manga
+    handler.downloadDoujin(url);
+}
+
+/++
+ + Runs the single url `siteFactory` for every entry of `urls`,
+ + in order. Because each url is dispatched on its own the list may
+ + mix links of different sites.
+ +
+ + Params:
+ +   urls = the links of the mangas to download
+ +/
+void siteFactory(string[] urls)
+{
+    for(size_t i = 0; i < urls.length; ++i)
+    {
+        siteFactory(urls[i]);
+    }
+}
--- a/source/sites/basesite.d
+++ b/source/sites/basesite.d
@ -0,0 +1,166 @@
+module sites.basesite;
+
+import config.downloaderconfig;
+import sites.basesiteintf;
+
+/++
+ This is the baseclass which all
+ other site classes inherit from
++/
+class BaseSite : BaseSiteIntf
+{
+private:
+    Config _config;
+
+protected:
+    import std.stdio    : writeln, writefln;
+    import std.file     : exists, rmdirRecurse, mkdir;
+    import std.json     : parseJSON, JSONValue;
+    import std.array    : replace, split;
+    import std.string   : indexOf;
+    import std.net.curl : download;
+
+    /// Returns the name of the manga behind `url`.
+    /// This function needs to be implemented by each derived site class
+    abstract string getNameFromUrl(string url);
+
+    /// Returns the urls of the images of the manga behind `url`.
+    /// This function needs to be implemented by each derived site class
+    abstract string[] getImageUrlsFromBase(string url);
+
+    /++
+    + Collects the image urls out of a JSON array of objects, taking the
+    + "url" member of every element and dropping all backslashes from it.
+    +
+    + Derived site classes should override this method if their
+    + image urls aren't stored in that layout.
+    +
+    + Params:
+    +   json = the JSON text to read the urls from
+    +
+    + Returns: the urls in the order they appear in `json`
+    +/
+    string[] getUrlsFromJson(string json)
+    {
+        JSONValue root = parseJSON(json);
+
+        string[] links;
+        foreach(i; 0 .. root.array.length)
+        {
+            // The urls arrive with escaped slashes, get rid of the backslashes
+            links ~= replace(root.array[i]["url"].str, "\\", "");
+        }
+
+        return links;
+    }
+
+    /++
+    + Makes sure that `foldername` is a fresh, empty folder.
+    + WARNING: a folder that already exists under that name is
+    + deleted together with everything inside it!
+    +
+    + Params:
+    +   foldername = path of the folder to (re)create
+    +/
+    void createOuputFolder(string foldername)
+    {
+        immutable bool alreadyThere = exists(foldername);
+
+        // Throw away the leftovers of an earlier download
+        if(alreadyThere)
+        {
+            writefln(`[!] Folder with the name "%s" exists already...`, foldername);
+            writeln("[!] Deleting it now!");
+            rmdirRecurse(foldername);
+        }
+
+        writefln(`[*] Creating folder "%s"`, foldername);
+        mkdir(foldername);
+    }
+
+    /++
+    + This function extracts the name of a file from the supplied url,
+    + i.e. everything after the last "/".
+    +
+    + Params:
+    +   url = the url to take the file name from
+    +
+    + Returns: the last "/" separated part of `url`; an empty string
+    +          when `url` is empty or ends with "/"
+    +/
+    string extractFileNameFromUrl(string url)
+    {
+        string[] segments = url.split("/");
+
+        // Splitting an empty url yields an empty array; `length - 1` would
+        // then wrap around (length is unsigned) and index out of bounds
+        if(segments.length == 0)
+            return "";
+
+        return segments[$ - 1];
+    }
+
+    /++
+    + Downloads every image listed in `imageUrls` into `outputPath`.
+    +
+    + The target file is named after the last part of the image url and is
+    + appended directly to `outputPath`, so `outputPath` has to end with a
+    + path separator.
+    +
+    + Params:
+    +   imageUrls  = the urls of the images to download
+    +   outputPath = the folder the images are stored in
+    +/
+    void downloadImages(string[] imageUrls, string outputPath)
+    {
+        foreach(i; 0 .. imageUrls.length)
+        {
+            string source = imageUrls[i];
+
+            // Where the image ends up on disk
+            string target = outputPath ~ extractFileNameFromUrl(source);
+
+            if(_config.enable_debug_output)
+                writefln("[i] Downloading from %s ==> %s", source, target);
+
+            download(source, target);
+        }
+    }
+
+    /++
+    + Downloads the doujin behind `url` into the folder `outputPath`.
+    +
+    + The folder is (re)created by `createOuputFolder` before the image
+    + urls are looked up and downloaded.
+    +
+    + Params:
+    +   url        = the link of the manga
+    +   outputPath = the folder the images are stored in
+    +/
+    void downloadDoujinFromUrl(string url, string outputPath)
+    {
+        // The folder has to be there before anything is fetched
+        createOuputFolder(outputPath);
+
+        // Look up the image urls and fetch them right away
+        downloadImages(getImageUrlsFromBase(url), outputPath);
+
+        writeln("[*] Done downloading...");
+    }
+
+public:
+
+    /++
+    + Creates the site object and remembers the `Config`
+    + it is supposed to work with.
+    +
+    + Params:
+    +   config = the settings used for all downloads of this object
+    +/
+    this(Config config)
+    {
+        this._config = config;
+    }
+
+    /++
+    + This function downloads a doujin from the supplied url into a
+    + folder named after the doujin inside the standard download folder.
+    +
+    + Nothing is downloaded when the folder already exists and
+    + `redownload_mangas_regardless` is disabled, or when no usable
+    + folder name could be determined for the url.
+    +
+    + Params:
+    +   url = the link of the doujin
+    +/
+    void downloadDoujin(string url)
+    {
+        // The name comes from remote data and ends up in a path that may get
+        // deleted recursively, so it must be exactly one real path component:
+        // no separators and neither empty, "." nor ".."
+        string _name = replace(getNameFromUrl(url), "/", "_");
+        if(_name.length == 0 || _name == "." || _name == "..")
+        {
+            writefln(`[!] Couldn't get a usable name for "%s", skipping it`, url);
+            return;
+        }
+
+        // The folder of the doujin
+        string _foldername = _config.standard_download_folder ~ _name ~ "/";
+
+        // Check if the folder already exists and `redownload_mangas_regardless` is set to false
+        if(exists(_foldername) && !_config.redownload_mangas_regardless)
+        {
+            // Then stop downloading
+            return;
+        }
+
+        if(_config.enable_debug_output) writefln("[i] _foldername is ----> %s", _foldername);
+
+        // Download the doujin into a folder with the name of the doujin
+        downloadDoujinFromUrl(url, _foldername);
+    }
+
+    /++
+    + This function downloads multiple doujins, one after another.
+    +
+    + Every url is handed to the single url `downloadDoujin`, so both
+    + variants always apply the same checks and print the same output.
+    +
+    + Params:
+    +   urls = the links of the doujins
+    +/
+    void downloadDoujin(string[] urls)
+    {
+        foreach(string url; urls)
+            downloadDoujin(url);
+    }
+}
--- a/source/sites/basesiteintf.d
+++ b/source/sites/basesiteintf.d
@ -0,0 +1,54 @@
+module sites.basesiteintf;
+
+/++
+ This is the interface for the base class
+ from which all the other sites are inherited
++/
+interface BaseSiteIntf
+{
+protected:
+    /++
+    + This function returns the name of the manga that belongs to `url`
+    +/
+    string getNameFromUrl(string url);
+
+    /++
+    + This function parses the json in `json`
+    + and returns a string array containing all
+    + the urls extracted from the `json` arg
+    +/
+    string[] getUrlsFromJson(string json);
+
+    /++
+    + This function extracts the urls of the images from the supplied manga base url
+    +/
+    string[] getImageUrlsFromBase(string url);
+
+    /++
+    + This function creates a folder with the given name
+    +/
+    void createOuputFolder(string foldername);
+
+    /++
+    + This function downloads the images from the
+    + urls supplied in `imageUrls` into the `outputPath`
+    +/
+    void downloadImages(string[] imageUrls, string outputPath);
+
+    /++
+    + Downloads a doujin from `url` into `outputPath`
+    +/
+    void downloadDoujinFromUrl(string url, string outputPath);
+
+public:
+    /++
+    + Downloads a doujin from `url`
+    +/
+    void downloadDoujin(string url);
+
+    /* /++
+    + Download multiple doujins
+    +/
+    void downloadDojin(string[] urls); */
+}
--- a/source/sites/hentaicafe.d
+++ b/source/sites/hentaicafe.d
@ -0,0 +1,80 @@
+module sites.hentaicafe;
+
+import config.downloaderconfig;
+import sites.basesite;
+
+/++
+ This class handles downloads for the site `hentai.cafe`
++/
+class HentaiCafe : BaseSite
+{
+protected:
+    import std.net.curl : get;
+    import std.conv     : to;
+    import std.regex    : regex, match;
+    import core.stdc.stdlib : exit, EXIT_FAILURE;
+
+    /++
+    + This function gets the name of the manga by fetching `url`
+    + and reading the content of the first `<h3>` element of the page.
+    +
+    + Params:
+    +   url = the link of the manga
+    +
+    + Returns: the text between `<h3>` and `</h3>`
+    +
+    + Throws: `Exception` when the page doesn't contain such an element
+    +/
+    override string getNameFromUrl(string url)
+    {
+        // Get the site html as a string
+        string siteContent = to!string(get(url));
+
+        // Find the name of the manga. The match is non-greedy so that a
+        // second heading on the same line doesn't end up in the name.
+        auto nameRegex = regex(`<h3>(.*?)</h3>`);
+        auto nameMatch = match(siteContent, nameRegex);
+
+        // Without this check a page without heading would index into
+        // empty captures
+        if(nameMatch.empty)
+            throw new Exception(`Couldn't find the name of the manga on "` ~ url ~ `"`);
+
+        // Return only the name not the html tags
+        return nameMatch.captures[1];
+    }
+
+    /++
+    + This function collects the image urls of a hentai.cafe manga.
+    +
+    + The page behind `url` is searched for the link to the reader, the
+    + reader page is fetched and the `var pages = [...]` JSON array found
+    + in it is handed to `getUrlsFromJson`.
+    +
+    + Params:
+    +   url = the "/hc.fyi/" link of the manga
+    +
+    + Returns: the urls of all images of the manga
+    +
+    + Throws: `Exception` when the reader link or the page list can't be found
+    +/
+    override string[] getImageUrlsFromBase(string url)
+    {
+        // Check if the url is a hentai.cafe comic url
+        if(indexOf(url, "/hc.fyi/") == -1)
+        {
+            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
+            // FIXME: no! :<
+            exit(EXIT_FAILURE);
+        }
+
+        // regex patterns for finding urls
+        auto comicRegex     = regex(`\"(https://hentai\.cafe/manga/read/.*)\" title`);
+        // The brackets are part of the capture so that it is a complete JSON array
+        auto jsonInfoRegex  = regex(`var pages = (\[.*\])`);
+
+        // Get page html
+        string comicHTML = to!string(get(url));
+
+        // Find the url in the html mess
+        auto comicUrlMatch = match(comicHTML, comicRegex);
+        if(comicUrlMatch.empty)
+            throw new Exception(`Couldn't find the reader link on "` ~ url ~ `"`);
+
+        // Sanitize the url: the greedy `.*` may run past the link up to a
+        // later `" title`, so cut at the first space and drop the quotes
+        string comicURL = comicUrlMatch.captures[0];
+        comicURL = replace(split(comicURL, " ")[0], "\"", "");
+
+        // Get the first manga page to extract the json with the page infos
+        string mangaPageHTML = to!string(get(comicURL));
+
+        // Get the json data of the page
+        auto jsonMatch = match(mangaPageHTML, jsonInfoRegex);
+        if(jsonMatch.empty)
+            throw new Exception(`Couldn't find the page list on "` ~ comicURL ~ `"`);
+
+        // Take the captured array as it is. Cutting the match at "=" would
+        // truncate the JSON as soon as one of the urls contains a "=".
+        string jsonData = jsonMatch.captures[1];
+
+        return getUrlsFromJson(jsonData);
+    }
+
+    /++
+    + This constructor just passes `config` on to the
+    + constructor of the base class
+    +/
+    public this(Config config)
+    {
+        super(config);
+    }
+}
--- a/source/sites/nhentai.d
+++ b/source/sites/nhentai.d
@ -0,0 +1,156 @@
+module sites.nhentai;
+
+import config.downloaderconfig;
+import sites.basesite;
+
+/++
+ This class handles downloads for the site `nhentai`
++/
+class NHentai : BaseSite
+{
+private
+    import std.conv     : to;
+    import std.net.curl : get;
+    import std.json     : JSONValue, parseJSON;
+    import std.array    : split;
+
+    /++
+    + This struct holds all the needed infos about the nhentai doujin
+    +/
+    struct NHentai_Doujin_Info
+    {
+        /++
+        + This is the number of the manga
+        +/
+        string   number;
+
+        /++
+        + This is the title of the manga
+        +/
+        string   title;
+
+        /++
+        + This array holds all the urls of the images
+        +/
+        string[] imageUrls;
+    }
+
+    /++
+    + This is the url of the nhentai api.
+    + Calls are made by the number of the manga,
+    + for example "https://apis.nhent.ai/g/1"
+    +
+    + The returned json string contains all the info that
+    + is read into `NHentai_Doujin_Info`
+    +/
+    immutable string api_url = "https://apis.nhent.ai/g/";
+
+    /++
+    + This variable holds the class internal
+    + number of the manga
+    +/
+    string _number;
+
+    /++
+    + This struct contains all the needed infos
+    + to download the manga
+    +/
+    NHentai_Doujin_Info _nhentai_doujin_info;
+
+    /++
+    + This function extracts the number of the manga from the supplied
+    + url, i.e. the path segment that follows "/g/".
+    +
+    + It works with and without a trailing slash, e.g. both
+    + ".../g/12345/" and ".../g/12345" yield "12345".
+    +
+    + Params:
+    +   url = the link of the manga
+    +
+    + Returns: the number of the manga as a string
+    +
+    + Throws: `Exception` when `url` contains no "/g/<number>" part
+    +/
+    string extractNumFromUrl(string url)
+    {
+        string[] parts = url.split("/");
+
+        // Look for the segment right after "g" instead of counting from the
+        // end: counting from the end picks "g" itself when the trailing
+        // slash is missing and underflows on very short urls
+        foreach(i; 0 .. parts.length)
+        {
+            if(parts[i] == "g" && i + 1 < parts.length && parts[i + 1].length != 0)
+                return parts[i + 1];
+        }
+
+        throw new Exception(`Couldn't find the manga number in "` ~ url ~ `"`);
+    }
+
+    /++
+    + Asks the api for the doujin with the number `mangaNum` and
+    + collects the answer in a `NHentai_Doujin_Info`.
+    +
+    + Only `imageUrls` and `title` are filled in here, the `number`
+    + member is left untouched.
+    +
+    + Params:
+    +   mangaNum = the number of the manga
+    +
+    + Returns: the info struct with the image urls and the title
+    +/
+    NHentai_Doujin_Info getDoujinInfo(string mangaNum)
+    {
+        // One request to the api delivers everything that is needed
+        string payload = to!string(get(api_url ~ mangaNum));
+
+        NHentai_Doujin_Info result;
+
+        // The image urls are pulled out by the class' own json reader
+        result.imageUrls = getUrlsFromJson(payload);
+
+        // The title is read straight from the parsed document
+        JSONValue document = parseJSON(payload);
+        result.title = document["title"].str();
+
+        return result;
+    }
+
+    /++
+    + Makes sure that the class internal info struct describes the
+    + manga with the given number.
+    +
+    + When the stored number already equals `number` nothing happens,
+    + otherwise the info is fetched with `getDoujinInfo`.
+    +
+    + Params:
+    +   number = the number of the manga
+    +/
+    void fetchInfoForManaga(string number)
+    {
+        // The stored info already belongs to this manga
+        if(_nhentai_doujin_info.number == number)
+            return;
+
+        writeln("\nGetting info....");
+
+        // Fetch the info and remember which manga it belongs to
+        NHentai_Doujin_Info fresh = getDoujinInfo(number);
+        fresh.number = number;
+        _nhentai_doujin_info = fresh;
+    }
+
+protected:
+    /++
+    + Returns the title of the manga behind `url`.
+    +
+    + The number found in the url is remembered in `_number` and
+    + the info of the manga is fetched if that hasn't happened yet.
+    +/
+    override string getNameFromUrl(string url)
+    {
+        // Remember which manga is being worked on
+        string mangaNumber = extractNumFromUrl(url);
+        _number = mangaNumber;
+
+        // Make sure the info struct is filled for it
+        fetchInfoForManaga(mangaNumber);
+
+        string title = _nhentai_doujin_info.title;
+        return title;
+    }
+
+    /++
+    + Returns the urls of the images of the manga behind `url`.
+    +
+    + The number of the manga is taken from `url` itself, so the result
+    + doesn't depend on `getNameFromUrl` having been called before.
+    + The info is only fetched if it isn't already stored for that number.
+    +/
+    override string[] getImageUrlsFromBase(string url)
+    {
+        // Use the manga the caller asked for, not whatever was looked at last
+        _number = extractNumFromUrl(url);
+
+        // Fetch info if it wasn't already fetched
+        fetchInfoForManaga(_number);
+
+        return _nhentai_doujin_info.imageUrls;
+    }
+
+    /++
+    + Reads the image urls from the "pages" array of the api answer.
+    + Every occurrence of "i.bakaa.me" in an url is replaced
+    + by "i.nhentai.net".
+    +/
+    override string[] getUrlsFromJson(string json)
+    {
+        JSONValue root = parseJSON(json);
+
+        // The entries of "pages" are plain strings
+        JSONValue[] pages = root["pages"].array();
+
+        string[] links;
+        foreach(i; 0 .. pages.length)
+            links ~= replace(pages[i].str(), "i.bakaa.me", "i.nhentai.net");
+
+        return links;
+    }
+
+    /++
+    + This constructor just passes `config` on to the
+    + constructor of the base class
+    +/
+    public this(Config config)
+    {
+        super(config);
+    }
+}