Fixed the nhentai class: it was using an API that has just died

2020-04-13 20:42:57 +02:00 · 2020-04-13 20:42:57 +02:00 · b69656eb81
commit b69656eb81
parent 48af91c224
4 changed files with 67 additions and 146 deletions
--- a/source/sites/basesite.d
+++ b/source/sites/basesite.d
@ -27,25 +27,6 @@ protected:
    abstract string[] getImageUrlsFromBase(string url);

    /++
-    + Gets the image urls from the supplied json
-    + each derived site class should override this class
-    + if the image urls aren't stored in the way this method
-    + expects them to be
-    +/
-    string[] getUrlsFromJson(string json)
-    {
-        string[] urls;
-        // Parse the json
-        JSONValue parsedJson = parseJSON(json);
-
-        // Extract the urls of the images
-        foreach(JSONValue val; parsedJson.array)
-            urls ~= val["url"].str.replace("\\", "");
-
-        return urls;
-    }
-
-    /++
    + This function creates a folder with the supplied name.
    + If the folder already exists the folder will get deleted!!
    +/
--- a/source/sites/basesiteintf.d
+++ b/source/sites/basesiteintf.d
@ -14,13 +14,6 @@ protected:
    string getNameFromUrl(string url);

    /++
-    + This function parses the json in `json`
-    + and returns a string array containing all
-    + the urls extracted from the `json` arg
-    +/
-    string[] getUrlsFromJson(string json);
-
-    /++
    + This function extracts the urls of the images from the supplied manga base url
    +/
    string[] getImageUrlsFromBase(string url);
--- a/source/sites/hentaicafe.d
+++ b/source/sites/hentaicafe.d
@ -15,6 +15,25 @@ protected:
    import core.stdc.stdlib : exit, EXIT_FAILURE;

    /++
+    + Gets the image urls from the supplied json.
+    + Each derived site class should override this method
+    + if the image urls aren't stored in the way this method
+    + expects them to be.
+    +/
+    string[] getUrlsFromJson(string json)
+    {
+        // Decode the raw text into a json document
+        JSONValue document = parseJSON(json);
+
+        // Collect each top-level entry's "url" string, backslashes removed
+        string[] imageUrls;
+        foreach(entry; document.array)
+            imageUrls ~= entry["url"].str.replace("\\", "");
+
+        return imageUrls;
+    }
+
+    /++
    + This function gets the name of the manga from the url
    +/
    override string getNameFromUrl(string url)
@ -31,7 +50,8 @@ protected:
    }

    /++
-    +
+    + This function returns a list of all the image urls by
+    + parsing the html
    +/
    override string[] getImageUrlsFromBase(string url)
    {
@ -71,7 +91,7 @@ protected:
    }

    /++
-    +
+    + This constructor just calls the inherited constructor
    +/
    public this(Config config)
    {
--- a/source/sites/nhentai.d
+++ b/source/sites/nhentai.d
@ -4,144 +4,71 @@ import config.downloaderconfig;
 import sites.basesite;

 /++
-+ This class handles downloads for the site `nhentai`
+ This class handles downloads for the site `nhentai.net`
 +/
 class NHentai : BaseSite
 {
-private
    import std.conv         : to;
+    import std.regex        : regex, match;
    import std.net.curl     : get;
-    import std.json     : JSONValue, parseJSON;
-    import std.array    : split;
+    import core.stdc.stdlib : exit, EXIT_FAILURE;

    /++
-    + This struct holds all the needed infos about the nhentai doujin
+    + This is the base url for all images
    +/
-    struct NHentai_Doujin_Info
-    {
-        /++
-        + This is the number of the manga
-        +/
-        string   number;
+    immutable string imageUrl = "https://i.nhentai.net/galleries/";

    /++
-        + This is the title of the the manga
+    + This function gets the name of the manga from the url
    +/
-        string   title;
-
-        /++
-        + This array holds all the urls of the images
-        +/
-        string[] imageUrls;
-    }
-
-    /++
-    + This is the url of the nhentai api
-    + calls are made by the number of the manga
-    + for example "https://apis.nhent.ai/g/1"
-    +
-    + The returned json string contains all the info
-    + should be read into `NHenta_Doujin_Info`
-    +/
-    immutable string api_url = "https://apis.nhent.ai/g/";
-
-    /++
-    + This variable holds the class internal
-    + number of the manga
-    +/
-    string _number;
-
-    /++
-    + This struct contains all the needed infos
-    + to download the managa
-    +/
-    NHentai_Doujin_Info _nhentai_doujin_info;
-
-    /++
-    + This function extracts the number of the manga
-    + from the supplied url
-    +/
-    string extractNumFromUrl(string url)
-    {
-        string[] tmpString = url.split("/");
-        // FIXME: length could be unsigned so substract bad!
-        return tmpString[tmpString.length-2];
-    }
-
-    /++
-    + This function gets the info of of the doujin using the api
-    + it returns a struct with all the important info
-    +/
-    NHentai_Doujin_Info getDoujinInfo(string mangaNum)
-    {
-        NHentai_Doujin_Info _info;
-
-        // Craft the url
-        string requestUrl = api_url ~ mangaNum;
-
-        // Get the json data for the manga
-        string jsonData = to!string(get(requestUrl));
-
-        // Extract the image urls from the json string
-        _info.imageUrls = getUrlsFromJson(jsonData);
-
-        // Parse the data
-        auto parseData = parseJSON(jsonData);
-
-        // Get the title
-        _info.title = parseData["title"].str();
-
-        return _info;
-    }
-
-    /++
-    + If the class internal info struct is filled
-    + but the number is different `getDoujinInfo` gets
-    + called otherwise nothing happens
-    +/
-    void fetchInfoForManaga(string number)
-    {
-        // If the doujin info wasnt fetched fetch it now
-        if(_nhentai_doujin_info.number != number)
-        {
-            writeln("\nGetting info....");
-            // Fill the info
-            _nhentai_doujin_info = getDoujinInfo(number);
-            _nhentai_doujin_info.number = number;
-        }
-    }
-
-protected:
    override string getNameFromUrl(string url)
    {
-        // Extract the manga number
-        _number = extractNumFromUrl(url);
+        // Download the html of the page behind `url`
+        string siteContent = to!string(get(url));

-        // Fetch manga infos
-        fetchInfoForManaga(_number);
+        // Find the first <h1> element; the lazy `.*?` stops at the nearest </h1>
+        auto nameRegex = `<h1>(.*?)</h1>`.regex;
+        auto nameMatch = match(siteContent, nameRegex);

-        // Return the name of the managa
-        return _nhentai_doujin_info.title;
+        // Return only the captured text between the tags, not the html tags
+        return nameMatch.captures[1];
    }

    override string[] getImageUrlsFromBase(string url)
    {
-        // Fetch info if it wanst already fetched
-        fetchInfoForManaga(_number);
-
-        return _nhentai_doujin_info.imageUrls;
+        // Refuse urls that do not contain the "/g/" path segment
+        if(indexOf(url, "/g/") == -1)
+        {
+            writefln(`[!] The given url doesn't contain "/g/" it was ignored!`);
+            // FIXME: no! :<
+            exit(EXIT_FAILURE);
        }

-    override string[] getUrlsFromJson(string json)
-    {
-        // Extract url from json
+        // Regex patterns; `[0-9]+` captures only the digits instead of a greedy `.*`
+        auto contentIDRegex = "https://t.nhentai.net/galleries/([0-9]+)/cover.jpg";
+        auto pageCountRegex = "<div>([0-9]+) pages</div>";
+
+        // Download the html of the page behind `url`
+        auto coverHtml = to!string(get(url));
+
+        // Find the content id inside the cover image url
+        auto contentIDMatch = match(coverHtml, contentIDRegex).captures[1];
+
+        writeln(contentIDMatch);
+
+        // Find the number of pages
+        auto pageNumberMatch = match(coverHtml, pageCountRegex).captures[1];
+
+        // Convert the page number to an integer
+        immutable int pageNumber = to!int(pageNumberMatch);
+
+        // Generate one url per page; pages are numbered 1..pageNumber inclusive
        string[] urls;
-
-        JSONValue parsedJson = parseJSON(json);
-
-        // Extract the urls for the images
-        foreach(JSONValue val; parsedJson["pages"].array())
-            urls ~= val.str().replace("i.bakaa.me", "i.nhentai.net");
+        for(int i = 1; i <= pageNumber; i++)
+        {
+            // NOTE(review): assumes every page image is a ".jpg" - confirm
+            urls ~= imageUrl ~ contentIDMatch ~ "/" ~ to!string(i) ~ ".jpg";
+        }

        return urls;
    }