Hentai-Downloader/source/sites/hentaicafe.d

module sites.hentaicafe;

import config.downloaderconfig;
import sites.basesite;

/++
+ This class handles downloads for the site `hentai.cafe`
+/
class HentaiCafe : BaseSite
{
protected:
    import std.net.curl  : get;
    import std.conv      : to;
    import std.regex     : regex, matchFirst;
    import std.exception : enforce;
    import std.stdio     : writefln;
    import std.string    : indexOf;
    import core.stdc.stdlib : exit, EXIT_FAILURE;

    /++
    + Gets the name of the manga from its page.
    +
    + Downloads the page at `url` and returns the text of the first
    + `<h3>...</h3>` element found in it.
    +
    + Params:
    +   url = address of the manga page
    +
    + Returns: the text between the first `<h3>` and its closing tag
    +
    + Throws: `Exception` if the page contains no `<h3>` element; whatever
    +         `std.net.curl.get` throws if the download fails
    +/
    override string getNameFromUrl(string url)
    {
        // Get the site html as a string
        string siteContent = to!string(get(url));

        // Lazy quantifier: stop at the first closing tag so that a second
        // <h3> on the same line is not swallowed into the name
        auto nameRegex = `<h3>(.*?)</h3>`.regex;
        auto nameMatch = matchFirst(siteContent, nameRegex);
        enforce(!nameMatch.empty,
            "Could not find a <h3> title on page: " ~ url);

        // Return only the name, not the html tags
        return nameMatch[1];
    }

    /++
    + Collects the image urls of a comic starting from its base page.
    +
    + The base page is downloaded and searched for the link to the reader
    + (`https://hentai.cafe/manga/read/...`). The reader page is then
    + downloaded and the array literal assigned to `var pages` is extracted
    + and handed to `getUrlsFromJson` (not defined in this file; presumably
    + inherited from `BaseSite`).
    +
    + Params:
    +   url = address of the comic base page; must contain `/hc.fyi/`
    +
    + Returns: whatever `getUrlsFromJson` returns for the extracted page data
    +
    + Throws: `Exception` if the reader link or the `var pages` array cannot
    +         be found; whatever `std.net.curl.get` throws if a download fails
    +
    + Note: if `url` does not contain `/hc.fyi/` the whole process is
    +       terminated with `EXIT_FAILURE`.
    +/
    override string[] getImageUrlsFromBase(string url)
    {
        // Check if the url is a hentai.cafe comic url
        if(indexOf(url, "/hc.fyi/") == -1)
        {
            writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
            // FIXME: terminating the process from library code is heavy
            // handed; kept because callers may rely on it.
            exit(EXIT_FAILURE);
        }

        // regex patterns for finding urls
        // The url capture stops at the first quote or space, so the group
        // already holds the clean url and needs no post-processing.
        auto comicRegex     = `"(https://hentai\.cafe/manga/read/[^" ]+)" title`.regex;
        // Greedy on purpose: the array may contain nested brackets.
        auto jsonInfoRegex  = `var pages = \[(.*)\]`.regex;

        // Get page html
        string comicHTML = to!string(get(url));

        // Find the reader url in the html mess
        auto comicUrlMatch = matchFirst(comicHTML, comicRegex);
        enforce(!comicUrlMatch.empty,
            "Could not find the reader link on page: " ~ url);
        string comicURL = comicUrlMatch[1];

        // Get the first manga page to extract the json with the page infos
        string mangaPageHTML = to!string(get(comicURL));

        // Get the json data of the page
        auto jsonMatch = matchFirst(mangaPageHTML, jsonInfoRegex);
        enforce(!jsonMatch.empty,
            "Could not find the page list on reader page: " ~ comicURL);

        // Rebuild the array literal from the capture group. Splitting the
        // whole match on '=' would cut the data at the first '=' that occurs
        // inside it (e.g. in a query string of an image url).
        string jsonData = "[" ~ jsonMatch[1] ~ "]";

        return getUrlsFromJson(jsonData);
    }

    /++
    + Creates the site handler and forwards `config` to the base class
    + constructor.
    +
    + Params:
    +   config = downloader configuration passed on to `BaseSite`
    +/
    public this(Config config)
    {
        super(config);
    }
}
Initial commit 2020-04-13 13:19:14 -04:00			`module sites.hentaicafe;`

			`import config.downloaderconfig;`
			`import sites.basesite;`

			`/++`
			+ This class handles downloads for the site `hentai.cafe`
			`+/`
			`class HentaiCafe : BaseSite`
			`{`
			`protected:`
			`import std.net.curl : get;`
			`import std.conv : to;`
			`import std.regex : regex, match;`
			`import core.stdc.stdlib : exit, EXIT_FAILURE;`

			`/++`
			`+ This function gets the name of the manga by the url`
			`+/`
			`override string getNameFromUrl(string url)`
			`{`
			`// Get the site html as a string`
			`string siteContent = to!string(get(url));`

			`// Find the name of the manga`
			auto nameRegex = `<h3>(.*)</h3>`.regex;
			`auto nameMatch = match(siteContent, nameRegex);`

			`// Return only the name not the html tags`
			`return nameMatch.captures[1];`
			`}`

			`/++`
			`+`
			`+/`
			`override string[] getImageUrlsFromBase(string url)`
			`{`
			`// Check if the url is a hentai.cafe comic url`
			`if(indexOf(url, "/hc.fyi/") == -1)`
			`{`
			writefln(`[!] The given url doesn't contain "/hc.fyi/" it was ignored!`);
			`// FIXME: no! :<`
			`exit(EXIT_FAILURE);`
			`}`

			`// regex patterns for finding urls`
			auto comicRegex = `\"(https://hentai.cafe/manga/read/.*)\" title`.regex;
			auto jsonInfoRegex = `var pages = \[(.*)\]`.regex;

			`// Get page html`
			`string comicHTML = to!string(get(url));`

			`// Find the url in the html mess`
			`auto comicUrlMatch = match(comicHTML, comicRegex);`

			`// Sanitize the url`
			`string comicURL = comicUrlMatch.captures[0];`
			`comicURL = split(comicURL, " ")[0].replace("\"", "");`

			`// Get the first manga page to extract the json with the page infos`
			`string mangaPageHTML = to!string(get(comicURL));`

			`// Get the json data of the page`
			`auto jsonMatch = match(mangaPageHTML, jsonInfoRegex);`
			`string jsonData = jsonMatch.captures[0];`

			`// Sanitize json`
			`jsonData = split(jsonData, "=")[1];`

			`return getUrlsFromJson(jsonData);`
			`}`

			`/++`
			`+`
			`+/`
			`public this(Config config)`
			`{`
			`super(config);`
			`}`
			`}`