lyadmin/perl-script/bookdl.pl

#!/usr/bin/perl

binmode STDOUT, ":utf8";

use warnings;
use strict;

use JSON;

# Site and paths.  The thread JSON is fetched from below $URL_PREFIX and
# the generated wget commands write into $OUT_DIR.
my $DOMAIN     = "lainchan.org";
my $URL_PREFIX = "https://$DOMAIN/lit/res/";
my $OUT_DIR    = "./dl/";

# Help text shown when the command line is unusable.
my $USAGE = "Usage: bookdl.pl [http://someurl/]thread_id";

# Command-line state, filled in further down.
my $ACMD;        # raw first command-line argument
my $thread_no;   # thread number extracted from $ACMD
my $THREAD_NO;   # declared but not referenced anywhere in the visible script
my $fn;          # name of the downloaded JSON file

# Parsed thread data.
my %jh;          # decoded thread JSON
my @a1;          # list of [file_name, file_url] pairs


# Bail out with the usage text when no argument was given.  The trailing
# newline keeps Perl from appending "at <file> line <n>." to the message.
die "$USAGE\n" unless @ARGV;
$ACMD = $ARGV[0];

# Accept either a bare thread id or a URL/path whose last "/<digits>"
# component is the thread id, as the usage text promises.  The previous
# pattern required a "/" before the digits, so a bare id was rejected.
# For arguments that do contain "/<digits>" the captured number is the
# same as before (greedy match up to the last such slash).
($thread_no) = $ACMD =~ m{\A(?:.*/)?([0-9]+)};

# Bail out if no thread number could be extracted.
die "$USAGE\n" unless $thread_no;


# Download the thread's JSON (it carries the list of attached files)
# into "<thread_no>.json" in the current directory.
do {
    my $url = $URL_PREFIX.$thread_no.".json";
    printf("%s\n", $url);

    # List-form system() runs wget without going through a shell, and
    # checking the status stops the script here instead of letting it go
    # on to parse a missing or empty file.
    system("wget", $url, "-O", "$thread_no.json") == 0
        or die "wget failed for $url (wait status $?)\n";
};

$fn = "$thread_no.json";

# Read the downloaded JSON file and decode it into %jh.
do {
    # Lexical filehandle; the error message names the file and the reason.
    open my $fh, "<", $fn or die "could not open file $fn: $!";

    # Slurp the whole file in one read by undefining the record separator
    # for the duration of this inner block only.
    my $json_str = do { local $/; <$fh> };
    close $fh;

    # The file was read as raw bytes, so the decoder has to be told the
    # input is UTF-8 encoded.  Without ->utf8 every byte is taken as one
    # character and non-ASCII file names come out double-encoded on the
    # ":utf8" STDOUT.
    %jh = %{JSON->new()->utf8->decode($json_str)};
};

# Now we need to turn the JSON we just read into a list of tuples.

# This anonymous function returns a list of array refs of the form
#     [file_name, file_url]
# built from every post, and every entry of a post's "extra_files",
# that carries a wanted file extension.  Primary post files come first,
# followed by the extra files.
@a1 = sub {
    # True when the entry has an "ext" value that does not look like an
    # image or video type (jpg/jpeg, png, gif, webm).  Kept as a lexical
    # closure: a named sub declared in here would be installed globally
    # rather than being local to this function.
    my $is_wanted = sub {
        my ($entry) = @_;
        my $ext = $entry->{"ext"};
        return $ext && $ext !~ /jpe?g|png|gif|webm/;
    };

    # Treat a missing "posts" attribute as an empty thread.
    my @posts = @{ $jh{"posts"} || [] };

    # Files attached directly to a post.
    my @primary = grep { $is_wanted->($_) } @posts;

    # Files listed under a post's extra_files attribute, same filter.
    my @extra =
        grep { $is_wanted->($_) }
        map  { @{ $_->{"extra_files"} } }
        grep { $_->{"extra_files"} } @posts;

    # NOTE(review): file URLs are built from the same prefix as the
    # thread JSON (.../res/); confirm that the board actually serves
    # attachments from that path.
    return map {
        [
            $_->{"filename"} . $_->{"ext"},            # file_name
            $URL_PREFIX . $_->{"tim"} . $_->{"ext"},   # file_url
        ]
    } (@primary, @extra);
}->();

# Print one wget command per [file_name, file_url] tuple.  The output is
# meant to be run by a shell, and both values originate from the
# downloaded thread JSON, so they are sanitised before being printed.
for my $i1 (@a1){
    my ($name, $url) = @{$i1};

    # Flatten path separators so a crafted file name cannot place the
    # download outside $OUT_DIR.
    $name =~ tr{/}{_};

    # Make both values safe inside single quotes: close the quote, emit
    # an escaped quote, reopen the quote.
    s/'/'\\''/g for $name, $url;

    printf("wget -nv -nc '%s' -O '%s%s'\n", $url, $OUT_DIR, $name);
}
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`#!/usr/bin/perl`

			`binmode STDOUT, ":utf8";`

			`use warnings;`
			`use strict;`

			`use JSON;`

bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`my $DOMAIN;`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`my $THREAD_NO;`
bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`my $thread_no;`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`my $URL_PREFIX;`

bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`my $fn;`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`my $OUT_DIR;`

bookdl.pl - added URL_PREFIX... user_cleanup.pl - started script for delteting users who clear out their homedir 2021-03-23 22:38:55 -04:00			`my $USAGE;`
			`my $ACMD;`

added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`my %jh;`

			`my @a1;`

bookdl.pl - added URL_PREFIX... user_cleanup.pl - started script for delteting users who clear out their homedir 2021-03-23 22:38:55 -04:00			`$DOMAIN = "lainchan.org";`
bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`$URL_PREFIX = "https://$DOMAIN/lit/res/";`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`$OUT_DIR = "./dl/";`

bookdl.pl - added URL_PREFIX... user_cleanup.pl - started script for delteting users who clear out their homedir 2021-03-23 22:38:55 -04:00			`$USAGE = "Usage: bookdl.pl [http://someurl/]thread_id";`

bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00
			`# die if no arguments`
bookdl.pl - added URL_PREFIX... user_cleanup.pl - started script for delteting users who clear out their homedir 2021-03-23 22:38:55 -04:00			`die "$USAGE" unless scalar @ARGV > 0;`
			`$ACMD = $ARGV[0];`

bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`($thread_no) = $ACMD =~ /.*\/([0-9]+).{0,5}/;`

			`# die if didnt provide thread number`
			`die "$USAGE" unless $thread_no;`


			`# Download the JSON from lainchan with list`
			`# of book filenames`
			`do {`
			`my $url = $URL_PREFIX.$thread_no.".json";`
			`printf("%s\n", $url);`
			`wget $url -O $thread_no.json`;
			`};`

			`$fn = "$thread_no.json";`

added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`# Read JSON with list of files`
bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`open FILE, "<", $fn or die "could not open file";`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`do{`
			`my $json_str;`

			`local $/=undef;`

			`$json_str = <FILE>;`
			`chomp $json_str;`

			`%jh = %{JSON->new()->decode($json_str)};`
			`};`
			`close FILE;`

bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`# Now we need to parse the JSON we just read`
			`# into an array of tuples`



			`# this anonymous function returns a list`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`# of tuples of the below form:`
			`# (file_name, file_url)`
			`@a1 = sub{`
			`my @a0;`
			`my @a2;`

			`my $f1;`

			`# filters for file types we`
			`# dont want to downloads`
			`sub f0 {`
bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`return $_[0]->{"ext"} &&`
			`!($_[0]->{"ext"} =~ /jpe?g/) &&`
			`!($_[0]->{"ext"} =~ /png/) &&`
			`!($_[0]->{"ext"} =~ /gif/) &&`
			`!($_[0]->{"ext"} =~ /webm/)`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`}`

			`# create an array of files`
			`# that meet our file ext requirement`
bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`# from the posts attribute`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`@a0 = grep {f0($_)} @{$jh{"posts"}};`

			`# do the same filter on the`
			`# extra_files attribute`
			`@a2 = grep {`
			`f0($_)`
			`} map {`
			`@{$_->{"extra_files"}}`
			`} grep {`
			`$_->{"extra_files"}`
			`} @{$jh{"posts"}};`

			`# Return our tuple`
			`return map {`
			`[`
			`sprintf("%s%s", $_->{"filename"}, $_->{"ext"}), # file_name`
			`sprintf("%s%s%s", $URL_PREFIX, $_->{"tim"}, $_->{"ext"}) # file_url`
			`]`
			`} (@a0, @a2);`
			`}->();`

			`# Print a list of wget commands from our tuples`
			`for my $i1 (@a1){`
bookdl.pl - cleaned up filter function. added code to download thread. added comments 2021-03-29 09:21:34 -04:00			`printf("wget -nv -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]);`
added bookdl.pl for downloading books from lainchan 2021-03-07 22:06:33 -05:00			`}`