lyadmin/perl-script/bookdl.pl

#!/usr/bin/perl

binmode STDOUT, ":utf8";

use warnings;
use strict;

use JSON;

# --- Configuration ----------------------------------------------------------

# Host serving the board, and the base URL that thread ids are appended to.
my $DOMAIN     = "lainchan.org";
my $URL_PREFIX = "https://$DOMAIN/lit/res/";

# Directory prefix used for the download targets in the generated commands.
my $OUT_DIR = "./dl/";

# Message shown when the script is invoked without a usable thread id.
my $USAGE = "Usage: bookdl.pl [http://someurl/]thread_id";

# --- Working state, filled in further down ----------------------------------

my $THREAD_NO;    # declared, but not referenced by the code visible here
my $thread_no;    # numeric thread id taken from the command line
my $ACMD;         # raw first command-line argument
my $fn;           # name of the local JSON file
my %jh;           # decoded thread JSON
my @a1;           # list of [file_name, file_url] pairs


# Validate the command line and extract the numeric thread id.
#
# The single argument may be either a bare thread id ("1234") or a URL
# whose last digit-led path component is the id (".../res/1234.html").
# Dies with the usage message when no argument or no usable id is given.
die "$USAGE" unless @ARGV;
$ACMD = $ARGV[0];

# The "anything up to a slash" prefix is optional, because the usage
# message allows a bare thread id.  The match is anchored at the start so
# that digits buried elsewhere in the argument (e.g. in a host name) are
# never picked up by accident.
($thread_no) = $ACMD =~ m{\A (?: .* / )? ([0-9]+) }x;

# An id of "0" is false in boolean context, so test definedness instead.
die "$USAGE" unless defined $thread_no;


# Download the thread's JSON (which lists the book filenames) from
# lainchan into "<thread_no>.json" in the current directory.
{
    my $url      = $URL_PREFIX . $thread_no . ".json";
    my $json_out = "$thread_no.json";

    printf("%s\n", $url);

    # List-form system() bypasses the shell entirely, and the exit status
    # is checked so that a failed download stops the script here instead of
    # surfacing later as a confusing JSON parse error.
    system("wget", $url, "-O", $json_out) == 0
        or die "wget failed for $url (status $?)\n";
}

$fn = "$thread_no.json";

# Read the downloaded JSON file and decode it into %jh.
#
# The file is slurped as raw bytes and decoded with the JSON object's utf8
# option enabled, so non-ASCII text (e.g. file names) becomes proper Perl
# character strings.  STDOUT has a :utf8 layer, so decoding the bytes
# without that option would print such text double-encoded.
# Dies if the file cannot be opened or read, if the JSON is malformed, or
# if its top level is not an object.
{
    open my $json_fh, "<:raw", $fn
        or die "could not open file '$fn': $!";

    # Slurp mode: read the whole file with a single readline.
    local $/ = undef;
    my $json_str = <$json_fh>;
    close $json_fh;

    die "could not read file '$fn'" unless defined $json_str;

    my $decoded = JSON->new()->utf8()->decode($json_str);
    die "unexpected JSON in '$fn': top level is not an object"
        unless ref $decoded eq 'HASH';

    %jh = %{$decoded};
}

# Now we need to parse the JSON we just read
# into an array of tuples


# Build @a1, the list of files to fetch, from the decoded thread JSON.
# Each element is an array ref of the form:
#     [file_name, file_url]

# Returns true when ENTRY (a post or an extra_files element) describes a
# file we want to download: it has an extension that is not one of the
# image/video types we skip, and it carries the "filename" and "tim"
# fields needed to build the tuple.
sub _is_wanted_file {
    my ($entry) = @_;

    my $ext = $entry->{"ext"};
    return 0 unless $ext;

    # Whole-extension, case-insensitive match, so ".PNG" is skipped just
    # like ".png"; the leading dot is optional.
    return 0 if $ext =~ m{\A \.? (?: jpe?g | png | gif | webm ) \z}xi;

    return (defined $entry->{"filename"} && defined $entry->{"tim"}) ? 1 : 0;
}

# Returns the list of [file_name, file_url] tuples for POSTS (an array ref
# of post hashes; anything else is treated as "no posts").  URL_PREFIX is
# prepended to "<tim><ext>" to form each file URL.  Files attached directly
# to posts come first, followed by those listed under "extra_files".
sub _collect_files {
    my ($posts, $url_prefix) = @_;

    my @posts = ref $posts eq 'ARRAY' ? @{$posts} : ();

    # Files attached directly to a post.
    my @primary = grep { _is_wanted_file($_) } @posts;

    # Additional files of multi-file posts, flattened into one list.
    my @extra =
        grep { _is_wanted_file($_) }
        map  { @{ $_->{"extra_files"} } }
        grep { $_->{"extra_files"} } @posts;

    # NOTE(review): the file URL is built under the same prefix as the
    # thread JSON (".../res/"); vichan-style boards commonly serve uploads
    # from ".../src/" instead -- confirm against a live thread.
    return map {
        [
            $_->{"filename"} . $_->{"ext"},             # file_name
            $url_prefix . $_->{"tim"} . $_->{"ext"},    # file_url
        ]
    } (@primary, @extra);
}

@a1 = _collect_files($jh{"posts"}, $URL_PREFIX);

# Quote TEXT so that a POSIX shell reads it back as one literal word:
# wrap it in single quotes and spell each embedded single quote as '\''.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Print one wget command per [file_name, file_url] tuple in @a1.
#
# The file names come from the thread's posters, i.e. they are untrusted,
# and the printed lines are meant to be run by a shell.  Both the URL and
# the target path are therefore shell-quoted, and any "/" in the file name
# is replaced so the target cannot leave $OUT_DIR.
for my $file (@a1) {
    my ($name, $url) = @{$file};

    $name =~ tr{/}{_};

    printf("wget -nv -nc %s -O %s\n",
        _shell_quote($url), _shell_quote($OUT_DIR . $name));
}