2021-03-07 22:06:33 -05:00
|
|
|
#!/usr/bin/perl
|
|
|
|
|
|
|
|
binmode STDOUT, ":utf8";
|
|
|
|
|
|
|
|
use warnings;
|
|
|
|
use strict;
|
|
|
|
|
|
|
|
use JSON;
|
|
|
|
|
2021-03-29 09:21:34 -04:00
|
|
|
# Site and output configuration.
my $DOMAIN     = "lainchan.org";
my $URL_PREFIX = "https://$DOMAIN/lit/res/";    # the thread JSON is fetched from under this prefix
my $OUT_DIR    = "./dl/";                       # prepended to every output file name
my $USAGE      = "Usage: bookdl.pl [http://someurl/]thread_id";

# Run state, filled in by the steps further down the script.
my ($THREAD_NO, $thread_no);    # $thread_no: thread id taken from the command line
my $ACMD;                       # raw first command-line argument
my $fn;                         # name of the local JSON file
my %jh;                         # decoded thread JSON
my @a1;                         # list of [file_name, file_url] pairs
|
|
|
|
|
2021-03-29 09:21:34 -04:00
|
|
|
|
|
|
|
# Die with the usage message if no arguments were given.
die "$USAGE" unless scalar @ARGV > 0;
$ACMD = $ARGV[0];

# The argument is either a bare thread id ("1234") or a thread URL whose last
# path component starts with the id (".../res/1234.html"). The leading
# "anything up to a slash" part is optional so that the bare form advertised
# in the usage message is accepted as well; a greedy .* still picks the digits
# after the last slash that is followed by digits.
($thread_no) = $ACMD =~ m{\A(?:.*/)?([0-9]+)};

# Die if no thread number could be extracted.
die "$USAGE" unless $thread_no;
|
|
|
|
|
|
|
|
|
|
|
|
# Download the JSON from lainchan with the list of book filenames.
# The result is stored in the current directory as "<thread id>.json".
$fn = "$thread_no.json";
{
    my $url = $URL_PREFIX . $thread_no . ".json";
    printf("%s\n", $url);

    # List-form system() hands the arguments to wget without going through a
    # shell, and its return value lets us stop on a failed download instead of
    # carrying on with an empty or partial file.
    my $status = system("wget", $url, "-O", $fn);
    die "could not run wget: $!\n" if $status == -1;
    die "wget failed for $url (wait status $status)\n" if $status != 0;
}
|
|
|
|
|
2021-03-07 22:06:33 -05:00
|
|
|
# Read the JSON with the list of files and decode it into %jh.
{
    open my $json_fh, "<", $fn or die "could not open file $fn: $!";

    # Slurp the whole file in one read.
    local $/ = undef;
    my $json_str = <$json_fh>;
    close $json_fh;

    # The file was read as raw bytes, so tell the decoder the input is
    # UTF-8 encoded. Without this every byte of a multi-byte character is
    # treated as its own character and is encoded a second time by the
    # :utf8 layer on STDOUT.
    %jh = %{ JSON->new()->utf8()->decode($json_str) };
}
|
|
|
|
|
2021-03-29 09:21:34 -04:00
|
|
|
# Parse the JSON we just read into the download list.
#
# The immediately invoked anonymous sub keeps its temporaries out of file
# scope and returns a list of array refs of the form:
#   [file_name, file_url]
@a1 = sub {
    # $URL_PREFIX points at the res/ directory the thread JSON is fetched
    # from; the attachments themselves are expected in the sibling src/
    # directory. NOTE(review): this assumes the vichan default layout
    # (threads in res/, files in src/) - confirm against a live thread.
    # A prefix that does not end in /res/ is used unchanged.
    (my $file_url_prefix = $URL_PREFIX) =~ s{/res/\z}{/src/};

    # True for a file record that has an extension and whose extension is
    # not one of the image/video types we do not want to download.
    my $is_wanted = sub {
        my ($file) = @_;
        return $file->{"ext"} && $file->{"ext"} !~ /jpe?g|png|gif|webm/;
    };

    # A thread without a "posts" list simply yields no downloads.
    my @posts = @{ $jh{"posts"} || [] };

    # Files described by the posts' own filename/ext/tim fields.
    my @primary = grep { $is_wanted->($_) } @posts;

    # Files listed in the posts' "extra_files" attribute, same filter.
    my @extra = grep { $is_wanted->($_) }
                map  { @{ $_->{"extra_files"} } }
                grep { $_->{"extra_files"} } @posts;

    # Return our tuples.
    return map {
        [
            sprintf("%s%s", $_->{"filename"}, $_->{"ext"}),                  # file_name
            sprintf("%s%s%s", $file_url_prefix, $_->{"tim"}, $_->{"ext"}),   # file_url
        ]
    } (@primary, @extra);
}->();
|
|
|
|
|
|
|
|
# Print a list of wget commands from our tuples, one command per file.
# The file names come from the downloaded thread data, so every argument is
# quoted for the shell before it is printed.
{
    # Wrap a string in single quotes for a POSIX shell; an embedded single
    # quote is written as '\'' (close quote, escaped quote, reopen quote).
    my $shell_quote = sub {
        my ($text) = @_;
        $text =~ s/'/'\\''/g;
        return "'$text'";
    };

    for my $entry (@a1) {
        my ($file_name, $file_url) = @{$entry};

        # Keep the output file inside $OUT_DIR: a name containing a slash
        # would otherwise address another directory.
        $file_name =~ s{/}{_}g;

        printf("wget -nv -nc %s -O %s\n",
            $shell_quote->($file_url),
            $shell_quote->($OUT_DIR . $file_name));
    }
}
|