|
|
@@ -7,10 +7,12 @@ use strict; |
|
|
|
|
|
|
|
use JSON; |
|
|
|
|
|
|
|
my $DOMAIN; |
|
|
|
my $THREAD_NO; |
|
|
|
my $thread_no; |
|
|
|
my $URL_PREFIX; |
|
|
|
|
|
|
|
my $FN; |
|
|
|
my $fn; |
|
|
|
my $OUT_DIR; |
|
|
|
|
|
|
|
my $USAGE; |
|
|
@@ -21,23 +23,34 @@ my %jh; |
|
|
|
my @a1; |
|
|
|
|
|
|
|
$DOMAIN = "lainchan.org"; |
|
|
|
# $URL_PREFIX = "https://lainchan.org/lit/src/"; |
|
|
|
$URL_PREFIX = "https://$DOMAIN/lit/src/"; |
|
|
|
$THREAD_NO = 4619; |
|
|
|
# $THREAD_NO = 6105; |
|
|
|
# $THREAD_NO = 4345; |
|
|
|
# $THREAD_NO = 4953; |
|
|
|
$FN = "$THREAD_NO.json"; |
|
|
|
$URL_PREFIX = "https://$DOMAIN/lit/res/"; |
|
|
|
$OUT_DIR = "./dl/"; |
|
|
|
|
|
|
|
|
|
|
|
$USAGE = "Usage: bookdl.pl [http://someurl/]thread_id"; |
|
|
|
|
|
|
|
|
|
|
|
# die if no arguments |
|
|
|
die "$USAGE" unless scalar @ARGV > 0; |
|
|
|
$ACMD = $ARGV[0]; |
|
|
|
|
|
|
|
($thread_no) = $ACMD =~ /.*\/([0-9]+).{0,5}/; |
|
|
|
|
|
|
|
# die if didnt provide thread number |
|
|
|
die "$USAGE" unless $thread_no; |
|
|
|
|
|
|
|
|
|
|
|
# Download the JSON from lainchan with list |
|
|
|
# of book filenames |
|
|
|
do { |
|
|
|
my $url = $URL_PREFIX.$thread_no.".json"; |
|
|
|
printf("%s\n", $url); |
|
|
|
`wget $url -O $thread_no.json`; |
|
|
|
}; |
|
|
|
|
|
|
|
$fn = "$thread_no.json"; |
|
|
|
|
|
|
|
# Read JSON with list of files |
|
|
|
open FILE, "<", $FN or die "could not open file"; |
|
|
|
open FILE, "<", $fn or die "could not open file"; |
|
|
|
do{ |
|
|
|
my $json_str; |
|
|
|
|
|
|
@@ -50,7 +63,12 @@ do{ |
|
|
|
}; |
|
|
|
close FILE; |
|
|
|
|
|
|
|
# anonymous function that returns a list |
|
|
|
# Now we need to parse the JSON we just read |
|
|
|
# into an array of tuples |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# this anonymous function returns a list |
|
|
|
# of tuples of the below form: |
|
|
|
# (file_name, file_url) |
|
|
|
@a1 = sub{ |
|
|
@@ -61,24 +79,17 @@ close FILE; |
|
|
|
|
|
|
|
# filters for file types we |
|
|
|
# dont want to downloads |
|
|
|
sub f1 { |
|
|
|
return $_[0]->{"ext"} && !($_[0]->{"ext"} =~ /jpe?g/); |
|
|
|
} |
|
|
|
sub f2 { |
|
|
|
return !($_[0]->{"ext"} =~ /png/); |
|
|
|
} |
|
|
|
sub f3 { |
|
|
|
return !($_[0]->{"ext"} =~ /gif/); |
|
|
|
} |
|
|
|
sub f4 { |
|
|
|
return !($_[0]->{"ext"} =~ /webm/); |
|
|
|
} |
|
|
|
sub f0 { |
|
|
|
return f1($_[0]) && f2($_[0]) && f3($_[0]) && f4($_[0]) |
|
|
|
return $_[0]->{"ext"} && |
|
|
|
!($_[0]->{"ext"} =~ /jpe?g/) && |
|
|
|
!($_[0]->{"ext"} =~ /png/) && |
|
|
|
!($_[0]->{"ext"} =~ /gif/) && |
|
|
|
!($_[0]->{"ext"} =~ /webm/) |
|
|
|
} |
|
|
|
|
|
|
|
# create an array of files |
|
|
|
# that meet our file ext requirement |
|
|
|
# from the posts attribute |
|
|
|
@a0 = grep {f0($_)} @{$jh{"posts"}}; |
|
|
|
|
|
|
|
# do the same filter on the |
|
|
@@ -102,5 +113,5 @@ close FILE; |
|
|
|
|
|
|
|
# Print a list of wget commands from our tuples |
|
|
|
for my $i1 (@a1){ |
|
|
|
printf("wget -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]); |
|
|
|
printf("wget -nv -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]); |
|
|
|
} |