bookdl.pl - cleaned up filter function. added code to download thread. added comments

This commit is contained in:
gashapwn 2021-03-29 13:21:34 +00:00
parent 40b68f35a6
commit 707b4522d2

View File

@ -7,10 +7,12 @@ use strict;
use JSON; use JSON;
my $DOMAIN;
my $THREAD_NO; my $THREAD_NO;
my $thread_no;
my $URL_PREFIX; my $URL_PREFIX;
my $FN; my $fn;
my $OUT_DIR; my $OUT_DIR;
my $USAGE; my $USAGE;
@ -21,23 +23,34 @@ my %jh;
my @a1; my @a1;
$DOMAIN = "lainchan.org"; $DOMAIN = "lainchan.org";
# $URL_PREFIX = "https://lainchan.org/lit/src/"; $URL_PREFIX = "https://$DOMAIN/lit/res/";
$URL_PREFIX = "https://$DOMAIN/lit/src/";
$THREAD_NO = 4619;
# $THREAD_NO = 6105;
# $THREAD_NO = 4345;
# $THREAD_NO = 4953;
$FN = "$THREAD_NO.json";
$OUT_DIR = "./dl/"; $OUT_DIR = "./dl/";
$USAGE = "Usage: bookdl.pl [http://someurl/]thread_id"; $USAGE = "Usage: bookdl.pl [http://someurl/]thread_id";
# die if no arguments
die "$USAGE" unless scalar @ARGV > 0; die "$USAGE" unless scalar @ARGV > 0;
$ACMD = $ARGV[0]; $ACMD = $ARGV[0];
($thread_no) = $ACMD =~ /.*\/([0-9]+).{0,5}/;
# die if the user didn't provide a thread number
die "$USAGE" unless $thread_no;
# Download the thread's JSON from lainchan; it contains
# the list of book filenames
do {
my $url = $URL_PREFIX.$thread_no.".json";
printf("%s\n", $url);
`wget $url -O $thread_no.json`;
};
$fn = "$thread_no.json";
# Read JSON with list of files # Read JSON with list of files
open FILE, "<", $FN or die "could not open file"; open FILE, "<", $fn or die "could not open file";
do{ do{
my $json_str; my $json_str;
@ -50,7 +63,12 @@ do{
}; };
close FILE; close FILE;
# anonymous function that returns a list # Now we need to parse the JSON we just read
# into an array of tuples
# this anonymous function returns a list
# of tuples of the below form: # of tuples of the below form:
# (file_name, file_url) # (file_name, file_url)
@a1 = sub{ @a1 = sub{
@ -61,24 +79,17 @@ close FILE;
# filters for file types we # filters for file types we
# don't want to download # don't want to download
sub f1 {
return $_[0]->{"ext"} && !($_[0]->{"ext"} =~ /jpe?g/);
}
sub f2 {
return !($_[0]->{"ext"} =~ /png/);
}
sub f3 {
return !($_[0]->{"ext"} =~ /gif/);
}
sub f4 {
return !($_[0]->{"ext"} =~ /webm/);
}
sub f0 { sub f0 {
return f1($_[0]) && f2($_[0]) && f3($_[0]) && f4($_[0]) return $_[0]->{"ext"} &&
!($_[0]->{"ext"} =~ /jpe?g/) &&
!($_[0]->{"ext"} =~ /png/) &&
!($_[0]->{"ext"} =~ /gif/) &&
!($_[0]->{"ext"} =~ /webm/)
} }
# create an array of files # create an array of files
# that meet our file ext requirement # that meet our file ext requirement
# from the posts attribute
@a0 = grep {f0($_)} @{$jh{"posts"}}; @a0 = grep {f0($_)} @{$jh{"posts"}};
# do the same filter on the # do the same filter on the
@ -102,5 +113,5 @@ close FILE;
# Print a list of wget commands from our tuples # Print a list of wget commands from our tuples
for my $i1 (@a1){ for my $i1 (@a1){
printf("wget -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]); printf("wget -nv -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]);
} }