From 707b4522d24729258442d1eb7c6a0b34ec3dc338 Mon Sep 17 00:00:00 2001 From: gashapwn Date: Mon, 29 Mar 2021 13:21:34 +0000 Subject: [PATCH] bookdl.pl - cleaned up filter function. added code to download thread. added comments --- perl-script/bookdl.pl | 61 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/perl-script/bookdl.pl b/perl-script/bookdl.pl index 652d295..b90a7b2 100644 --- a/perl-script/bookdl.pl +++ b/perl-script/bookdl.pl @@ -7,10 +7,12 @@ use strict; use JSON; +my $DOMAIN; my $THREAD_NO; +my $thread_no; my $URL_PREFIX; -my $FN; +my $fn; my $OUT_DIR; my $USAGE; @@ -21,23 +23,34 @@ my %jh; my @a1; $DOMAIN = "lainchan.org"; -# $URL_PREFIX = "https://lainchan.org/lit/src/"; -$URL_PREFIX = "https://$DOMAIN/lit/src/"; -$THREAD_NO = 4619; -# $THREAD_NO = 6105; -# $THREAD_NO = 4345; -# $THREAD_NO = 4953; -$FN = "$THREAD_NO.json"; +$URL_PREFIX = "https://$DOMAIN/lit/res/"; $OUT_DIR = "./dl/"; - $USAGE = "Usage: bookdl.pl [http://someurl/]thread_id"; + +# die if no arguments die "$USAGE" unless scalar @ARGV > 0; $ACMD = $ARGV[0]; +($thread_no) = $ACMD =~ /.*\/([0-9]+).{0,5}/; + +# die if didnt provide thread number +die "$USAGE" unless $thread_no; + + +# Download the JSON from lainchan with list +# of book filenames +do { + my $url = $URL_PREFIX.$thread_no.".json"; + printf("%s\n", $url); + `wget $url -O $thread_no.json`; +}; + +$fn = "$thread_no.json"; + # Read JSON with list of files -open FILE, "<", $FN or die "could not open file"; +open FILE, "<", $fn or die "could not open file"; do{ my $json_str; @@ -50,7 +63,12 @@ do{ }; close FILE; -# anonymous function that returns a list +# Now we need to parse the JSON we just read +# into an array of tuples + + + +# this anonymous function returns a list # of tuples of the below form: # (file_name, file_url) @a1 = sub{ @@ -61,24 +79,17 @@ close FILE; # filters for file types we # dont want to downloads - sub f1 { - return $_[0]->{"ext"} && !($_[0]->{"ext"} =~ /jpe?g/); - } - sub f2 { - return !($_[0]->{"ext"} =~ /png/); - } - sub f3 { - return !($_[0]->{"ext"} =~ /gif/); - } - sub f4 { - return !($_[0]->{"ext"} =~ /webm/); - } sub f0 { - return f1($_[0]) && f2($_[0]) && f3($_[0]) && f4($_[0]) + return $_[0]->{"ext"} && + !($_[0]->{"ext"} =~ /jpe?g/) && + !($_[0]->{"ext"} =~ /png/) && + !($_[0]->{"ext"} =~ /gif/) && + !($_[0]->{"ext"} =~ /webm/) } # create an array of files # that meet our file ext requirement + # from the posts attribute @a0 = grep {f0($_)} @{$jh{"posts"}}; # do the same filter on the @@ -102,5 +113,5 @@ close FILE; # Print a list of wget commands from our tuples for my $i1 (@a1){ - printf("wget -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]); + printf("wget -nv -nc %s -O '%s%s'\n", scalar $i1->[1], $OUT_DIR, scalar $i1->[0]); }