#!/usr/bin/env bash
#
# Dump large regular files out of a filesystem image (via debugfs) into a
# content-addressed ./pool directory and emit an index describing them.
#
# Usage: <script> IMAGE [INDEX_FILE]
#   IMAGE       image file handed to debugfs
#   INDEX_FILE  destination of the index (default: /dev/stdout)
#
# Environment:
#   THRESHOLD   minimum file size in bytes for a file to be dumped
#               (default: 1048576)
#
# Index format (as written below):
#   #BLOCKSIZE=<n>
#   #INODE SHA256SUM
#   #INDEX BLOCK LENGTH [REMAINDER]
#   <inode> <sha256>
#   <extent rows for that inode; the last row carries size % BLOCKSIZE>
set -eo pipefail

image="$1"
if [ -z "$image" ]; then
  echo "No image file specified." >&2
  exit 1
fi

#######################################
# Run a single debugfs request against $image.
# Globals:   image (read)
# Arguments: the request words; joined with "$*" because debugfs -R takes
#            the whole request as one string.
# Outputs:   debugfs's stdout on stdout. The first line debugfs writes to
#            stderr is dropped (presumably its version banner — confirm);
#            any further stderr output is forwarded to stderr.
# Returns:   non-zero when debugfs wrote more than that first stderr line.
#######################################
inspect () {
  (
    # Jury-rig error handling onto debugfs: its stdout bypasses the checks
    # through fd 3 while its stderr is piped through them.
    { debugfs -R "$*" "$image" 1>&3; } 2>&1 \
      | tail -n +2 \
      | {
          if IFS= read -r line; then
            printf '%s\n' "$line"
            cat
            exit 1
          fi
        } >&2
  ) 3>&1 | cat  # Never invoke pager.
}

BLOCKSIZE=$(inspect stats | sed -nE 's/^Block size:\s+([0-9]+)$/\1/p')
# Without a block size the remainder arithmetic in extents() cannot work;
# fail here with a clear message instead of a later arithmetic error.
if [ -z "$BLOCKSIZE" ]; then
  echo "Could not determine block size of $image." >&2
  exit 1
fi

# Prefer same filesystem for efficiency on large files; fall back to the
# system default temp location if the current directory is not writable.
scratch_dir=$(mktemp -d "$(pwd)/.pool.XXXXXXXX") \
  || scratch_dir=$(mktemp -d) \
  || exit 1
trap 'rm -rf -- "$scratch_dir"' EXIT

#######################################
# Print the byte size debugfs reports for a file.
# Arguments: $1 - debugfs file spec (path or <inode>)
# Outputs:   the number following "Size: " within the first three lines of
#            `stat` output.
#######################################
size () {
  inspect stat "$1" \
    | head -n3 \
    | sed -nE 's/.*Size: ([0-9]+).*/\1/p'
}

#######################################
# Print the extent table of a file, one extent per line.
# Globals:   BLOCKSIZE (read)
# Arguments: $1 - debugfs file spec (path or <inode>)
# Outputs:   columns 5, 8 and 11 of `dump_extents` (header line skipped);
#            the last line additionally gets " <size % BLOCKSIZE>" appended.
#######################################
extents () {
  local remainder
  remainder=$(( $(size "$1") % BLOCKSIZE ))
  inspect dump_extents "$1" \
    | tail -n +2 \
    | awk '{print $5, $8, $11}' \
    | sed '$s/$/ '"$remainder"'/'
}

THRESHOLD=${THRESHOLD:-$((1024*1024))}

#######################################
# Walk the directory tree and list regular files of at least THRESHOLD bytes.
# Globals:   THRESHOLD (read)
# Arguments: none to start at "/"; otherwise $1 is the raw output of
#            `inspect ls -p` for one directory.
# Outputs:   "<inode> <size>" per matching file on stdout; progress messages
#            on stderr. Sub-directories are processed in background jobs.
#######################################
listall () {
  if [ $# -eq 0 ]; then
    listall "$(inspect ls -p /)"
    # NOTE(review): the background jobs are started inside the pipeline
    # subshell below, so they are not direct children of this shell.
    wait; return
  fi
  # Turn each record terminator ("/" + newline) into "/" + NUL so records
  # can be read NUL-delimited and split on "/".
  perl -p -e 's!/\n$!/\x0!g' <<< "$1" \
    | while IFS=/ read -r -d $'\0' _ inode itype _ _ name isize; do
        # Plain `if` statements (not `test && cmd`) so a non-matching last
        # entry does not leave the loop, and thus the function, with a
        # non-zero status under `set -e`.
        case ${itype:0:3} in
          '100')
            if [ "$isize" -ge "$THRESHOLD" ]; then
              echo "$inode $isize"
            fi
            ;;
          '040')
            if [[ "$name" != @(.|..) ]]; then
              echo "Recursing into $name" >&2
              listall "$(inspect ls -p "<$inode>")" &
            fi
            ;;
        esac
      done
}

mkdir -p pool
{
  printf '%s\n' \
    "#BLOCKSIZE=$BLOCKSIZE" \
    '#INODE SHA256SUM' \
    '#INDEX BLOCK LENGTH [REMAINDER]'
  listall \
    | while IFS=' ' read -r -d $'\n' inode isize; do
        # TODO: Make asynchronous
        tmp=$(mktemp -p "$scratch_dir")
        sha=$(inspect dump "<$inode>" /dev/stdout \
          | tee "$tmp" \
          | sha256sum \
          | cut -d' ' -f1)
        # Keep one copy per checksum: discard the dump if the pool already
        # has it, otherwise move it into place.
        if [ -f "pool/$sha" ]; then
          rm -f "$tmp"
        else
          mv -v "$tmp" "pool/$sha" 1>&2
        fi
        echo "$inode $sha"
        extents "<$inode>"
      done
} > "${2:-/dev/stdout}"