#!/usr/bin/perl
### dmmosaic.perl  -*- Perl -*-
## Create devmapper devices out of parts of other devices according to a map.

### Ivan Shmakov, 2022

## To the extent possible under law, the author(s) have dedicated
## all copyright and related and neighboring rights to this software
## to the public domain worldwide.  This software is distributed
## without any warranty.

## You should have received a copy of the CC0 Public Domain Dedication
## along with this software.  If not, see
## <http://creativecommons.org/publicdomain/zero/1.0/>.

### Commentary:

##  This tool is intended to be a part of a software package
##  currently in development that aims to facilitate maintenance
##  of incremental block-by-block archives on non-overwritable
##  media, such as recordable optical disks (DVD+R, CD-R, etc.)

##  The purpose of this tool proper is, given one or more archive
##  devices (such as /dev/sr0) and a map (such as produced by
##  xorriso, see below), to recreate the archived block devices
##  using the devmapper facility, available in Linux and NetBSD.

##  The file names encode the target devices to be created and the
##  512-byte block offsets of their respective chunks in Crockford
##  Base32, and should have the same number of Base32 digits, such
##  as DIR/EXAMPLE/00000000 (offset 0 for target device EXAMPLE),
##  00G00000 (offset 536870912) and 078W1FNF (offset 7814037167.)

##  Note that while the chunks may overlap, the behavior of this
##  tool with respect to such overlaps is not finalized as of yet.

##  The input map is checked for the lines formatted as follows;
##  any other lines are silently ignored.

##  Drive current: (anything)/dev/SOURCE-DEVICE
##  File data lba: 0 , OFFSET , (integer) , SIZE , (anything)/TARGET/ENC-OFF

##  OFFSET is in --block-size= byte blocks; the default being
##  --block-size=2048 (per ECMA 119.)  SIZE is in bytes and is rounded
##  up to an integral number of 512-byte blocks, as required by devmapper.
##  The ENC-OFF encoded target offset is in 512 byte blocks per above
##  and also as required by devmapper.

##  Assuming the use of ECMA 119 as the archival filesystem, example
##  usage could be as follows.

##  # xorriso -indev stdio:/dev/ARCHIVE-DEVICE \
##        -load sbsector 1234560 \
##        -find /ARCHIVE/DIRECTORY -exec report_lba -- \
##        2>&1 | dmmosaic --debug --dry-run 

##  The -load sbsector option is used to select the session on the
##  device; it may be omitted if there is only one.  The --debug
##  option turns on the output useful for troubleshooting; --no-debug
##  is the default.  The --dry-run option inhibits the dmsetup create
##  invocation and instead outputs the devmapper tables to stdout;
##  --no-dry-run is the default.

##  Note that neither this tool proper nor xorriso(1) require root
##  privileges and as such, can be run by a non-root user, or started
##  with no capabilities(7), such as by using setpriv(1) on Linux:

##  # setpriv --no-new-privs --inh-caps=-all --bounding-set=-all \
##        -- xorriso 

##  However, this tool in --no-dry-run mode (default) is ought to
##  invoke dmsetup(8), which does require certain capabilities to do
##  its task.

### History:

## 0.3  2022-11-29 08:15Z
##      (add_range): Fixed the patch level condition.  Show the existing
##      map entry with --debug.

## 0.2  2022-11-28 16:35:13Z
##      (sfn.WQoRtQuj4CewbQGy4W6WdFgE-RtCfJz-jiWcC54tj10.perl)
##      Support patch chunks and patch levels.  Chunk truncation is not
##      yet tested in this new implementation.
##      (safe_map): New function.
##      (add_range): Likewise.
##      (bsearch_map): Likewise.
##      ($entry_re): Allow for patch level suffixes.
##      (%acc): Now a hash of array references (was: of hash references.)

## 0.1  2022-11-08 17:20:37Z
##      (sfn.sqYntQ6AIEuBUsnHzB7TZa1Z6yJhjilV7DWfr-oVxP8.perl)
##      Initial revision.

### Code:

use common::sense;
use English qw (-no_match_vars);

require Getopt::Long;

our $enc_s
    = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
sub decode
{
    our $enc_s;
    my $r;
    for (my ($x) = @_; $x ne ""; ) {
        my $v
            = index ($enc_s, substr ($x, 0, 1));
        ## .
        return $r
            unless ($v >= 0);
        $r <<= 5;
        $r |= $v;
        substr ($x, 0, 1, "");
    }
    ## .
    $r;
}

sub safestr
{
    ## .
    s/[^ -~]/${ \sprintf ("\\%03o"); }/rg;
}

sub safe_map
{
    ## .
    join (", ", map {
        ## .
        join (" ", map {
            $_ //= "undef";
            ## .
            s/[^ ,-~]/${ \sprintf ("\\%03o"); }/rg;
        } (@$_));
    } (@_));
}

sub add_range
{
    my ($map, $o, $z, $new) = @_;
    our ($debug_p);

    warn ("D: map ", safe_map (@$map), "\n") if ($debug_p);
    warn ("D: adding ", join (" ", $z . "+" . $o, safe_map ($new)), "\n")
        if ($debug_p);

    ## NB: the first element of an entry is its offset
    if (1 > @$map) {
        @$map
            =  (($o > 0 ? ([ 0 ]) : ()),
                [ $o, @$new[1 .. $#$new] ],
                [ $z + $o ]);
        ## .
        return;
    }

    my ($lf, $rg)
        = map { bsearch_map ($map, $_); } ($o, $z + $o);
    warn ("D: adding at ", $lf, ":", $rg, "\n") if ($debug_p);
    ## NB: introduce ranges at $o and $o + $z sharp
    # if ($o < $map->[$lf]->[0]) {
    #     splice (@$map, $lf, 0, [ $o, @$new[1 .. $#$new] ]);
    #     ++$lf, ++$rg;
    # }
    if ($o > $map->[$lf]->[0]) {
        splice (@$map, 1 + $lf, 0,
                [ $o, @{$map->[$lf]}[1 .. $#{$map->[$lf]}] ]);
        warn ("D: map cloned an entry at #", $lf, "\n") if ($debug_p);
        ++$lf, ++$rg;
    }
    if ($z + $o > $map->[$rg]->[0]) {
        splice (@$map, 1 + $rg, 0,
                [ $z + $o, @{$map->[$rg]}[1 .. $#{$map->[$rg]}] ]);
        warn ("D: map cloned an entry at #", $rg, "\n") if ($debug_p);
        ++$rg;
    }
    warn ("D: adding at ", $lf, ":", $rg, "\n") if ($debug_p);

    ## NB: the second element of an entry is its patch level
    my $pl
        = $new->[1];
    for (my $k = $lf; $k < $rg; ++$k) {
        my $ent
            = $map->[$k];
        warn ("D: map at #", $k, ": ", safe_map ($map->[$k]),
              " (patch level ", $ent->[1], " vs. new ", $pl, ")\n")
            if ($debug_p);
        ## FIXME: this condition depends on the order of calls thence
        next
            if (1 < @$ent && $pl < $ent->[1]);
        my $k_o
            = $ent->[0];
        $map->[$k]
            =   [ $k_o,     @$new[1 .. $#$new] ];
        warn ("D: map changed at #", $k,
              ": ", safe_map ($map->[$k]), "\n") if ($debug_p);
    }
    ## .
    return;
}

sub bsearch_map
{
    my ($map, $val, $from, $to) = @_;
    $from
        //= 0;
    $to //= @$map;
    while ($from < -1 + $to) {
        my $i = (1 + $from + $to) >> 1;
        my $c;
        if (($c = ($map->[$i]->[0] <=> $val)) == 0) {
            ## .
            return $i;
        } elsif ($c < 0) {
            $from = $i;
        } else {
            $to = $i;
        }
    }
    ## .
    $from;
}

## main

Getopt::Long::Configure (qw (gnu_compat));

our ($debug_p);
my ($dry_run_p, $command_s, $filter_s)
    = (0, "dmsetup create --readonly --", "");
my ($blk_z)
    = (2048);

## FIXME: make configurable? currently handles xorriso(1) report_lba output
our $source_dev_re
    = qr {^Drive current:.*?(/dev/[^[:blank:]'"]+)};
our $dm_valid
    = "0-9a-zA-Z_.-";
our $entry_re = qr {
    ^ File\s data\s lba: \s* 0\s*,
      \s* ([0-9]+) \s*, \s* (?:[0-9]+) \s*,
      \s* ([0-9]+) \s*,
      .*/([${dm_valid}]+)/([${enc_s}]+)
      (?: \._ ([${enc_s}]{2}))?
    \b
}x;

my $parsable_p
    = Getopt::Long::GetOptions (q (block-size=i)    => \$blk_z,
                                q (debug!)      => \$debug_p,
                                "n|dry-run!"    => \$dry_run_p,
                                "e|execute=s"   => \$command_s,
                                q (filter=s)    => \$filter_s)
    or die ("Fatal: Cannot parse command line arguments");

die ("Fatal: --block-size=", $blk_z,
     " is not an positive integer divisible by 512")
    unless ($blk_z > 0 && 0 == $blk_z % 512);
$blk_z
    >>= 9;

our $filter_re
    = qr {${filter_s}};
""  =~ m {${filter_re}};

my ($s_dev, %acc);
while (<<>>) {
    chomp ();
    my ($dev)
        = m {${source_dev_re}}o;
    if (defined ($dev)) {
        $s_dev
            = $dev;
        next;
    }
    my ($so, $z, $ta, $ko, $pl_s)
        = m {${entry_re}}o;
    unless (defined ($ko)) {
        warn ("D: ", $_, ": Line not recognized; ignored")
            if ($debug_p);
        next;
    }
    my $pl
        = (defined ($pl_s) ? decode ($pl_s) : -1);
    warn ("D: ", join (" ", $so, $z, $ta, $ko, $pl, safestr ($_)))
        if ($debug_p);
    unless ($ta =~ m {${filter_re}}o) {
        warn ("D: ", $ta, ": Target rejected by the --filter= RE")
            if ($debug_p);
        next;
    }
    $acc{$ta}
        //= [ ];

    die ("Fatal: ", $ta, "/", $ko, ": Source device not known\n")
        unless (defined ($s_dev));

    my ($t_o, $b_z)
        =  (decode ($ko),
            $z > 0 ? 1 + ((-1 + $z) >> 9) : 0);
    ##  NB: the first element of @$new is set to $t_o in add_range;
    ##      the second is used as the patch level
    my $new
        = [ undef, $pl, $blk_z * $so - $t_o, $s_dev ];
    add_range ($acc{$ta}, $t_o, $b_z, $new);
}

my @command
    = split (" ", $command_s);

foreach my $ta (keys (%acc)) {
    my ($map, $dm)
        = ($acc{$ta});
    if (0 >= @$map) {
        warn ("Warning: ", $ta, ": Would have zero size; skipping\n");
        next;
    }
    warn ("D: map: ", join (", ", map { join (" ", @$_); } (@$map)))
        if ($debug_p);
    if ($dry_run_p) {
        $dm = \*STDOUT;
        $dm->print ("## ", $ta, "\n");
    } elsif (! open ($dm, "|-", @command, $ta, "/dev/stdin")) {
        warn ("Warning: ", $ta, ": ", $!, "; skipping\n");
        next;
    }

    for (my $i = 0; $i < -1 + @$map; ++$i) {
        my $ent
            = $map->[$i];
        my $z
            = $map->[1 + $i]->[0] - $ent->[0];
        warn ("D: ", join (", ", $z, @$ent)) if ($debug_p);
        if (1 >= @$ent) {
            $dm->printf ("%d %d zero \n", $ent->[0], $z);
            next;
        }
        $dm->printf ("%d %d linear %s %d \n",
            $ent->[0], $z, $ent->[3], $ent->[2] + $ent->[0]);
    }
    $dm->close ()
        unless ($dm eq \*STDOUT);
}

### dmmosaic.perl ends here
