#!/usr/bin/perl
### blora-optimize.perl  -*- Perl -*-
## Reduce the number of ranges in an mtree(8)-like range map file.

### Ivan Shmakov, 2022

## To the extent possible under law, the author(s) have dedicated
## all copyright and related and neighboring rights to this software
## to the public domain worldwide.  This software is distributed
## without any warranty.

## You should have received a copy of the CC0 Public Domain Dedication
## along with this software.  If not, see
## <http://creativecommons.org/publicdomain/zero/1.0/>.

### History:

## 0.1  2022-11-15
##      Initial revision.

### Code:

use common::sense;
use English qw (-no_match_vars);

require Getopt::Long;

our $enc_s
    = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";
sub decode
{
    our $enc_s;
    my $r;
    for (my ($x) = @_; $x ne ""; ) {
        my $v
            = index ($enc_s, substr ($x, 0, 1));
        ## .
        return $r
            unless ($v >= 0);
        $r <<= 5;
        $r |= $v;
        substr ($x, 0, 1, "");
    }
    ## .
    $r;
}

sub safestr
{
    ## .
    ($_[0] =~ s/[^ -~]/${ \sprintf ("\\%03o", ord ($&)); }/rg);
}

sub gap_fac
{
    our ($min_ra);
    ## .
    return 0
        unless (@_ >= $min_ra);

    my ($sta, $zsu, $sum, $ssq, $min, $m_i)
        = ($_[0]->[0] + $_[0]->[1], $_[0]->[1], 0, 0);
    shift (@_);
    for (my $i = 0; $i < @_; ++$i) {
        my $ent = $_[$i];
        my $g = $ent->[0] - $sta;
        ($min, $m_i)
            = ($g, $i)
            if (! defined ($min) || $g < $min);
        $sta  = $ent->[0] + $ent->[1];
        $zsu += $ent->[1];
        $sum +=  $g;
        $ssq += ($g * $g);
    }

    my $tot = ($zsu + $sum);
    my $gra = ($sum / $tot);
    our ($nom_z);
    my $nom =  ($tot < 0x8000 ? 0x4000
                : ($tot >> 1) < $nom_z ? ($tot >> 1)
                : $nom_z);
    my $a_c = $zsu / (1 + @_);
    my $a_g = $sum / @_;
    my $r_g = (sqrt (($ssq - $sum * $sum / @_) / @_) / $a_g);
    our ($debug_p);
    warn ("D: gap size ", $sum, " (", $gra, ") of ", $tot,
          " avg. chunk ", $a_c, " nom. ", $nom,
          " rel. gap avg. ", $a_g, " rms ", $r_g,
          " weakest [", $m_i, "]\n")
        if ($debug_p);
    ## .
    (.5 * exp (log (2) * (1 - $a_c / $nom)) + .5 * $r_g,
     $m_i);
}

our ($a_k, $a_z, @acc)
    = ();
sub may_be_print
{
    ## .
    return
        unless (@acc > 0);
    our ($debug_p);
    my @so
        = sort { $a->[0] <=> $b->[0]; } (@acc);
    my ($gaf, $ga_i)
        = gap_fac (@so);
    warn ("D: ", scalar (@so), " entries",
          " (gap factor: ", $gaf, ") for ", $a_k, "\n")
        if ($debug_p);
    while ($gaf >= 1) {
        my $w
            = $so[$ga_i];
        my $z
            = ($so[1 + $ga_i]->[0] + $so[1 + $ga_i]->[1] - $w->[0]);
        my $new
            = [ $w->[0], $z,
                ($w->[2] =~ s/(\ssize=)[0-9]+(\s)/${1}${z}${2}/r) ];
        splice (@so, $ga_i, 2, $new);
        ## FIXME: recomputing this every iteration may be slow
        ($gaf, $ga_i)
            = gap_fac (@so);
        warn ("D: ", scalar (@so), " entries",
              " (gap factor: ", $gaf, ") for ", $a_k, "\n")
            if ($debug_p);
    }
    our ($weight);
    foreach my $ent (@so) {
        print ($ent->[2], "xo-w=", $weight, " \n");
    }
    $a_z
        = ($so[$#so]->[0] + $so[$#so]->[1]);
    ## .
    STDOUT->flush ();
}

our ($p_dn, %zca)
    = ();
sub pr_z
{
    ##  NB: it is possible to specify size once for all the snapshots
    ##      by omitting the -xTIMESTAMP directory name suffix
    my $z_e
        = $zca{$p_dn} // $zca{($p_dn =~ s/-x[0-9a-f]+$//r)};
    our ($debug_p);
    warn ("D: ", $p_dn, " size ", join (", ", $a_z, @{$z_e // [ ]}), "\n")
        if ($debug_p);
    our ($weight);
    ## .
    print ($p_dn, $z_e->[1], "xo-w=", $weight, " \n")
        if (defined ($z_e) && $z_e->[0] > $a_z);
}

## main

Getopt::Long::Configure (qw (gnu_compat));

our ($debug_p)
    = (0);
our ($weight)
    = (0x100000);
our ($min_ra, $nom_z)
    = (2, 0x1000000);

my $parsable_p
    = Getopt::Long::GetOptions (q (debug!)      => \$debug_p,
                                q (min-ranges=i)    => \$min_ra,
                                q (nominal=s)   => \$nom_z,
                                q (nominal-size=s)  => \$nom_z)
    or die ("Fatal: Cannot parse command line arguments");

die ("--min-ranges= should be at least 2")
    unless ($min_ra >= 2);
die ("--nominal-size= should be a positive number")
    unless ($nom_z > 0);

while (<<>>) {
    s/\S$/$& /;
    if (m {^/set\s()|\stype=(\S+)\s}
        && (defined ($1) || "file" ne $2)) {
        print ($_);
        next;
    }
    chomp ();

    ## FIXME: removing leading zeros assuming size under 2 ^ 40 blocks
    s {^(\S*/)0+([${enc_s}]{8})\b}{$1$2}o if (1);
    my ($en, $dn, $bn)
        = m {^((\S*?)/([${enc_s}]+))\s}o
        or die ("Input line format not recognized");
    pos ()
        = (-1 + length ($&));
    my ($en_safe, $kv)
        = (safestr ($en), substr ($_, pos ()));
    my %kv
        = /\G\s+(\S+)=(\S*)/gc;
    die ($en_safe, ": Trailing data? ", safestr (substr ($_, pos ())))
        if (/\G\s*\S/);
    my ($z, $c_o, $c_f)
        =  (($kv{"size"} =~ /^([0-9]+)$/),
            ($kv{"cut-out"} =~ /^\+([0-9]+),(.*)$/));
    my $o
        = (decode ($bn) << 9);
    unless (defined ($c_o)) {
        die ("Non-zero size (", $z, ") and no cut-out=?")
            unless (0 eq $z);
        $zca{$dn}
            = [ $o, s,^.*/,/,r ];
        next;
    }
    my $key
        = ($o - $c_o) . "," . $c_f;
    if ($a_k ne $key) {
        may_be_print ();
        ($a_k, @acc)
            = ($key);
    }
    push (@acc, [ $o, $z, $_ ]);
    unless ($p_dn eq $dn) {
        pr_z ();
        $p_dn = $dn;
        --$weight;
    }
}
may_be_print ();
pr_z ();

### blora-optimize.perl ends here
