/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.compress.estim;

import java.util.HashMap;
import org.apache.commons.lang.NotImplementedException;
import org.apache.sysds.runtime.compress.CompressionSettings;
import org.apache.sysds.runtime.compress.colgroup.AColGroup;
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
import org.apache.sysds.runtime.compress.estim.sample.SampleEstimatorFactory;
import org.apache.sysds.runtime.compress.lib.BitmapEncoder;
import org.apache.sysds.runtime.compress.utils.ABitmap;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockMCSR;
import org.apache.sysds.runtime.data.SparseRow;
import org.apache.sysds.runtime.matrix.data.LibMatrixReorg;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.util.UtilFunctions;

/**
 * Compressed-size estimator that draws a row sample from the input once (in the constructor) and
 * derives all column-group estimates from that sample, scaling the observed statistics up to the
 * full number of rows.
 */
public class CompressedSizeEstimatorSample extends CompressedSizeEstimator {
    /** Row indexes selected for the sample, as returned by {@code UtilFunctions.getSortedSampleIndexes}. */
    private int[] _sampleRows;
    /** The sampled matrix; {@code null} when {@link #sampleData(int)} found the sample to be empty. */
    private MatrixBlock _sample;
    /** Cache passed to {@code SampleEstimatorFactory.distinctCount}; recreated on every call to sampleData. */
    private HashMap<Integer, Double> _solveCache = null;

    /**
     * Creates the estimator and immediately extracts the sample.
     *
     * @param data       the matrix to estimate compressed sizes for
     * @param cs         the compression settings (seed, transposed flag, valid compressions, ...)
     * @param sampleSize the number of rows requested for the sample
     */
    public CompressedSizeEstimatorSample(MatrixBlock data, CompressionSettings cs, int sampleSize) {
        super(data, cs);
        this._sample = this.sampleData(sampleSize);
    }

    /**
     * @return the sample extracted at construction time, or {@code null} if that sample was empty
     */
    public MatrixBlock getSample() {
        return this._sample;
    }

    /**
     * Draws a new set of sample row indexes, resets the solve cache and materializes the sampled
     * rows as a matrix block. This does not assign the result to the sample field; only the
     * constructor does that.
     *
     * @param sampleSize the number of rows requested for the sample
     * @return the sampled matrix block, or {@code null} if it reports itself as empty
     */
    public MatrixBlock sampleData(int sampleSize) {
        this._sampleRows = getSortedUniformSample(this._numRows, sampleSize, this._cs.seed);
        this._solveCache = new HashMap<>();

        MatrixBlock sampledMatrixBlock;
        if (this._data.isInSparseFormat() && !this._cs.transposed) {
            // Sparse, non-transposed input: collect the selected sparse rows directly and
            // transpose the resulting block.
            sampledMatrixBlock = new MatrixBlock(this._sampleRows.length, this._data.getNumColumns(), true);
            SparseRow[] rows = new SparseRow[this._sampleRows.length];
            SparseBlock in = this._data.getSparseBlock();
            for (int i = 0; i < this._sampleRows.length; ++i) {
                rows[i] = in.get(this._sampleRows[i]);
            }
            sampledMatrixBlock.setSparseBlock(new SparseBlockMCSR(rows, false));
            sampledMatrixBlock.recomputeNonZeros();
            // The sample is stored transposed from here on; the flag is forwarded to the bitmap extraction.
            this._transposed = true;
            // NOTE(review): the literal 16 is passed through unchanged; presumably a degree of
            // parallelism - confirm against LibMatrixReorg.transposeInPlace.
            sampledMatrixBlock = LibMatrixReorg.transposeInPlace(sampledMatrixBlock, 16);
        } else {
            // Otherwise build a one-column selection vector with a 1.0 at every sampled index and
            // let removeEmptyOperations pick the matching rows (or columns when the input is transposed).
            MatrixBlock select = this._cs.transposed
                ? new MatrixBlock(this._data.getNumColumns(), 1, false)
                : new MatrixBlock(this._data.getNumRows(), 1, false);
            for (int i = 0; i < this._sampleRows.length; ++i) {
                select.appendValue(this._sampleRows[i], 0, 1.0);
            }
            sampledMatrixBlock = this._data.removeEmptyOperations(new MatrixBlock(), !this._cs.transposed, true, select);
        }

        // An empty sample is signalled to the caller with null rather than an empty block.
        if (sampledMatrixBlock.isEmpty()) {
            return null;
        }
        return sampledMatrixBlock;
    }

    /**
     * Estimates the compressed size information of a column group by extracting a bitmap for the
     * given columns from the sample and scaling the sample statistics.
     *
     * @param colIndexes         the columns forming the group
     * @param nrUniqueUpperBound upper bound applied to the estimated number of distinct values
     * @return the size information for the column group
     */
    @Override
    public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] colIndexes, int nrUniqueUpperBound) {
        ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, this._sample, this._transposed);
        EstimationFactors sampleFacts = EstimationFactors.computeSizeEstimationFactors(ubm, false, colIndexes);
        AMapToData map = MapToFactory.create(ubm);
        EstimationFactors em = this.estimateCompressionFactors(sampleFacts, map, colIndexes, nrUniqueUpperBound);
        return new CompressedSizeInfoColGroup(em, this._cs.validCompressions, map);
    }

    /**
     * Estimates the compressed size information of the group obtained by joining two already
     * estimated groups, using the join of their maps instead of re-reading the sample.
     *
     * @param joined the column indexes of the joined group
     * @param g1     size information of the first group
     * @param g2     size information of the second group
     * @return the size information for the joined column group
     */
    @Override
    public CompressedSizeInfoColGroup estimateJoinCompressedSize(int[] joined, CompressedSizeInfoColGroup g1, CompressedSizeInfoColGroup g2) {
        final int g1V = g1.getMap().getUnique();
        final int g2V = g2.getMap().getUnique();
        // The joined group can have at most g1V * g2V distinct tuples. Multiply in long and clamp,
        // since the int product silently overflows (and may turn negative) for large groups.
        final int nrUniqueUpperBound = (int) Math.min((long) g1V * (long) g2V, (long) Integer.MAX_VALUE);

        AMapToData map = MapToFactory.join(g1.getMap(), g2.getMap());
        boolean rleAllowed = this._cs.validCompressions.contains(AColGroup.CompressionType.RLE);
        EstimationFactors sampleFacts = EstimationFactors.computeSizeEstimation(joined, map, rleAllowed, map.size(), false);
        EstimationFactors em = this.estimateCompressionFactors(sampleFacts, map, joined, nrUniqueUpperBound);
        return new CompressedSizeInfoColGroup(em, this._cs.validCompressions, map);
    }

    /**
     * Scales the factors observed on the sample to factors for the full number of rows.
     *
     * @param sampleFacts        factors computed on the sample
     * @param map                the map built for the sample
     * @param colIndexes         the columns of the group
     * @param nrUniqueUpperBound upper bound applied to the estimated number of distinct values
     * @return the estimation factors for the full data
     */
    private EstimationFactors estimateCompressionFactors(EstimationFactors sampleFacts, AMapToData map, int[] colIndexes, int nrUniqueUpperBound) {
        final int numZerosInSample = sampleFacts.numRows - sampleFacts.numOffs;
        final int sampleSize = this._sampleRows.length;

        if (numZerosInSample == sampleSize) {
            // Every sampled row counted as zero: return fixed minimal factors instead of scaling.
            final int nCol = sampleFacts.cols.length;
            final int largestInstanceCount = this._numRows - 1;
            return new EstimationFactors(colIndexes, 1, 1, largestInstanceCount, new int[]{largestInstanceCount}, 2, 1, this._numRows, sampleFacts.lossy, true, 1.0 / (double)this._numRows, 1.0 / (double)nCol);
        }

        final double scalingFactor = (double)this._numRows / (double)sampleSize;
        // Distinct count: at least what the sample map shows, at most the number of rows.
        final int totalCardinality = Math.max(map.getUnique(), Math.min(this._numRows, this.getEstimatedDistinctCount(sampleFacts.frequencies, nrUniqueUpperBound)));
        final double C = Math.max(1.0 - (double)sampleFacts.numSingle / (double)sampleSize, (double)sampleSize / (double)this._numRows);
        final int numNonZeros = Math.max((int)Math.floor((double)this._numRows - scalingFactor * C * (double)numZerosInSample), totalCardinality);
        final int totalNumRuns = this.getNumRuns(map, sampleFacts.numVals, sampleSize, this._numRows, this._sampleRows);
        final int largestInstanceCount = Math.min(this._numRows, (int)Math.floor((double)sampleFacts.largestOff * scalingFactor));
        return new EstimationFactors(colIndexes, totalCardinality, numNonZeros, largestInstanceCount, sampleFacts.frequencies, totalNumRuns, sampleFacts.numSingle, this._numRows, sampleFacts.lossy, sampleFacts.zeroIsMostFrequent, sampleFacts.overAllSparsity, sampleFacts.tupleSparsity);
    }

    /**
     * Delegates to {@code SampleEstimatorFactory.distinctCount} and caps the result.
     *
     * @param frequencies the per-value frequencies observed in the sample
     * @param upperBound  the cap applied to the estimate
     * @return the smaller of the estimated distinct count and {@code upperBound}
     */
    private int getEstimatedDistinctCount(int[] frequencies, int upperBound) {
        int estimate = SampleEstimatorFactory.distinctCount(frequencies, this._numRows, this._sampleRows.length, this._cs.estimationType, this._solveCache);
        return Math.min(estimate, upperBound);
    }

    /**
     * Returns the estimated number of runs, or 0 when RLE is not among the valid compressions or
     * the sample has no values.
     *
     * @param map          the map built for the sample
     * @param numVals      number of values reported by the sample factors
     * @param sampleSize   number of sampled rows
     * @param totalNumRows number of rows in the full data
     * @param sampleRows   the sampled row indexes
     * @return the estimated number of runs
     * @throws NotImplementedException whenever the estimate is actually required, because the
     *                                 map-based estimator is not implemented
     */
    private int getNumRuns(AMapToData map, int numVals, int sampleSize, int totalNumRows, int[] sampleRows) {
        if (this._cs.validCompressions.contains(AColGroup.CompressionType.RLE) && numVals > 0) {
            return getNumRuns(map, sampleSize, totalNumRows, sampleRows);
        }
        return 0;
    }

    /**
     * Bitmap-based run estimator. For every value it walks the sampled rows, counts run boundaries
     * that are visible inside stretches of consecutive sampled rows, and adds an expected number
     * of runs for the unsampled gaps based on the value's offset ratio in the sample.
     *
     * <p>No code in this class calls this overload; the map-based overload is used instead.
     *
     * @param ubm          the bitmap extracted from the sample
     * @param sampleSize   number of sampled rows
     * @param totalNumRows number of rows in the full data
     * @param sampleRows   the sampled row indexes (NOTE(review): the gap arithmetic assumes they
     *                     are ascending and distinct - confirm against getSortedSampleIndexes)
     * @return the rounded run estimate, capped at {@code Integer.MAX_VALUE}
     */
    private static int getNumRuns(ABitmap ubm, int sampleSize, int totalNumRows, int[] sampleRows) {
        int numVals = ubm.getNumValues();
        double numRuns = 0.0;
        for (int vi = 0; vi < numVals; ++vi) {
            double nonOffsetProb;
            double additionalOffsets;
            int intervalSize;
            int intervalEnd;
            int[] offsets = ubm.getOffsetsList(vi).extractValues();
            int offsetsSize = ubm.getNumOffsets(vi);
            double offsetsRatio = (double)offsetsSize / (double)sampleSize;
            double avgAdditionalOffsets = offsetsRatio * (double)totalNumRows / (double)sampleSize;
            if (avgAdditionalOffsets < 1.0) {
                // Fewer than one expected extra offset per gap: simply scale the offset count.
                numRuns += (double)offsetsSize * (double)totalNumRows / (double)sampleSize;
                continue;
            }
            double prevNonOffsetProb = 1.0;
            boolean reachedSampleEnd = false;
            int intervalStart = -1;
            if (sampleRows[0] == 0) {
                intervalStart = 0;
            } else {
                // Gap before the first sampled row.
                intervalEnd = sampleRows[0];
                intervalSize = intervalEnd - intervalStart - 1;
                additionalOffsets = offsetsRatio * (double)intervalSize;
                numRuns += ((double)intervalSize - additionalOffsets) * additionalOffsets / (double)intervalSize;
                intervalStart = intervalEnd;
                prevNonOffsetProb = ((double)intervalSize - additionalOffsets) / (double)intervalSize;
            }
            boolean withinSepRun = false;
            boolean seenNonOffset = false;
            boolean startedWithOffset = false;
            boolean endedWithOffset = false;
            int offsetsPtrs = 0;
            for (int ix = 1; ix < sampleSize; ++ix) {
                if (offsetsPtrs < offsetsSize && offsets[offsetsPtrs] == intervalStart) {
                    startedWithOffset = true;
                    ++offsetsPtrs;
                    endedWithOffset = true;
                } else {
                    seenNonOffset = true;
                    endedWithOffset = false;
                }
                // Consume the stretch of sampled rows that directly follow each other.
                while (intervalStart + 1 == sampleRows[ix]) {
                    intervalStart = sampleRows[ix];
                    if (seenNonOffset) {
                        if (offsetsPtrs < offsetsSize && offsets[offsetsPtrs] == intervalStart) {
                            withinSepRun = true;
                            ++offsetsPtrs;
                            endedWithOffset = true;
                        } else {
                            // A run fully enclosed by non-offsets inside the stretch counts as one run.
                            numRuns += withinSepRun ? 1 : 0;
                            withinSepRun = false;
                            endedWithOffset = false;
                        }
                    } else if (offsetsPtrs < offsetsSize && offsets[offsetsPtrs] == intervalStart) {
                        ++offsetsPtrs;
                        endedWithOffset = true;
                    } else {
                        seenNonOffset = true;
                        endedWithOffset = false;
                    }
                    if (++ix == sampleSize) {
                        reachedSampleEnd = true;
                        break;
                    }
                }
                if (reachedSampleEnd) {
                    break;
                }
                // Gap between this stretch and the next sampled row.
                intervalEnd = sampleRows[ix];
                intervalSize = intervalEnd - intervalStart - 1;
                additionalOffsets = offsetsRatio * (double)intervalSize;
                numRuns += ((double)intervalSize - additionalOffsets) * additionalOffsets / (double)intervalSize;
                nonOffsetProb = ((double)intervalSize - additionalOffsets) / (double)intervalSize;
                if (seenNonOffset) {
                    if (startedWithOffset) {
                        numRuns += prevNonOffsetProb;
                    }
                    if (endedWithOffset) {
                        numRuns += nonOffsetProb;
                    }
                } else {
                    numRuns += prevNonOffsetProb * nonOffsetProb;
                }
                prevNonOffsetProb = nonOffsetProb;
                intervalStart = intervalEnd;
                endedWithOffset = false;
                startedWithOffset = false;
                seenNonOffset = false;
                withinSepRun = false;
            }
            if (intervalStart != totalNumRows - 1) {
                // Gap after the last sampled row.
                intervalEnd = totalNumRows;
                intervalSize = intervalEnd - intervalStart - 1;
                additionalOffsets = offsetsRatio * (double)intervalSize;
                numRuns += ((double)intervalSize - additionalOffsets) * additionalOffsets / (double)intervalSize;
                nonOffsetProb = ((double)intervalSize - additionalOffsets) / (double)intervalSize;
            } else {
                nonOffsetProb = 1.0;
            }
            endedWithOffset = intervalStart == offsets[offsetsSize - 1];
            if (seenNonOffset) {
                if (startedWithOffset) {
                    numRuns += prevNonOffsetProb;
                }
                if (endedWithOffset) {
                    numRuns += nonOffsetProb;
                }
            } else if (endedWithOffset) {
                numRuns += prevNonOffsetProb * nonOffsetProb;
            }
        }
        return (int)Math.min(Math.round(numRuns), Integer.MAX_VALUE);
    }

    /**
     * Map-based run estimator. Not implemented: always throws.
     *
     * @param map          the map built for the sample
     * @param sampleSize   number of sampled rows
     * @param totalNumRows number of rows in the full data
     * @param sampleRows   the sampled row indexes
     * @return never returns normally
     * @throws NotImplementedException always
     */
    private static int getNumRuns(AMapToData map, int sampleSize, int totalNumRows, int[] sampleRows) {
        throw new NotImplementedException("Not Supported ever since the ubm was replaced by the map");
    }

    /**
     * Thin wrapper around {@code UtilFunctions.getSortedSampleIndexes}.
     *
     * @param range      the range to sample indexes from
     * @param sampleSize the number of indexes requested
     * @param seed       the seed forwarded to the sampler
     * @return the sampled indexes
     */
    private static int[] getSortedUniformSample(int range, int sampleSize, long seed) {
        return UtilFunctions.getSortedSampleIndexes(range, sampleSize, seed);
    }

    /**
     * @return the superclass description followed by sample size, transposed flag, and the column
     *         and row counts
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(super.toString());
        sb.append(" sampleSize: ");
        sb.append(this._sampleRows.length);
        sb.append(" transposed: ");
        sb.append(this._transposed);
        sb.append(" cols: ");
        sb.append(this._numCols);
        sb.append(" rows: ");
        sb.append(this._numRows);
        return sb.toString();
    }
}

