# Source code for trisicell.pp._binary

import trisicell as tsc


def binarym_filter_private_mutations(df):
    """Drop, in place, every column whose entries sum to exactly 1.

    In a matrix holding only 0 and 1 this removes the columns (mutations)
    that are set in exactly one row (cell).

    NOTE(review): the test is on the column *sum*, not on the number of
    1-entries, so a column that also contains another positive code (e.g. 3)
    is never matched -- confirm this is the intended definition of "private".

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Matrix to filter. It is modified in place.

    Returns
    -------
    None
    """
    column_totals = df.sum()
    private_columns = df.columns[column_totals == 1]
    df.drop(columns=private_columns, inplace=True)


def binarym_filter_clonal_mutations(df):
    """Drop, in place, every column in which all rows equal 1.

    A column is removed when its number of 1-entries equals the number of
    rows of ``df``.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Matrix to filter. It is modified in place.

    Returns
    -------
    None
    """
    n_rows = len(df)
    ones_per_column = df.eq(1).sum()
    clonal_columns = ones_per_column.index[ones_per_column == n_rows]
    df.drop(columns=clonal_columns, inplace=True)


def binarym_filter_nonsense_mutations(df, alt_in=2, ref_in=1):
    """Drop, in place, the columns that fail the support criteria below.

    A column is kept only when it has at least ``alt_in`` entries equal to 1
    AND at least one of the following holds:

    * it has at least ``ref_in`` entries equal to 0;
    * every row equals 1;
    * it has at least as many entries equal to 1 as entries equal to 3.

    All other columns are removed.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Matrix to filter. It is modified in place.
    alt_in : int, optional
        Minimum number of 1-entries a column needs to be kept (default 2).
    ref_in : int, optional
        Number of 0-entries that is enough to satisfy the second condition
        (default 1).

    Returns
    -------
    None
    """
    n_rows = df.shape[0]

    # Per-column tallies of each code, computed once and reused below.
    n_ones = df.eq(1).sum()
    n_zeros = df.eq(0).sum()
    n_threes = df.eq(3).sum()

    has_enough_alt = n_ones >= alt_in
    is_supported = (
        (n_zeros >= ref_in) | (n_ones == n_rows) | (n_ones >= n_threes)
    )
    keep = has_enough_alt & is_supported

    df.drop(columns=df.columns[~keep], inplace=True)


def binarym_statistics(df):
    """Log the size of ``df`` and how many entries equal 0, 1 and 3.

    Four messages are emitted through ``tsc.logg.info``: the matrix
    dimensions, then the count and percentage of entries equal to 0
    (labelled REF), 1 (labelled HET) and 3 (labelled UNKNOWN). Percentages
    are relative to the total number of entries.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Matrix to summarise. It is not modified.

    Returns
    -------
    None
    """
    n_rows, n_cols = df.shape
    total = n_rows * n_cols

    n_ref = df.eq(0).sum().sum()
    n_het = df.eq(1).sum().sum()
    n_unknown = df.eq(3).sum().sum()

    tsc.logg.info(f"size = {n_rows} × {n_cols}")
    tsc.logg.info(f"    REF     = {n_ref:6d} ({100*n_ref/total:2.1f}%)")
    tsc.logg.info(f"    HET     = {n_het:6d} ({100*n_het/total:2.1f}%)")
    tsc.logg.info(f"    UNKNOWN = {n_unknown:6d} ({100*n_unknown/total:2.1f}%)")


def consensus_combine(df):
    """Combine cells in genotype matrix.

    This function combines the replicates or cells that have
    the same origin prior to running Trisicell-Cons. The replicates
    or cells that are supposed to be merged must be designated
    with `_`. For instance:

    input: {`{Cell1}_{ID1}`, `{Cell1}_{ID2}`, `{Cell2}_{ID1}`, `{Cell2}_{ID2}`}.

    output: {`{Cell1}`, `{Cell2}`}.

    Rows are grouped by the part of their index label that precedes the
    first `_`, and each group is reduced to a single row holding the
    column-wise product of its members.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        The input genotype matrix in conflict-free format.
        The index must hold string labels. ``df`` is not modified.

    Returns
    -------
    :class:`pandas.DataFrame`
        The combined genotype matrix, with one row per distinct label
        prefix.
    """
    # Group key: everything before the first "_" of each row label.
    origin = df.index.str.split("_").str[0]
    # One pass: the per-group product is exactly what broadcasting the
    # product to every row and then keeping the first row per group yields.
    return df.groupby(origin).prod()