“Eliminar los valores atípicos en DataFrame” Código de respuesta

Eliminar los valores atípicos python pandas

#------------------------------------------------------------------------------
# accept a dataframe, remove outliers, return cleaned data in a new dataframe
# see http://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm
#------------------------------------------------------------------------------
def remove_outlier(df_in, col_name):
    """Return the rows of *df_in* whose *col_name* value is not an outlier.

    Outliers are identified with Tukey's fences: a value is an outlier when
    it lies more than 1.5 interquartile ranges below the first quartile or
    above the third quartile.

    Args:
        df_in: The DataFrame to filter. It is not modified.
        col_name: Label of the numeric column used to detect outliers.

    Returns:
        The subset of *df_in* whose *col_name* values fall within the fences
        (inclusive). Rows where the column is NaN are dropped, because every
        comparison against NaN evaluates to False.
    """
    values = df_in[col_name]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1  # interquartile range
    fence_low = q1 - 1.5 * iqr
    fence_high = q3 + 1.5 * iqr
    # The fences are inclusive: only points strictly beyond a fence are
    # outliers. A strict comparison would also discard every row of a
    # constant column, where iqr == 0 and both fences equal the value itself.
    return df_in.loc[(values >= fence_low) & (values <= fence_high)]
Handsome Hawk

Eliminar los valores atípicos en DataFrame

# Solution is based on this article: 
# http://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm

import pandas as pd
import numpy as np

def remove_outliers_from_series(series):
    """Return the elements of *series* that lie within Tukey's fences.

    A value is treated as an outlier when it lies more than 1.5
    interquartile ranges below the first quartile or above the third
    quartile.

    Args:
        series: A numeric pandas Series. It is not modified.

    Returns:
        A Series holding the values within the fences (inclusive), keeping
        their original index labels. NaN entries are dropped, because every
        comparison against NaN evaluates to False.
    """
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    interquartile_range = q3 - q1
    fence_low = q1 - 1.5 * interquartile_range
    fence_high = q3 + 1.5 * interquartile_range
    # The fences are inclusive: only points strictly beyond a fence are
    # outliers. A strict comparison would empty a constant series, where the
    # interquartile range is 0 and both fences equal the value itself.
    return series[(series >= fence_low) & (series <= fence_high)]


def _iqr_fences(q1, q3):
    """Return Tukey's ``(low, high)`` fences for the quartiles *q1* and *q3*."""
    interquartile_range = q3 - q1
    return q1 - 1.5 * interquartile_range, q3 + 1.5 * interquartile_range


def remove_outliers_from_dataframe(df, col):
    """Return the rows of *df* whose *col* value lies within Tukey's fences.

    Args:
        df: The DataFrame to filter. It is not modified.
        col: Label of the numeric column used to detect outliers.

    Returns:
        The subset of *df* whose *col* values fall within the fences
        (inclusive). Rows where the column is NaN are dropped.
    """
    values = df[col]
    fence_low, fence_high = _iqr_fences(values.quantile(0.25), values.quantile(0.75))
    # Inclusive bounds: a point exactly on a fence is not beyond it, and a
    # constant column (IQR == 0) must keep all of its rows.
    return df.loc[(values >= fence_low) & (values <= fence_high)]


def remove_outliers_from_np_array(arr):
    """Return the elements of *arr* that lie within Tukey's fences.

    Args:
        arr: A numeric numpy array (any array-like is converted with
            ``np.asarray``).

    Returns:
        A 1-D array of the values within the fences (inclusive). An empty
        input is returned unchanged.
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        # Percentiles of an empty array are undefined; nothing to filter.
        return arr
    q1, q3 = np.percentile(arr, [25, 75])
    fence_low, fence_high = _iqr_fences(q1, q3)
    return arr[(arr >= fence_low) & (arr <= fence_high)]


def remove_outliers_from_python_list(_list):
    """Return a new list holding the elements of *_list* within Tukey's fences.

    Args:
        _list: A list of numbers.

    Returns:
        A list of native Python numbers, in their original order.
    """
    # ``tolist`` converts numpy scalars back to plain Python numbers.
    return remove_outliers_from_np_array(np.array(_list)).tolist()


def remove_outliers(*args, **kwargs):
    """Remove outliers from the data given as the first positional argument.

    Dispatches on the type of ``args[0]`` and forwards every argument to the
    matching helper: a DataFrame (which also needs the column label), a
    Series, a numpy array, or a Python list.

    Returns:
        The filtered data, in the same container type as the input.

    Raises:
        TypeError: If no positional argument is given or the data type is
            not supported.
    """
    if not args:
        raise TypeError('remove_outliers() requires the data as its first positional argument.')
    data = args[0]
    if isinstance(data, pd.DataFrame):
        return remove_outliers_from_dataframe(*args, **kwargs)
    if isinstance(data, pd.Series):
        return remove_outliers_from_series(*args, **kwargs)
    if isinstance(data, np.ndarray):
        return remove_outliers_from_np_array(*args, **kwargs)
    if isinstance(data, list):
        return remove_outliers_from_python_list(*args, **kwargs)
    raise TypeError(f'{type(args[0])} is not supported.')
Wrong Whale

Eliminación de valores atípicos con pandas

# Example data: 100 rows by 3 columns drawn with np.random.randn.
df = pd.DataFrame(np.random.randn(100, 3))

from scipy import stats
# Keep only the rows in which every column's absolute z-score is below 3.
# The filtered frame is the value of this expression; it is not assigned
# back to df.
df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]
Frantic Fox

Eliminar valores atípicos de un DataFrame con pandas

# Keep the rows whose "col" value lies in the closed interval [x, y];
# x and y are placeholders for the chosen lower and upper bounds.
df[(df["col"] >= x ) & (df["col"] <= y )]

but it's more readable to use:

# Row filter on "col" using Series.between with bounds x and y
# (between includes both endpoints by default).
df[df["col"].between(x,y)]
rudythealchemist

Respuestas similares a “Eliminar los valores atípicos en DataFrame”

Preguntas similares a “Eliminar los valores atípicos en DataFrame”

Más respuestas relacionadas con “Eliminar los valores atípicos en DataFrame” en Python

Explore las respuestas de código populares por idioma

Explorar otros lenguajes de código