Some text some message..
Steps to Address Skewness in Numerical Data (06 Jul, 2024)

import pandas as pd

import numpy as np

from scipy.stats import skew

 

# Sample DataFrame: one categorical column and two numeric columns.
data = {
    'Category': ['A', 'B', 'A', 'B', 'A'],
    'Numeric1': [100, 200, 300, 400, 500],
    'Numeric2': [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)

# Example threshold: columns whose skewness exceeds this value are
# log-transformed. Only positive (right) skew is targeted, because a log
# transform compresses the upper tail of a distribution.
SKEW_THRESHOLD = 0.75

# Select numerical columns for the skewness check and correction.
# Only int64/float64 columns are selected, so 'Category' is left out.
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns

# Check skewness of numerical columns (one scipy `skew` value per column).
skewness = df[numerical_cols].apply(skew)
print("Skewness of numerical columns:")
print(skewness)

# Correct skewness if necessary by applying a log transformation.
skewed_cols = skewness[skewness > SKEW_THRESHOLD].index

for col in skewed_cols:
    # log1p(x) = log(1 + x) handles zeros, but it is undefined for x <= -1
    # (the result is NaN or -inf). Leave such columns untouched rather than
    # silently corrupting them.
    if df[col].min() <= -1:
        print("Skipping log transformation (values <= -1) for column:", col)
        continue
    df[col] = np.log1p(df[col])

# Check skewness again after transformation.
skewness_corrected = df[numerical_cols].apply(skew)
print("\nSkewness after transformation:")
print(skewness_corrected)