Source code for geoanalytics.normalization.RootTransformation
# RootTransformation Class for Feature Normalization Using Root Scaling
# **Importing and Using the RootTransformation Class in a Python Program**
#
# import pandas as pd
#
# from geoanalytics.normalization import RootTransformation
#
# df = pd.read_csv("input.csv")
#
# transformer = RootTransformation(df, root=3)
#
# normalized_df = transformer.run()
#
# transformer.getRuntime()
#
# transformer.getMemoryUSS()
#
# transformer.getMemoryRSS()
#
# transformer.save("RootTransformation.csv")
#
__copyright__ = """
Copyright (C) 2022 Rage Uday Kiran
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
import time
import psutil
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
[docs]
class RootTransformation:
"""
**About this algorithm**
:**Description**:
RootTransformation applies a root-based transformation to all feature values
in the input dataset. This is useful for reducing the effect of large outliers
and compressing the range of high magnitude values. For example, a square root
(root=2) transformation is commonly used to stabilize variance in skewed datasets.
:**Parameters**:
- `dataframe` (*pd.DataFrame*): Input DataFrame with 'x', 'y' coordinates and feature columns.
- `root` (*int*, optional): Degree of the root transformation. Defaults to 2 (square root).
:**Attributes**:
- **df** (*pd.DataFrame*): Original input DataFrame with renamed first two columns as 'x' and 'y'.
- **normalizedDF** (*pd.DataFrame*): DataFrame containing root-transformed features.
- **startTime, endTime** (*float*): Execution timestamps.
- **memoryUSS, memoryRSS** (*float*): Memory usage statistics in KB.
**Execution methods**
**Calling from a Python program**
.. code-block:: python
import pandas as pd
from geoanalytics.normalization import RootTransformation
df = pd.read_csv("input.csv")
transformer = RootTransformation(df, root=3)
normalized_df = transformer.run()
transformer.getRuntime()
transformer.getMemoryUSS()
transformer.getMemoryRSS()
transformer.save("RootTransformation.csv")
**Credits**
Developed by Raashika and M. Charan Teja, supervised by Professor Rage Uday Kiran.
"""
def __init__(self, dataframe, root = 2):
"""
Initializes the RootTransformation object with a copy of the dataframe.
"""
self.df = dataframe.copy()
self.root = root
self.df.columns = ['x', 'y'] + list(self.df.columns[2:])
self.normalizedDF = None
self.startTime = None
self.endTime = None
self.memoryUSS = None
self.memoryRSS = None
[docs]
def getRuntime(self):
"""
Prints the total runtime of the clustering algorithm.
"""
print("Total Execution time of proposed Algorithm:", self.endTime - self.startTime, "seconds")
[docs]
def getMemoryUSS(self):
"""
Prints the memory usage (USS) of the process in kilobytes.
"""
print("Memory (USS) of proposed Algorithm in KB:", self.memoryUSS)
[docs]
def getMemoryRSS(self):
"""
Prints the memory usage (RSS) of the process in kilobytes.
"""
print("Memory (RSS) of proposed Algorithm in KB:", self.memoryRSS)
[docs]
def run(self):
"""
Executes the root transformation on the dataset.
Returns:
pd.DataFrame: DataFrame with 'x', 'y', and root-transformed features.
"""
self.startTime = time.time()
xy = self.df[['x', 'y']].reset_index(drop=True)
data = self.df.drop(['x', 'y'], axis=1).reset_index(drop=True)
normalizedData = data ** (1 / self.root)
self.normalizedDF = pd.concat([xy, normalizedData], axis=1)
self.endTime = time.time()
process = psutil.Process()
self.memoryUSS = process.memory_full_info().uss / 1024
self.memoryRSS = process.memory_full_info().rss / 1024
return self.normalizedDF
[docs]
def save(self, outputFile='RootTransformation.csv'):
"""
Saves the Normalized DataFrame to a CSV file.
"""
if self.normalizedDF is not None:
try:
self.normalizedDF.to_csv(outputFile, index=False)
print(f"Normalized data saved to: {outputFile}")
except Exception as e:
print(f"Failed to save labels: {e}")
else:
print("No Normalized data to save. Execute run() method first")