Source code for kinetics_kalculator.utils

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import linregress


[docs] def convert_to_concentration_using_linear_standard_curve( df: pd.DataFrame, slope: float, y_intercept: float ) -> None: """Converts generic values to concentrations. Convert the 'value' column in the DataFrame to concentration units using the provided slope and y-intercept of the standard curve. NOTE: Only support linear standard curves of the form y = mx + c. Args: df (pd.DataFrame): A DataFrame containing a 'value' column. slope (float): The slope of the standard curve. y_intercept (float): The y-intercept of the standard curve. Returns: DataFrame: The DataFrame with the 'value' column converted to concentration units. Raises: ValueError: If the DataFrame does not contain the required 'value' column. Example: >>> df = pd.DataFrame({"value": [1, 2, 3, 4, 5]}) # E.g., units of absorbance >>> convert_to_concentration_using_standard_curves(df, 2, 1) >>> df value 0 0.5 1 1.0 2 1.5 3 2.0 4 2.5 """ # Ensure the DataFrame contains the required 'value' column if "value" not in df.columns: raise ValueError("DataFrame must contain a 'value' column.") # Apply the conversion using the standard curve parameters df["value"] = df["value"].apply(lambda x: (x - y_intercept) / slope) return df
[docs] def filter_by_time_range( df: pd.DataFrame, lower_bound: float, upper_bound: float ) -> pd.DataFrame: """Filter the DataFrame to only include rows where the 'time' column is within the specified range. Args: df (pd.DataFrame): A DataFrame containing a 'time' column. lower_bound (float): The lower bound of the time range. upper_bound (float): The upper bound of the time range. Returns: pd.DataFrame: A new DataFrame containing only the rows within the specified time range. Raises: ValueError: If the DataFrame does not contain the required 'time' column. """ # Ensure the DataFrame contains the required 'time' column if "time" not in df.columns: raise ValueError("DataFrame must contain a 'time' column.") # Apply the filter to include only rows within the specified time range filtered_df = df[(df["time"] >= lower_bound) & (df["time"] <= upper_bound)] # Assert that the filtered DataFrame is not empty assert not filtered_df.empty, "No data within the specified time range!" # Assert that the filtered dataframes has no time values outside the specified range assert ( not filtered_df["time"].gt(upper_bound).any() ), "Time values outside the specified range." assert ( not filtered_df["time"].lt(lower_bound).any() ), "Time values outside the specified range." return filtered_df
[docs] def adjust_rates_for_background( df: pd.DataFrame, negative_control: str, epsilon: float, rate_column: str = "rate", sample_type_column: str = "sample_type", ) -> None: """Adjusts the rates in the DataFrame to account for background rates using a specified negative control. Args: df (pd.DataFrame): A DataFrame containing a sample type column and a column for rates. negative_control (str): The value in the sample type column to identify negative control samples. epsilon (float): A small positive value to replace any negative rates after background adjustment. rate_column (str): The name of the column containing the rates to be adjusted. Default is 'rate'. sample_type_column (str): The name of the column containing the sample type information. Default is 'sample_type'. Returns: None: The DataFrame is modified in-place. Raises: ValueError: If the DataFrame does not contain the required columns. """ # Ensure the DataFrame contains the required columns if sample_type_column not in df.columns or rate_column not in df.columns: raise ValueError( "DataFrame must contain both the specified rate column and sample type column in order to adjust for background activity." ) # Calculate the mean background rate from negative control samples background_rate_mean = df[df[sample_type_column] == negative_control][ rate_column ].mean() # Subtract the background rate from all rates df[f"{rate_column}_minus_background"] = df[rate_column] - background_rate_mean # Clip any negative rates to epsilon df.loc[ df[f"{rate_column}_minus_background"] < 0, f"{rate_column}_minus_background" ] = epsilon return df
[docs] def fit_line(group: pd.DataFrame, x_column: str, y_column: str) -> pd.Series: """Fits a linear model to the data in the specified group using the given x and y columns. Args: group (pd.DataFrame): A DataFrame group containing the data to fit. x_column (str): The name of the column to use as the independent variable. y_column (str): The name of the column to use as the dependent variable. Returns: pd.Series: A Series containing the slope (rate), intercept, and other statistics from the linear regression. - "rate": The slope of the fitted line. - "intercept": The y-intercept of the fitted line. - "r_value": The correlation coefficient. - "p_value": The two-sided p-value for a hypothesis test whose null hypothesis is that the slope is zero. - "std_err": The standard error of the estimated gradient. """ # Perform linear regression using scipy's linregress slope, intercept, r_value, p_value, std_err = linregress( group[x_column], group[y_column] ) # Return the results as a pandas Series return pd.Series( { "rate": slope, "intercept": intercept, "r_value": r_value, "p_value": p_value, "std_err": std_err, } )
[docs] def add_rate_column( df: pd.DataFrame, x_column: str, y_column: str, group_by_columns: list[str], ) -> pd.DataFrame: """Adds specified columns to the DataFrame by fitting a linear model to the specified x and y columns for each group. Args: df (pd.DataFrame): A DataFrame containing the data to fit. x_column (str): The name of the column to use as the independent variable. y_column (str): The name of the column to use as the dependent variable. group_by_columns (list): A list of columns to group the data by before fitting the model. E.g., "well", or "replicate". Returns: pd.DataFrame: A new DataFrame with specified columns added, representing the results of the linear fit for each group. The new columns include: - "rate": The slope of the fitted line. - "intercept": The y-intercept of the fitted line. - "r_value": The correlation coefficient. - "p_value": The two-sided p-value for a hypothesis test whose null hypothesis is that the slope is zero. - "std_err": The standard error of the estimated gradient. """ # Ensure we are not adding columns that already exist in the DataFrame columns_to_add = ["rate", "intercept", "r_value", "p_value", "std_err"] assert all( column not in df.columns for column in columns_to_add ), "Columns to add already exist in DataFrame." # Apply the fit_line function to each group and reset index df_fits = ( df.groupby(group_by_columns) .apply(fit_line, x_column=x_column, y_column=y_column) .reset_index() ) # Merge the results back into the original DataFrame df = df.merge(df_fits, on=group_by_columns, how="left") return df
[docs] def calculate_michaelis_menten_constants( df: pd.DataFrame, substrate_concentration_column: str = "substrate_concentration" ) -> dict: """Calculate Michaelis-Menten constants Vmax and Km. Args: df (pd.DataFrame): A DataFrame containing columns for substrate concentration and initial rates. substrate_concentration_column (str): The name of the column containing the substrate concentrations. Default is 'substrate_concentration'. Returns: dict: A dictionary containing Vmax and Km. """ def michaelis_menten(S, Vmax, Km): return (Vmax * S) / (Km + S) assert ( substrate_concentration_column in df.columns ), f"Column '{substrate_concentration_column}' not found in the DataFrame." assert ( "rate_minus_background" in df.columns ), "Column 'rate_minus_background' not found in the DataFrame. Please calculate and adjust rates for background first." initial_rates = df["rate_minus_background"].values substrate_concentrations = df[substrate_concentration_column].values # Fit the Michaelis-Menten equation to the data popt, _ = curve_fit( michaelis_menten, substrate_concentrations, initial_rates, bounds=(0, np.inf), ) Vmax, Km = popt return {"Vmax": Vmax, "Km": Km}