Skip to content

Profile

CharStats

Source code in python/scouter/profile/_profile.pyi
class CharStats:
    """String-length summary statistics."""

    @property
    def min_length(self) -> int:
        """Length of the shortest string."""
        ...

    @property
    def max_length(self) -> int:
        """Length of the longest string."""
        ...

    @property
    def median_length(self) -> int:
        """Median of the string lengths."""
        ...

    @property
    def mean_length(self) -> float:
        """Mean of the string lengths."""
        ...

max_length property

Maximum string length

mean_length property

Mean string length

median_length property

Median string length

min_length property

Minimum string length

DataProfile

Data profile of features

Source code in python/scouter/profile/_profile.pyi
class DataProfile:
    """Data profile of features"""

    @property
    def features(self) -> Dict[str, FeatureProfile]:
        """Mapping of each feature to its data profile."""
        ...

    def __str__(self) -> str:
        """Return the data profile rendered as a string."""
        ...

    def model_dump_json(self) -> str:
        """Serialize the data profile to a JSON string."""
        ...

    @staticmethod
    def model_validate_json(json_string: str) -> "DataProfile":
        """Build a data profile from its JSON representation.

        Args:
            json_string:
                JSON string representation of the data profile

        Returns:
            DataProfile loaded from `json_string`
        """
        ...

    def save_to_json(self, path: Optional[Path] = None) -> Path:
        """Write the data profile to a JSON file.

        Args:
            path:
                Optional path to save the data profile. If None, outputs to `data_profile.json`

        Returns:
            Path to the saved data profile
        """
        ...

features property

Returns dictionary of features and their data profiles

__str__()

Return string representation of the data profile

Source code in python/scouter/profile/_profile.pyi
def __str__(self) -> str:
    """Return the string representation of the data profile."""

model_dump_json()

Return json representation of data profile

Source code in python/scouter/profile/_profile.pyi
def model_dump_json(self) -> str:
    """Serialize the data profile to a JSON string."""
    ...

model_validate_json(json_string) staticmethod

Load Data profile from json

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `json_string` | `str` | JSON string representation of the data profile | *required* |

Source code in python/scouter/profile/_profile.pyi
@staticmethod
def model_validate_json(json_string: str) -> "DataProfile":
    """Build a data profile from its JSON representation.

    Args:
        json_string:
            JSON string representation of the data profile

    Returns:
        DataProfile loaded from `json_string`
    """
    ...

save_to_json(path=None)

Save data profile to json file

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `path` | `Optional[Path]` | Optional path to save the data profile. If None, outputs to `data_profile.json` | `None` |

Returns:

| Type | Description |
| --- | --- |
| `Path` | Path to the saved data profile |

Source code in python/scouter/profile/_profile.pyi
def save_to_json(self, path: Optional[Path] = None) -> Path:
    """Write the data profile to a JSON file.

    Args:
        path:
            Optional path to save the data profile. If None, outputs to `data_profile.json`

    Returns:
        Path to the saved data profile
    """
    ...

DataProfiler

Source code in python/scouter/profile/_profile.pyi
class DataProfiler:
    """Profiler that creates a `DataProfile` from input data."""

    def __init__(self) -> None:
        """Instantiate DataProfiler class that is used to profile data"""

    def create_data_profile(
        self,
        data: Any,
        data_type: Optional[DataType] = None,
        bin_size: int = 20,
        compute_correlations: bool = False,
    ) -> DataProfile:
        """Create a data profile from data.

        Args:
            data:
                Data to create a data profile from. Data can be a numpy array,
                a polars dataframe or pandas dataframe.

                **Data is expected to not contain any missing values, NaNs or infinities**

                Such values are incompatible with computing quantiles,
                histograms, and correlations, so they must be removed or imputed.

            data_type:
                Optional data type. Inferred from data if not provided.
            bin_size:
                Bin size for histograms. Defaults to 20 bins.
            compute_correlations:
                Whether to compute correlations or not. Defaults to False.

        Returns:
            DataProfile
        """

__init__()

Instantiate DataProfiler class that is used to profile data

Source code in python/scouter/profile/_profile.pyi
def __init__(self) -> None:
    """Instantiate DataProfiler class that is used to profile data"""

create_data_profile(data, data_type=None, bin_size=20, compute_correlations=False)

Create a data profile from data.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data` | `Any` | Data to create a data profile from. Data can be a numpy array, a polars dataframe or pandas dataframe.<br><br>**Data is expected to not contain any missing values, NaNs or infinities**<br><br>These values are incompatible with computing quantiles, histograms, and correlations, and must be removed or imputed. | *required* |
| `data_type` | `Optional[DataType]` | Optional data type. Inferred from data if not provided. | `None` |
| `bin_size` | `int` | Optional bin size for histograms. Defaults to 20 bins. | `20` |
| `compute_correlations` | `bool` | Whether to compute correlations or not. | `False` |

Returns:

| Type | Description |
| --- | --- |
| `DataProfile` | DataProfile |

Source code in python/scouter/profile/_profile.pyi
def create_data_profile(
    self,
    data: Any,
    data_type: Optional[DataType] = None,
    bin_size: int = 20,
    compute_correlations: bool = False,
) -> DataProfile:
    """Build a data profile from the given data.

    Args:
        data:
            Input to profile. Data can be a numpy array, a polars dataframe
            or pandas dataframe.

            **Data is expected to not contain any missing values, NaNs or infinities**

            Such values are incompatible with computing quantiles,
            histograms, and correlations, so they must be removed or imputed.

        data_type:
            Optional data type. Inferred from data if not provided.
        bin_size:
            Optional bin size for histograms. Defaults to 20 bins.
        compute_correlations:
            Whether to compute correlations or not.

    Returns:
        DataProfile
    """
    ...

Distinct

Source code in python/scouter/profile/_profile.pyi
class Distinct:
    """Unique-value statistics."""

    @property
    def count(self) -> int:
        """Total count of unique values."""
        ...

    @property
    def percent(self) -> float:
        """Uniqueness of the values, as a percent."""
        ...

count property

total unique value counts

percent property

percent value uniqueness

FeatureProfile

Source code in python/scouter/profile/_profile.pyi
class FeatureProfile:
    """Profile of a single feature."""

    @property
    def id(self) -> str:
        """The id of the feature profile."""
        ...

    @property
    def numeric_stats(self) -> Optional[NumericStats]:
        """The numeric stats (may be None)."""
        ...

    @property
    def string_stats(self) -> Optional[StringStats]:
        """The string stats (may be None)."""
        ...

    @property
    def timestamp(self) -> str:
        """The timestamp, as a string."""
        ...

    @property
    def correlations(self) -> Optional[Dict[str, float]]:
        """Feature correlation values (may be None)."""
        ...

    def __str__(self) -> str:
        """Return the feature profile rendered as a string."""
        ...

correlations property

Feature correlation values

id property

Return the id.

numeric_stats property

Return the numeric stats.

string_stats property

Return the string stats.

timestamp property

Return the timestamp.

__str__()

Return the string representation of the feature profile.

Source code in python/scouter/profile/_profile.pyi
def __str__(self) -> str:
    """Return the string representation of the feature profile."""

Histogram

Source code in python/scouter/profile/_profile.pyi
class Histogram:
    """Histogram described by its bin values and per-bin counts."""

    @property
    def bins(self) -> List[float]:
        """Values of the bins."""
        ...

    @property
    def bin_counts(self) -> List[int]:
        """Counts of the bins."""
        ...

bin_counts property

Bin counts

bins property

Bin values

NumericStats

Source code in python/scouter/profile/_profile.pyi
class NumericStats:
    """Summary statistics for numeric values."""

    @property
    def mean(self) -> float:
        """The mean."""
        ...

    @property
    def stddev(self) -> float:
        """The standard deviation (stddev)."""
        ...

    @property
    def min(self) -> float:
        """The minimum (min)."""
        ...

    @property
    def max(self) -> float:
        """The maximum (max)."""
        ...

    @property
    def distinct(self) -> Distinct:
        """Counts of distinct values."""
        ...

    @property
    def quantiles(self) -> Quantiles:
        """Quantiles of the values."""
        ...

    @property
    def histogram(self) -> Histogram:
        """Histogram of the values."""
        ...

distinct property

Distinct value counts

histogram property

Value histograms

max property

Return the max.

mean property

Return the mean.

min property

Return the min.

quantiles property

Value quantiles

stddev property

Return the stddev.

Quantiles

Source code in python/scouter/profile/_profile.pyi
class Quantiles:
    """Selected quantiles of a set of values."""

    @property
    def q25(self) -> float:
        """25th percentile (0.25 quantile)."""
        ...

    @property
    def q50(self) -> float:
        """50th percentile (0.50 quantile)."""
        ...

    @property
    def q75(self) -> float:
        """75th percentile (0.75 quantile)."""
        ...

    @property
    def q99(self) -> float:
        """99th percentile (0.99 quantile)."""
        ...

q25 property

25th quantile

q50 property

50th quantile

q75 property

75th quantile

q99 property

99th quantile

StringStats

Source code in python/scouter/profile/_profile.pyi
class StringStats:
    """Summary statistics for string values."""

    @property
    def distinct(self) -> Distinct:
        """Counts of distinct values."""
        ...

    @property
    def char_stats(self) -> CharStats:
        """Statistics about characters."""
        ...

    @property
    def word_stats(self) -> WordStats:
        """Statistics about words."""
        ...

char_stats property

Character statistics

distinct property

Distinct value counts

word_stats property

word statistics

WordStats

Source code in python/scouter/profile/_profile.pyi
class WordStats:
    """Word-level statistics."""

    @property
    def words(self) -> Dict[str, Distinct]:
        """Distinct counts keyed by word."""
        ...

words property

Distinct word counts