Coverage for functions \ flipdare \ analysis \ fee \ future_fee_estimator.py: 96%
48 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-05-08 12:22 +1000
« prev ^ index » next coverage.py v7.13.0, created at 2026-05-08 12:22 +1000
1#!/usr/bin/env python
2# Copyright (c) 2026 Flipdare Pty Ltd. All rights reserved.
3#
4# This file is part of Flipdare's proprietary software and contains
5# confidential and copyrighted material. Unauthorised copying,
6# modification, distribution, or use of this file is strictly
7# prohibited without prior written permission from Flipdare Pty Ltd.
8#
9# This software includes third-party components licensed under MIT,
10# BSD, and Apache 2.0 licences. See THIRD_PARTY_NOTICES for details.
11#
13import statistics
14import numpy as np
15from scipy import stats
17from flipdare.generated.shared.model.user.app_fee_type import AppFeeType
18from flipdare.generated.shared.stripe.stripe_currency_code import StripeCurrencyCode
19from flipdare.payments.data.fee_calculator import FeeCalculator
21__all__ = ["FutureFeeEstimator"]
class FutureFeeEstimator:
    """
    Estimate the highest "typical" pledge amount for a dare.

    Wraps a list of ``FeeCalculator`` objects and reports the largest
    ``amount`` that remains once statistical outliers have been removed.
    Two filters are available: Median Absolute Deviation (``highest_mad``)
    and Inter-quartile Range (``highest_iqr``); ``highest`` combines them.
    """

    # Number of scaled MADs either side of the median that is still kept.
    _MAD_CUTOFF = 3
    # Fence multiplier applied to the IQR either side of Q1 / Q3.
    _IQR_FENCE = 1.5

    def __init__(self, values: "list[FeeCalculator]") -> None:
        """Store the fee calculators whose ``amount`` values will be analysed."""
        self._values: list[FeeCalculator] = values

    @classmethod
    def from_raw(
        cls,
        values: list[int],
        currency: "StripeCurrencyCode",
        fee_type: "AppFeeType",
    ) -> "FutureFeeEstimator":
        """
        Build an estimator from raw integer amounts.

        Args:
            values: Raw amounts; each one becomes the ``amount`` of a ``FeeCalculator``.
            currency: Passed as both ``from_currency`` and ``to_currency``.
            fee_type: Passed to every ``FeeCalculator``.

        Returns:
            A new estimator holding one ``FeeCalculator`` per amount.
        """
        fee_calculators = [
            FeeCalculator(
                amount=v, fee_type=fee_type, from_currency=currency, to_currency=currency
            )
            for v in values
        ]
        return cls(fee_calculators)

    def _unit_values(self) -> list[float]:
        """
        Return every stored ``amount`` converted to ``float``.

        Raises:
            ValueError: If the estimator holds no values.
        """
        values = [float(v.amount) for v in self._values]
        if not values:
            raise ValueError("Input list must contain at least one numeric value.")
        return values

    def highest(self) -> int:
        """
        Return the highest non-outlier amount.

        The MAD filter is tried first because it is more robust to outliers;
        the IQR filter is used only when the MAD filter raises ``ValueError``.

        NOTE(review): for finite input the MAD filter always keeps at least
        the values equal to the median, so the IQR fallback is rarely (if
        ever) reached - confirm whether a zero MAD should also trigger it.

        Raises:
            ValueError: If the estimator holds no values.
        """
        # Fail fast on empty input before attempting either filter.
        self._unit_values()
        try:
            return self.highest_mad()
        except ValueError:
            return self.highest_iqr()

    def highest_mad(self) -> int:
        """
        Return the highest amount within three scaled MADs of the median.

        When more than half of the amounts are identical the MAD is zero, so
        the bounds collapse onto the median and only amounts equal to the
        median survive the filter.

        Returns:
            The largest surviving amount, truncated to ``int``.

        Raises:
            ValueError: If the estimator holds no values, or if no value
                survives the filter.
        """
        data = np.array(self._unit_values())

        # scale="normal" makes it comparable to standard deviation for normal data
        mad = stats.median_abs_deviation(data, scale="normal")

        median = np.median(data)
        lower_bound = median - self._MAD_CUTOFF * mad
        upper_bound = median + self._MAD_CUTOFF * mad
        filtered_data = data[(data >= lower_bound) & (data <= upper_bound)]
        if len(filtered_data) == 0:
            raise ValueError("All values are outliers or list is empty after filtering.")

        return int(np.max(filtered_data))

    def highest_iqr(self) -> int:
        """
        Return the highest amount after removing outliers with the IQR method.

        Steps:
            1. Sort the amounts and compute Q1 and Q3 (25th and 75th percentiles).
            2. Compute IQR = Q3 - Q1.
            3. Keep only amounts within [Q1 - 1.5 x IQR, Q3 + 1.5 x IQR].
            4. Return the maximum of the amounts that were kept.

        Alternatives considered:
            * Z-score (better for normally distributed data).
            * Median Absolute Deviation (more robust to outliers), see ``highest_mad``.

        Advantages: works well for skewed data, removes extreme values before
        finding the max, and needs only the standard library.

        Returns:
            The largest surviving amount, truncated to ``int``.

        Raises:
            ValueError: If the estimator holds no values, or if no value
                survives the filter.
        """
        sorted_vals = sorted(self._unit_values())

        # statistics.quantiles() rejects a single data point on Python < 3.13.
        # A lone value has nothing to be an outlier against, so return it
        # directly (this matches what Python 3.13+ computes).
        if len(sorted_vals) == 1:
            return int(sorted_vals[0])

        # Quartile cut points: Q1 (25th), Q2 (median, unused) and Q3 (75th).
        q1, _, q3 = statistics.quantiles(sorted_vals, n=4)
        iqr = q3 - q1

        # Bounds for non-outliers.
        lower_bound = q1 - self._IQR_FENCE * iqr
        upper_bound = q3 + self._IQR_FENCE * iqr

        filtered_vals = [v for v in sorted_vals if lower_bound <= v <= upper_bound]
        if not filtered_vals:
            raise ValueError("All values are outliers or list is empty after filtering.")

        return int(max(filtered_vals))