{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"base = pd.read_parquet('data/pl_threshold_jan_11022020.parquet')\n",
"part1 = pd.read_parquet('data/pl_threshold_feats_11022020_part1.parquet')\n",
"# REMOVE old util_non_pl\n",
"part1 = part1.drop(columns=['util_non_pl'])\n",
"fixed_util = pd.read_parquet('data/fixed_pl_threshold_util_non_pl.parquet')\n",
"# ATTACH new util_non_pl\n",
"part1 = part1.merge(fixed_util, how='left', on=['user_id', 'trx_id'])\n",
"\n",
"part2 = pd.read_parquet('data/pl_threshold_feats_11022020_part2.parquet')\n",
"part3 = pd.read_parquet('data/pl_threshold_feats_18022020_part3.parquet')\n",
"pf_df = pd.read_parquet('data/pl_threshold_pf_feats_11022020.parquet')\n",
"pf_df = pf_df[['user_id', 'trx_id', 'pf_delin_max_dpd_12mo']]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(79853, 10)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(79853, 10)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(79853, 8)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(79853, 3)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"(79853, 3)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"display(base.shape)\n",
"display(part1.shape)\n",
"display(part2.shape)\n",
"display(part3.shape)\n",
"display(pf_df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"base_df = base.merge(part2, how='left', on=['user_id', 'trx_id'])\\\n",
" .merge(part3, how='left', on=['user_id', 'trx_id'])\\\n",
" .merge(pf_df, how='left', on=['user_id', 'trx_id'])\\\n",
" .merge(part1, how='left', on=['user_id', 'trx_id'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(79853, 26)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Feature Transformations"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"base_df['payment_type'] = np.where(base_df['payment_type'] == '3_months', 0, 1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# NaN values in time_approve_to_pl_hour are caused by missing approval time in DB\n",
"# fill with 0. (There are only 4 cases)\n",
"base_df['time_appr_to_pl_hour'] = base_df['time_appr_to_pl_hour'].fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Negative values caused by error in loan table. User's previous pl\n",
"# settlement date is after the current pl transaction date.\n",
"# Replace with median values. (There are only 2 cases)\n",
"neg_index = base_df[base_df['time_from_last_sett_pl_hour'] < 0].index\n",
"med_val = 17.0\n",
"base_df.loc[neg_index, 'time_from_last_sett_pl_hour'] = med_val"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"base_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute PLV3 Proba"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"mod_lgb = pickle.load(open('lgbm_pl_score_v3.p', 'rb'))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1079"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mod_lgb.best_iteration_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Predict Proba"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"features = ['ap_co_plo',\n",
" 'current_dpd',\n",
" 'date_of_month',\n",
" 'delin_max_dpd_90',\n",
" 'nondgtl_nonpl_trx_co_180',\n",
" 'oth_last_rep_days',\n",
" 'oth_last_rep_dpd',\n",
" 'payment_type',\n",
" 'pf_delin_max_dpd_12mo',\n",
" 'pl_settle_to_due_last_180',\n",
" 'pl_trx_suc_co_90',\n",
" 'time_appr_to_pl_hour',\n",
" 'time_from_last_sett_pl_hour',\n",
" 'trx_denied_co_90',\n",
" 'trx_sett_sum_30',\n",
" 'util_non_pl',\n",
" 'util_pl'\n",
" ]\n",
"features = sorted(features)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"base_df['proba_v3'] = mod_lgb.predict_proba(base_df[features], num_iteration=mod_lgb.best_iteration_)[:,1]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"base_df['proba_v3'] = base_df['proba_v3'].apply(lambda x: 0.0045+0.622*x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Threshold at 0.114"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"base_df"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fcb39617e50>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD6CAYAAABDPiuvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAXc0lEQVR4nO3df7DldX3f8ecrbNH4E5R6a3d3upispghmClclzTRzFYVFMiwzxZllSFksnZ0a/NGGTlhqO8yoTDHJlEqj2I1shYzDQqgtW0HJipw6mZEfoggCwb0iA1eIaBaJV6POmnf/OJ+7OS5n95577q+99zwfM2f2fN/fz/d7Pu+5cF/3++Ock6pCkjTafmm5JyBJWn6GgSTJMJAkGQaSJAwDSRKGgSSJAcIgyc4kzyT5xkH19yZ5NMlDSf6gp35Zksm27oye+qZWm0yyvad+fJK7k+xNcmOSoxeqOUnSYDLb+wyS/BYwDVxfVSe22luADwBnVdVPk7yqqp5JcgJwA/Am4B8DXwBe23b1TeDtwBRwL3BeVT2c5CbgM1W1K8kngK9X1TWzTfy4446rDRs2DNzoj370I1784hcPPH41sOfRMIo9w2j2vRA933fffd+vqn/4vBVVNesD2AB8o2f5JuBtfcZdBlzWs3w78BvtcfvB44AA3wfWtPovjDvc45RTTqm5uPPOO+c0fjWw59Ewij1XjWbfC9Ez8JXq8zt12GsGrwX+RTu98/+SvLHV1wJP9oybarVD1V8J/KCq9h9UlyQtoTXz2O5Y4FTgjcBNSV5D9y/9gxX9r03UYcb3lWQbsA1gbGyMTqcz8ISnp6fnNH41sOfRMIo9w2j2vZg9DxsGU3TP8xdwT5K/A45r9fU949YBT7Xn/erfB45JsqYdHfSOf56q2gHsABgfH6+JiYmBJ9zpdJjL+NXAnkfDKPYMo9n3YvY87Gmi/wO8FSDJa4Gj6f5i3w1sSfKCJMcDG4F76F4w3tjuHDoa2ALsbmFyJ3Bu2+9W4JZhm5EkDWfWI4MkNwATwHFJpoDLgZ3Azna76c+Are0X+0Pt7qCHgf3AxVX187af99C9oHwUsLOqHmovcSmwK8mHga8B1y5gf5KkAcwaBlV13iFW/c4hxl8BXNGnfhtwW5/6Y3RvRZUkLRPfgSxJMgwkSYaBJInhby1d0TZsv/XA88evPGsZZyJJRwaPDCRJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQGCIMkO5M8077v+OB1/yFJJTmuLSfJ1UkmkzyQ5OSesVuT7G2PrT31U5I82La5OkkWqjlJ0mAGOTL4FLDp4GKS9cDbgSd6ymcCG9tjG3BNG/sK4HLgzXS/7/jyJMe2ba5pY2e2e95rSZIW16xhUFVfAvb1WXUV8PtA9dQ2A9dX113AMUleDZwB7KmqfVX1LLAH2NTWvayqvlxVBVwPnDO/liRJczXUN50lORv4TlV9/aCzOmuBJ3uWp1rtcPWpPvVDve42ukcRjI2N0el0Bp7z9PT0gfGXnLT/QH0u+1hpenseFfY8Okax78Xsec5hkORFwAeA0/ut7lOrIep9VdUOYAfA+Ph4TUxMzDbdAzqdDjPjL+z92svzB9/HStPb86iw59Exin0vZs/D3E30K8DxwNeTPA6sA76a5B/R/ct+fc/YdcBTs9TX9alLkpbQnMOgqh6sqldV1Yaq2kD3F/rJVfVXwG7ggnZX0anAc1X1NHA7cHqSY9uF49OB29u6HyY5td1FdAFwywL1Jkka0CC3lt4AfBl4XZKpJBcdZvhtwGPAJPAnwO8CVNU+4EPAve3xwVYDeDfwybbNt4DPDdeKJGlYs14zqKrzZlm/oed5ARcfYtxOYGef+leAE2ebhyRp8fgOZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIY7DuQdyZ5Jsk3emp/mOQvkzyQ5H8nOaZn3WVJJpM8muSMnvqmVptMsr2nfnySu5PsTXJjkqMXskFJ0uwGOTL4FLDpoNoe4MSqegPwTeAygCQnAFuA17dtPp7kqCRHAR8DzgROAM5rYwE+AlxVVRuBZ4GL5tWRJGnOZg2DqvoSsO+g2p9X1f62eBewrj3fDOyqqp9W1beBSeBN7TFZVY9V1c+AXcDmJAHeCtzctr8OOGeePUmS5mjNAuzjXwM3tudr6YbDjKlWA3jyoPqbgVcCP+gJlt7xz5NkG7ANYGxsjE6nM/Akp6enD4y/5KT9B+r//dO3HHh+0tqXD7y/laC351Fhz6NjFPtezJ7nFQZJPgDsBz49U+ozrOh/BFKHGd9XVe0AdgCMj4/XxMTEwHPtdDrMjL9w+619xzx+/uD7Wwl6ex4V9jw6RrHvxex56DBIshX4beC0qpr5BT4FrO8Ztg54qj3vV/8+cEySNe3ooHe8JGmJDHVraZJNwKXA2VX1455Vu4EtSV6Q5HhgI3APcC+wsd05dDTdi8y7W4jcCZzbtt8K3IIkaUkNcmvpDcCXgdclmUpyEfDHwEuBPUnuT/IJgKp6CLgJeBj4PHBxVf28/dX/HuB24BHgpjYWuqHye0km6V5DuHZBO5QkzWrW00RVdV6f8iF/YVfVFcAVfeq3Abf1qT9G924jSdIy8R3IkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJDHYdyDvTPJMkm/01F6RZE+Sve3fY1s9Sa5OMpnkgSQn92yztY3fm2RrT/2UJA+2ba5OkoVuUpJ0eIMcGXwK2HRQbTtwR1VtBO5oywBnAhvbYxtwDXTDA7gceDPd7zu+fCZA2phtPdsd/FqSpEU2axhU1ZeAfQeVNwPXtefXAef01K+vrruAY5K8GjgD2FNV+6rqWWAPsKmte1lVfbmqCri+Z1+SpCWyZsjtxqrqaYCqejrJq1p9LfBkz7ipVjtcfapPva8k2+geRTA2Nkan0xl4wtPT0wfGX3LS/r5j5rK/laC351Fhz6NjFPtezJ6HDYND6Xe+v4ao91VVO4AdAOPj4zUxMTHwxDqdDjPjL9x+a98xj58/+P5Wgt6eR4U9j45R7Hsxex72bqLvtlM8tH+fafUpYH3PuHXAU7PU1/WpS5KW0LBhsBuYuSNoK3BLT/2CdlfRqcBz7XTS7cDpSY5tF45PB25v636Y5NR2F9EFPfuSJC2RWU8TJbkBmACOSzJF966gK4GbklwEPAG8sw2/DXgHMAn8GHgXQFXtS/Ih4N427oNVNXNR+t1071j6ZeBz7SFJWkKzhkFVnXeIVaf1GVvAxYfYz05gZ5/6V4ATZ5uHJGnx+A5kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkphnGCT590keSvKNJDckeWGS45PcnWRvkhuTHN3GvqAtT7b1G3r2c1mrP5rkjPm1JEmaq6HDIMla4H3AeFWdCBwFbAE+AlxVVRuBZ4GL2iYXAc9W1a8CV7VxJDmhbfd6YBPw8SRHDTsvSdLczfc00Rrgl5OsAV4EPA28Fbi5rb8OOKc939yWaetPS5JW31VVP62qbwOTwJvmOS9J0hysGXbDqvpOkj8CngD+Fvhz4D7gB1W1vw2bAta252uBJ9u2+5M8B7yy1e/q2XXvNr8gyTZgG8DY2BidTmfg+U5PTx8Yf8lJ+/uOmcv+VoLenkeFPY+OUex7MXseOgySHEv3r/rjgR8Afwac2WdozWxyiHWHqj+/WLUD2AEwPj5eExMTA8+30+kwM/7C7bf2HfP4+YPvbyXo7XlU2PPoGMW+F7PnocMAeBvw7ar6HkCSzwD/HDgmyZp2dLAOeKqNnwLWA1PttNLLgX099Rm92yypDT0h8fiVZy3HFCRpWcznmsETwKlJXtTO/Z8GPAzcCZzbxmwFbmnPd7dl2vovVlW1+pZ2t9HxwEbgnnnMS5I0R/O5ZnB3kpuBrwL7ga/RPYVzK7AryYdb7dq2ybXAnyaZpHtEsKXt56EkN9ENkv3AxVX182HnJUmau/mcJqKqLgcuP6j8GH3uBqqqnwDvPMR+rgCumM9cJEnD8x3IkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJDHPMEhyTJKbk/xlkkeS/EaSVyTZk2Rv+/fYNjZJrk4ymeSBJCf37GdrG783ydb5NiVJmpv5Hhl8FPh8Vf0a8OvAI8B24I6q2gjc0ZYBzgQ2tsc24BqAJK+g+z3Kb6b73cmXzwSIJGlprBl2wyQvA34LuBCgqn4G/CzJZmCiDbsO6ACXApuB66uqgLvaUcWr29g9VbWv7XcPsAm4Ydi5LYQN22898PzxK89axplI0uIbOgyA1wDfA/5nkl8H7gPeD4xV1dMAVfV0kle18WuBJ3u2n2q1Q9WfJ8k2ukcVjI2N0el0Bp7s9PT0gfGXnLR/4O2AOb3OkaS351Fhz6NjFPtezJ7nEwZrgJOB91bV3Uk+yt+fEuonfWp1mPrzi1U7gB0A4+PjNTExMfBkO50OM+Mv7PmrfxCPnz/46xxJenseFfY8Okax78XseT7XDKaAqaq6uy3fTDccvttO/9D+faZn/Pqe7dcBTx2mLklaIkOHQVX9FfBkkte10mnAw8BuYOaOoK3ALe35buCCdlfRqcBz7XTS7cDpSY5tF45PbzVJ0hKZz2kigPcCn05yNPAY8C66AXNTkouAJ4B3trG3Ae8AJoEft7FU1b4kHwLubeM+OHMxWZK0NOYVBlV1PzDeZ9VpfcYWcPEh9rMT2DmfuUiShuc7kCVJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkScz/I6xHgt+HLGm188hAkmQYSJIMA0kShoEkiQUIgyRHJflaks+25eOT3J1kb5Ib2/cjk+QFbXmyrd/Qs4/LWv3RJGfMd06SpLlZiCOD9wOP9Cx/BLiqqjYCzwIXtfpFwLNV9avAVW0cSU4AtgCvBzYBH09y1ALMS5I0oHmFQZJ1wFnAJ9tygLcCN7ch1wHntOeb2zJt/Wlt/GZgV1X9tKq+DUwCb5rPvCRJczPf9xn8N+D3gZe25VcCP6iq/W15Cljbnq8FngSoqv1Jnmvj1wJ39eyzd5tfkGQbsA1gbGyMTqcz8ESnp6cPjL/kpP2HH3wYc3nN5dbb86iw59Exin0vZs9Dh0GS3waeqar7kkzMlPsMrVnWHW6bXyxW7QB2AIyPj9fExES/YX11Oh1mxl/Y8yayuXr8/MFfc7n19jwq7Hl0jGLfi9nzfI4MfhM4O8k7gBcCL6N7pHBMkjXt6GAd8FQbPwWsB6aSrAFeDuzrqc/o3UaStASGvmZQVZdV1bqq2kD3AvAXq+p84E7g3DZsK3BLe767LdPWf7GqqtW3tLuNjgc2AvcMO6/FtmH7rQcekrRaLMZnE10K7EryYeBrwLWtfi3wp0km6R4RbAGoqoeS3AQ8DOwHLq6qny/CvCRJh7AgYVBVHaDTnj9Gn7uBquonwDsPsf0VwBULMRdJ0tz5DmRJkmEgSTIMJEkYBpIkDANJEoaBJAm/A3le/G5kSauFRwaSJMNAkmQYSJIwDCRJeAF5wXgxWdJK5pGBJMkwkCQZBpIkDANJEoaBJAnvJloU3lkkaaUZ+sggyfokdyZ5JMlDSd7f6q9IsifJ3vbvsa2eJFcnmUzyQJKTe/a1tY3fm2Tr/NuSJM3FfE4T7Qcuqap/CpwKXJzkBGA7cEdVbQTuaMsAZwIb22MbcA10wwO4HHgz3e9OvnwmQCRJS2PoMKiqp6vqq+35D4FHgLXAZuC6Nuw64Jz2fDNwfXXdBRyT5NXAGcCeqtpXVc8Ce4BNw85LkjR3qar57yTZAHwJOBF4oqqO6Vn3bFUdm+SzwJVV9RetfgdwKTABvLCqPtzq/xn426r6oz6vs43uUQVjY2On7Nq1a+A5Tk9P85KXvASAB7/z3NybHNJJa1++ZK91sN6eR4U9j45R7Hshen7LW95yX1WNH1yf9wXkJC8B/hfw76rqb5IccmifWh2m/vxi1Q5gB8D4+HhNTEwMPM9Op8PM+At7LvAuugd/dODpUl9M7u15VNjz6BjFvhez53ndWprkH9ANgk9X1Wda+bvt9A/t32dafQpY37P5OuCpw9QlSUtkPncTBbgWeKSq/mvPqt3AzB1BW4FbeuoXtLuKTgWeq6qngduB05Mc2y4cn95qq86G7bceeEjSkWQ+p4l+E/hXwINJ7m+1/whcCdyU5CLgCeCdbd1twDuASeDHwLsAqmpfkg8B97ZxH6yqffOYlyRpjoYOg3Yh+FAXCE7rM76Aiw+xr53AzmHnIkmaHz+OQpLkx1EsFz+yQtKRxDA4AhgMkpabp4kkSR4ZHGkOvu3UIwVJS8EjA0mSYSBJ8jTREe9Q71b29JGkheSRgSTJI4OVyttRJS0kw2AVMBgkzZdhsMr0BsOnNr14GWciaSUxDFaxB7/zXN8v8vHoQdLBDIMR5GklSQczDEact65KAsNAh2BISKPFMNCcDPKVnQaGtPIcMWGQZBPwUeAo4JNVdeUyT0lD8qhCWnmOiDBIchTwMeDtwBRwb5LdVfXw8s5MC2mQo4pBGSzSwjoiwgB4EzBZVY8BJNkFbAYMA/V1qGC55KT9fW+nXQwGklaTIyUM1gJP9ixPAW9eprlIA1nII535WMoAnI/e8PTa05HnSAmD9KnV8wYl24BtbXE6yaNzeI3jgO8PMbcV6332PBJWSs/5yIKPXxF9L7CF6Pmf9CseKWEwBazvWV4HPHXwoKraAewY5gWSfKWqxoeb3spkz6NhFHuG0ex7MXs+Uj7C+l5gY5LjkxwNbAF2L/OcJGlkHBFHBlW1P8l7gNvp3lq6s6oeWuZpSdLIOCLCAKCqbgNuW8SXGOr00gpnz6NhFHuG0ex70XpO1fOu00qSRsyRcs1AkrSMVlUYJNmU5NEkk0m291n/giQ3tvV3J9mw9LNceAP0/VtJvppkf5Jzl2OOC22Ann8vycNJHkhyR5K+t9OtJAP0/G+TPJjk/iR/keSE5ZjnQput755x5yapJCv+DqMBftYXJvle+1nfn+TfzPtFq2pVPOheeP4W8BrgaODrwAkHjfld4BPt+RbgxuWe9xL1vQF4A3A9cO5yz3mJen4L8KL2/N0r/Wc9YM8v63l+NvD55Z73UvTdxr0U+BJwFzC+3PNegp/1hcAfL+TrrqYjgwMfaVFVPwNmPtKi12bguvb8ZuC0JP3e8LaSzNp3VT1eVQ8Af7ccE1wEg/R8Z1X9uC3eRfe9KyvZID3/Tc/ii+nzxs0VaJD/rwE+BPwB8JOlnNwiGbTnBbWawqDfR1qsPdSYqtoPPAe8cklmt3gG6Xu1mWvPFwGfW9QZLb6Bek5ycZJv0f3F+L4lmttimrXvJP8MWF9Vn13KiS2iQf/7/pftNOjNSdb3WT8nqykMBvlIi4E+9mKFWY09zWbgnpP8DjAO/OGizmjxDdRzVX2sqn4FuBT4T4s+q8V32L6T/BJwFXDJks1o8Q3ys/6/wIaqegPwBf7+jMfQVlMYDPKRFgfGJFkDvBzYtySzWzwDfZTHKjNQz0neBnwAOLuqfrpEc1ssc/057wLOWdQZLY3Z+n4pcCLQSfI4cCqwe4VfRJ71Z11Vf93z3/SfAKfM90VXUxgM8pEWu4Gt7fm5wBerXY1ZwUbxozxm7bmdOvgfdIPgmWWY40IbpOeNPYtnAXuXcH6L5bB9V9VzVXVcVW2oqg10rw+dXVVfWZ7pLohBftav7lk8G3hk3q+63FfOF/gq/DuAb9K9Ev+BVvsg3f84AF4I/BkwCdwDvGa557xEfb+R7l8bPwL+Gnhouee8BD1/AfgucH977F7uOS9Bzx8FHmr93gm8frnnvBR9HzS2wwq/m2jAn/V/aT/rr7ef9a/N9zV9B7IkaVWdJpIkDckwkCQZBpIkw0CShGEgScIwkCRhGEiSMAwkScD/B3+GmQaG6dKLAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"base_df['proba_v3'].hist(bins=100)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"base_df['reject_by_proba'] = np.where(base_df['proba_v3'] > 0.114, 1, 0)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.09959550674364144"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_df[base_df['proba_v3'] > 0.114].shape[0] / base_df.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.09959550674364144"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_df['reject_by_proba'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"32261500000.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_df[base_df['reject_by_proba'] == 1]['amount'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Old Rejected Amount: 40200000000.0\n",
"New Rejected Amount: 32176500000.0\n",
"\n",
"Old Rejected Amount Perc: 0.1356510999718237\n",
"New Rejected Amount Perc: 0.10857655766774592\n",
"\n",
"Old percent of trx rejected: 0.11055314139731758\n",
"New percent of trx rejected: 0.09935756953401875\n",
"\n"
]
}
],
"source": [
"thresh_old = 0.25\n",
"thresh_new = 0.11425\n",
"\n",
"# compare amount\n",
"print('Old Rejected Amount:',base_df[base_df['score'] > thresh_old]['amount'].sum())\n",
"print('New Rejected Amount:',base_df[base_df['proba_v3'] > thresh_new]['amount'].sum())\n",
"print()\n",
"\n",
"# # compare Amount ODR\n",
"# print('Old Amount ODR:',base_df[base_df['score'] <= thresh_old]['amount'].sum() / base_df['amount'].sum())\n",
"# print('New Amount ODR:',base_df[base_df['proba_v3'] <= thresh_new]['amount'].sum() / base_df['amount'].sum())\n",
"# print()\n",
"\n",
"# compare Rejected Amount ODR\n",
"print('Old Rejected Amount Perc:',base_df[base_df['score'] > thresh_old]['amount'].sum() / base_df['amount'].sum())\n",
"print('New Rejected Amount Perc:',base_df[base_df['proba_v3'] > thresh_new]['amount'].sum() / base_df['amount'].sum())\n",
"print()\n",
"\n",
"# compare percent of users rejected\n",
"print('Old percent of trx rejected:', base_df[base_df['score'] > thresh_old].shape[0] / base_df.shape[0])\n",
"print('New percent of trx rejected:', base_df[base_df['proba_v3'] > thresh_new].shape[0] / base_df.shape[0])\n",
"print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}