{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import pickle\n",
"import lightgbm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Calendar date under comparison. Assumed from the input file names (19022020) -- confirm.\n",
"TARGET_DATE = pd.Timestamp('2020-02-19')\n",
"\n",
"# Production export: rename its columns to the names the rest of the notebook uses.\n",
"prod_feats = pd.read_csv('test_pl_v3_19022020.csv')\n",
"prod_feats = prod_feats.rename(columns={'trx_date': 'transaction_date',\n",
"                                        'trx id': 'trx_id'})\n",
"prod_feats['transaction_date'] = prod_feats['transaction_date'].astype('datetime64[ms]')\n",
"\n",
"# Match the full calendar date: `.dt.day == 19` alone would also keep the 19th\n",
"# of any other month or year present in the export.\n",
"on_target_date = prod_feats['transaction_date'].dt.normalize() == TARGET_DATE\n",
"prod_feats = prod_feats[on_target_date].reset_index(drop=True)\n",
"\n",
"# Local feature table, restricted to the transactions kept from the production export.\n",
"local_feats = pd.read_parquet('data/unittest_feats_19022020.parquet')\n",
"local_feats = local_feats[local_feats['trx_id'].isin(prod_feats['trx_id'])].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15, 8)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the production export after filtering on transaction_date.\n",
"prod_feats.shape"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the filtered production export.\n",
"prod_feats"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15, 19)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the local features restricted to trx_ids present in prod_feats.\n",
"local_feats.shape"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the local features whose trx_id also appears in prod_feats.\n",
"local_feats"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Columns passed to the model; sorted() fixes the column order used by later cells.\n",
"features = sorted([\n",
"    'ap_co_plo',\n",
"    'current_dpd',\n",
"    'date_of_month',\n",
"    'delin_max_dpd_90',\n",
"    'nondgtl_nonpl_trx_co_180',\n",
"    'oth_last_rep_days',\n",
"    'oth_last_rep_dpd',\n",
"    'payment_type',\n",
"    'pf_delin_max_dpd_12mo',\n",
"    'pl_settle_to_due_last_180',\n",
"    'pl_trx_suc_co_90',\n",
"    'time_appr_to_pl_hour',\n",
"    'time_from_last_sett_pl_hour',\n",
"    'trx_denied_co_90',\n",
"    'trx_sett_sum_30',\n",
"    'util_non_pl',\n",
"    'util_pl',\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Transform Local Feats"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Binary-encode payment_type: '3_months' becomes 0, every other value becomes 1.\n",
"# Run once per load: after encoding no value equals '3_months', so a re-run sets every row to 1.\n",
"not_three_months = local_feats['payment_type'] != '3_months'\n",
"local_feats['payment_type'] = np.where(not_three_months, 1, 0)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Original author's note: NaN in time_appr_to_pl_hour comes from a missing approval\n",
"# time in the DB (only 4 cases); those rows are filled with the median value, 2348.0.\n",
"appr_fill_value = 2348.0\n",
"appr_hours = local_feats['time_appr_to_pl_hour']\n",
"local_feats['time_appr_to_pl_hour'] = appr_hours.fillna(appr_fill_value)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Original author's note: negative values come from an error in the loan table, where the\n",
"# user's previous pl settlement date is after the current pl transaction date (only 2 cases).\n",
"# Those rows are overwritten with the median value, 17.0; NaN rows are left untouched.\n",
"sett_col = 'time_from_last_sett_pl_hour'\n",
"median_hours = 17.0\n",
"is_negative = local_feats[sett_col] < 0\n",
"local_feats.loc[is_negative, sett_col] = median_hours"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get Local Proba"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# NOTE: pickle.load can execute arbitrary code; only load model files from a trusted source.\n",
"# The context manager closes the file handle that a bare open() call would leave open.\n",
"with open('lgbm_pl_score_v3.p', 'rb') as model_file:\n",
"    mod_lgb = pickle.load(model_file)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Score the local features at the model's best iteration and keep column 1 of predict_proba.\n",
"class_proba = mod_lgb.predict_proba(\n",
"    local_feats[features],\n",
"    num_iteration=mod_lgb.best_iteration_,\n",
")\n",
"local_feats['proba_v3'] = class_proba[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Linear rescaling of the raw model score: score_offset + score_scale * score.\n",
"# Vectorised arithmetic replaces the per-element .apply(lambda ...); the values are unchanged.\n",
"# Run once per prediction: re-running this cell alone rescales an already rescaled score.\n",
"score_offset = 0.0046\n",
"score_scale = 0.6233\n",
"local_feats['proba_v3'] = score_offset + (score_scale * local_feats['proba_v3'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prod Feats"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# eval() below resolves a bare `nan` in the serialized feature text through this global.\n",
"nan = np.nan\n",
"\n",
"# SECURITY: eval() executes arbitrary code; acceptable only while the CSV is a trusted export.\n",
"# Series.map parses the single 'features' column instead of building a row object per record\n",
"# (DataFrame.apply with axis=1). The lambda keeps eval running against this notebook's globals.\n",
"prod_feats['features'] = prod_feats['features'].map(lambda payload: eval(payload))\n",
"prod_feats = prod_feats.rename(columns={'pd': 'proba_v3'})\n",
"\n",
"# One column per model feature; rows line up with prod_feats through the shared 0..n-1 index.\n",
"raw_prod_feats = pd.DataFrame(prod_feats['features'].tolist())[features]\n",
"raw_prod_feats = prod_feats[['user_id', 'trx_id', 'proba_v3']].merge(\n",
"    raw_prod_feats, left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the flattened production features next to user_id, trx_id and proba_v3.\n",
"raw_prod_feats"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare Local and Prod Features"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Left join keeps every local row. Column names present on both sides get the default\n",
"# suffixes: _x for the local value, _y for the production value.\n",
"comp = pd.merge(\n",
"    local_feats,\n",
"    raw_prod_feats,\n",
"    on=['user_id', 'trx_id'],\n",
"    how='left',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the joined frame: local columns carry the _x suffix, production columns _y.\n",
"comp"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>ap_co_plo_x</th>\n",
" <th>ap_co_plo_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, ap_co_plo_x, ap_co_plo_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>current_dpd_x</th>\n",
" <th>current_dpd_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id current_dpd_x current_dpd_y diff\n",
"1 7484940 58449146 NaN NaN NaN\n",
"3 14941234 58448884 NaN NaN NaN\n",
"8 15072431 58466682 NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>date_of_month_x</th>\n",
" <th>date_of_month_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, date_of_month_x, date_of_month_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>delin_max_dpd_90_x</th>\n",
" <th>delin_max_dpd_90_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id delin_max_dpd_90_x delin_max_dpd_90_y diff\n",
"1 7484940 58449146 NaN NaN NaN\n",
"3 14941234 58448884 NaN NaN NaN\n",
"8 15072431 58466682 NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>nondgtl_nonpl_trx_co_180_x</th>\n",
" <th>nondgtl_nonpl_trx_co_180_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1059201</td>\n",
" <td>58469495</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>-1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>12079533</td>\n",
" <td>58469572</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id nondgtl_nonpl_trx_co_180_x \\\n",
"1 7484940 58449146 NaN \n",
"3 14941234 58448884 NaN \n",
"8 15072431 58466682 NaN \n",
"13 1059201 58469495 1.0 \n",
"14 12079533 58469572 NaN \n",
"\n",
" nondgtl_nonpl_trx_co_180_y diff \n",
"1 NaN NaN \n",
"3 NaN NaN \n",
"8 NaN NaN \n",
"13 2.0 -1.0 \n",
"14 NaN NaN "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>oth_last_rep_days_x</th>\n",
" <th>oth_last_rep_days_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>14425982</td>\n",
" <td>58466492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14939260</td>\n",
" <td>58466796</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id oth_last_rep_days_x oth_last_rep_days_y diff\n",
"1 7484940 58449146 NaN NaN NaN\n",
"3 14941234 58448884 NaN NaN NaN\n",
"7 14425982 58466492 NaN NaN NaN\n",
"8 15072431 58466682 NaN NaN NaN\n",
"9 14939260 58466796 NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>oth_last_rep_dpd_x</th>\n",
" <th>oth_last_rep_dpd_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>14425982</td>\n",
" <td>58466492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14939260</td>\n",
" <td>58466796</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id oth_last_rep_dpd_x oth_last_rep_dpd_y diff\n",
"1 7484940 58449146 NaN NaN NaN\n",
"3 14941234 58448884 NaN NaN NaN\n",
"7 14425982 58466492 NaN NaN NaN\n",
"8 15072431 58466682 NaN NaN NaN\n",
"9 14939260 58466796 NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>payment_type_x</th>\n",
" <th>payment_type_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, payment_type_x, payment_type_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>pf_delin_max_dpd_12mo_x</th>\n",
" <th>pf_delin_max_dpd_12mo_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>3781021</td>\n",
" <td>58469230</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id pf_delin_max_dpd_12mo_x pf_delin_max_dpd_12mo_y diff\n",
"8 15072431 58466682 NaN NaN NaN\n",
"10 3781021 58469230 NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>pl_settle_to_due_last_180_x</th>\n",
" <th>pl_settle_to_due_last_180_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13834158</td>\n",
" <td>58449105</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>5305928</td>\n",
" <td>58466446</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>14425982</td>\n",
" <td>58466492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14939260</td>\n",
" <td>58466796</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>3781021</td>\n",
" <td>58469230</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id pl_settle_to_due_last_180_x \\\n",
"1 7484940 58449146 NaN \n",
"2 13834158 58449105 NaN \n",
"3 14941234 58448884 NaN \n",
"6 5305928 58466446 NaN \n",
"7 14425982 58466492 NaN \n",
"8 15072431 58466682 NaN \n",
"9 14939260 58466796 NaN \n",
"10 3781021 58469230 NaN \n",
"\n",
" pl_settle_to_due_last_180_y diff \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"6 NaN NaN \n",
"7 NaN NaN \n",
"8 NaN NaN \n",
"9 NaN NaN \n",
"10 NaN NaN "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>pl_trx_suc_co_90_x</th>\n",
" <th>pl_trx_suc_co_90_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, pl_trx_suc_co_90_x, pl_trx_suc_co_90_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>time_appr_to_pl_hour_x</th>\n",
" <th>time_appr_to_pl_hour_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, time_appr_to_pl_hour_x, time_appr_to_pl_hour_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>time_from_last_sett_pl_hour_x</th>\n",
" <th>time_from_last_sett_pl_hour_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>12596360</td>\n",
" <td>58449243</td>\n",
" <td>407.227500</td>\n",
" <td>407.2353</td>\n",
" <td>-0.007800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13834158</td>\n",
" <td>58449105</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12026071</td>\n",
" <td>58448764</td>\n",
" <td>1.781111</td>\n",
" <td>1.7861</td>\n",
" <td>-0.004989</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>11921022</td>\n",
" <td>58466151</td>\n",
" <td>0.077500</td>\n",
" <td>0.0831</td>\n",
" <td>-0.005600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>5305928</td>\n",
" <td>58466446</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>14425982</td>\n",
" <td>58466492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14939260</td>\n",
" <td>58466796</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>3781021</td>\n",
" <td>58469230</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>10210508</td>\n",
" <td>58469286</td>\n",
" <td>51.747222</td>\n",
" <td>51.7531</td>\n",
" <td>-0.005878</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>10210508</td>\n",
" <td>58469358</td>\n",
" <td>51.757500</td>\n",
" <td>51.7622</td>\n",
" <td>-0.004700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1059201</td>\n",
" <td>58469495</td>\n",
" <td>762.848889</td>\n",
" <td>762.8547</td>\n",
" <td>-0.005811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>12079533</td>\n",
" <td>58469572</td>\n",
" <td>2.513611</td>\n",
" <td>2.5186</td>\n",
" <td>-0.004989</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id time_from_last_sett_pl_hour_x \\\n",
"0 12596360 58449243 407.227500 \n",
"1 7484940 58449146 NaN \n",
"2 13834158 58449105 NaN \n",
"3 14941234 58448884 NaN \n",
"4 12026071 58448764 1.781111 \n",
"5 11921022 58466151 0.077500 \n",
"6 5305928 58466446 NaN \n",
"7 14425982 58466492 NaN \n",
"8 15072431 58466682 NaN \n",
"9 14939260 58466796 NaN \n",
"10 3781021 58469230 NaN \n",
"11 10210508 58469286 51.747222 \n",
"12 10210508 58469358 51.757500 \n",
"13 1059201 58469495 762.848889 \n",
"14 12079533 58469572 2.513611 \n",
"\n",
" time_from_last_sett_pl_hour_y diff \n",
"0 407.2353 -0.007800 \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 1.7861 -0.004989 \n",
"5 0.0831 -0.005600 \n",
"6 NaN NaN \n",
"7 NaN NaN \n",
"8 NaN NaN \n",
"9 NaN NaN \n",
"10 NaN NaN \n",
"11 51.7531 -0.005878 \n",
"12 51.7622 -0.004700 \n",
"13 762.8547 -0.005811 \n",
"14 2.5186 -0.004989 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>trx_denied_co_90_x</th>\n",
" <th>trx_denied_co_90_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, trx_denied_co_90_x, trx_denied_co_90_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>trx_sett_sum_30_x</th>\n",
" <th>trx_sett_sum_30_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>12596360</td>\n",
" <td>58449243</td>\n",
" <td>587751.0</td>\n",
" <td>6665738.5</td>\n",
" <td>-6077987.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13834158</td>\n",
" <td>58449105</td>\n",
" <td>23440.0</td>\n",
" <td>536196.0</td>\n",
" <td>-512756.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>14425982</td>\n",
" <td>58466492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14939260</td>\n",
" <td>58466796</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id trx_sett_sum_30_x trx_sett_sum_30_y diff\n",
"0 12596360 58449243 587751.0 6665738.5 -6077987.5\n",
"1 7484940 58449146 NaN NaN NaN\n",
"2 13834158 58449105 23440.0 536196.0 -512756.0\n",
"3 14941234 58448884 NaN NaN NaN\n",
"7 14425982 58466492 NaN NaN NaN\n",
"8 15072431 58466682 NaN NaN NaN\n",
"9 14939260 58466796 NaN NaN NaN"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>util_non_pl_x</th>\n",
" <th>util_non_pl_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, util_non_pl_x, util_non_pl_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>util_pl_x</th>\n",
" <th>util_pl_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [user_id, trx_id, util_pl_x, util_pl_y, diff]\n",
"Index: []"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id</th>\n",
" <th>trx_id</th>\n",
" <th>proba_v3_x</th>\n",
" <th>proba_v3_y</th>\n",
" <th>diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>12596360</td>\n",
" <td>58449243</td>\n",
" <td>0.022469</td>\n",
" <td>0.024439</td>\n",
" <td>-1.970254e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7484940</td>\n",
" <td>58449146</td>\n",
" <td>0.018400</td>\n",
" <td>0.018043</td>\n",
" <td>3.575876e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13834158</td>\n",
" <td>58449105</td>\n",
" <td>0.009670</td>\n",
" <td>0.008910</td>\n",
" <td>7.597399e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>14941234</td>\n",
" <td>58448884</td>\n",
" <td>0.020853</td>\n",
" <td>0.021020</td>\n",
" <td>-1.671536e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12026071</td>\n",
" <td>58448764</td>\n",
" <td>0.042513</td>\n",
" <td>0.041929</td>\n",
" <td>5.839913e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>11921022</td>\n",
" <td>58466151</td>\n",
" <td>0.074518</td>\n",
" <td>0.076679</td>\n",
" <td>-2.160534e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>5305928</td>\n",
" <td>58466446</td>\n",
" <td>0.012735</td>\n",
" <td>0.012097</td>\n",
" <td>6.384677e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>14425982</td>\n",
" <td>58466492</td>\n",
" <td>0.083455</td>\n",
" <td>0.085168</td>\n",
" <td>-1.712616e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>15072431</td>\n",
" <td>58466682</td>\n",
" <td>0.045488</td>\n",
" <td>0.046737</td>\n",
" <td>-1.248700e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14939260</td>\n",
" <td>58466796</td>\n",
" <td>0.103563</td>\n",
" <td>0.106943</td>\n",
" <td>-3.379802e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>3781021</td>\n",
" <td>58469230</td>\n",
" <td>0.011798</td>\n",
" <td>0.011798</td>\n",
" <td>4.597017e-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>10210508</td>\n",
" <td>58469286</td>\n",
" <td>0.100953</td>\n",
" <td>0.100953</td>\n",
" <td>5.689893e-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>10210508</td>\n",
" <td>58469358</td>\n",
" <td>0.104775</td>\n",
" <td>0.104775</td>\n",
" <td>7.494005e-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1059201</td>\n",
" <td>58469495</td>\n",
" <td>0.140310</td>\n",
" <td>0.137226</td>\n",
" <td>3.083093e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>12079533</td>\n",
" <td>58469572</td>\n",
" <td>0.211929</td>\n",
" <td>0.211929</td>\n",
" <td>1.387779e-16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id trx_id proba_v3_x proba_v3_y diff\n",
"0 12596360 58449243 0.022469 0.024439 -1.970254e-03\n",
"1 7484940 58449146 0.018400 0.018043 3.575876e-04\n",
"2 13834158 58449105 0.009670 0.008910 7.597399e-04\n",
"3 14941234 58448884 0.020853 0.021020 -1.671536e-04\n",
"4 12026071 58448764 0.042513 0.041929 5.839913e-04\n",
"5 11921022 58466151 0.074518 0.076679 -2.160534e-03\n",
"6 5305928 58466446 0.012735 0.012097 6.384677e-04\n",
"7 14425982 58466492 0.083455 0.085168 -1.712616e-03\n",
"8 15072431 58466682 0.045488 0.046737 -1.248700e-03\n",
"9 14939260 58466796 0.103563 0.106943 -3.379802e-03\n",
"10 3781021 58469230 0.011798 0.011798 4.597017e-16\n",
"11 10210508 58469286 0.100953 0.100953 5.689893e-16\n",
"12 10210508 58469358 0.104775 0.104775 7.494005e-16\n",
"13 1059201 58469495 0.140310 0.137226 3.083093e-03\n",
"14 12079533 58469572 0.211929 0.211929 1.387779e-16"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Two values count as equal when |local - prod| <= DIFF_ATOL + DIFF_RTOL * |prod|,\n",
"# which hides pure floating-point noise (differences around 1e-16).\n",
"DIFF_RTOL = 1e-9\n",
"DIFF_ATOL = 1e-12\n",
"\n",
"# For each feature and for the score, show only the rows where local and production disagree.\n",
"for feat in features + ['proba_v3']:\n",
"    local_col, prod_col = feat + '_x', feat + '_y'\n",
"    out = comp[['user_id', 'trx_id', local_col, prod_col]].copy()\n",
"    out['diff'] = out[local_col] - out[prod_col]\n",
"    # NaN - NaN is NaN and NaN != 0 is True, so a plain `diff != 0` test also lists rows\n",
"    # that are missing on both sides. Only a value missing on one side is a mismatch.\n",
"    one_sided_nan = out[local_col].isna() ^ out[prod_col].isna()\n",
"    # Comparisons against NaN are False, so rows with a missing side are decided above.\n",
"    beyond_tol = out['diff'].abs() > (DIFF_ATOL + DIFF_RTOL * out[prod_col].abs())\n",
"    out = out[one_sided_nan | beyond_tol]\n",
"    display(out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}