{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pymysql"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import configparser\n",
"\n",
"# Load DB credentials from an external config file (never hardcoded here).\n",
"config = configparser.ConfigParser()\n",
"config.read('/home/ec2-user/SageMaker/zhilal/config.ini')\n",
"\n",
"mysql_root = config['MYSQL-ROOT']\n",
"host = mysql_root['HOST']\n",
"user = mysql_root['USER']\n",
"password = mysql_root['PASSWORD']\n",
"\n",
"def connect_sql():\n",
"    \"\"\"Open a fresh PyMySQL connection that returns rows as dicts.\"\"\"\n",
"    return pymysql.connect(host=host,\n",
"                           user=user,\n",
"                           password=password,\n",
"                           cursorclass=pymysql.cursors.DictCursor)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# Model input features, sorted alphabetically for a stable ordering.\n",
"features = sorted([\n",
"    'ap_co_plo',\n",
"    'current_dpd',\n",
"    'date_of_month',\n",
"    'delin_max_dpd_90',\n",
"    'nondgtl_nonpl_trx_co_180',\n",
"    'oth_last_rep_days',\n",
"    'oth_last_rep_dpd',\n",
"    'payment_type',\n",
"    'pf_delin_max_dpd_12mo',\n",
"    'pl_settle_to_due_last_180',\n",
"    'pl_trx_jumbomini_suc_co_90',\n",
"    'time_appr_to_pl_hour',\n",
"    'time_from_last_sett_pl_hour',\n",
"    'trx_denied_co_90',\n",
"    'trx_sett_sum_30',\n",
"    'util_non_pl',\n",
"    'util_pl',\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- ap_co_plo\n",
" - --> same as PL V2\n",
"\n",
"\n",
"- current_dpd\n",
"- delin_max_dpd_90 (renamed from delin_max_dpd_3mo)\n",
" - --> same as PL V2 (less features)\n",
"\n",
"\n",
"- oth_last_rep_days\n",
"- oth_last_rep_dpd\n",
" - --> same as PL V2 (less features)\n",
"\n",
"\n",
"- pf_delin_max_dpd_12mo\n",
" - --> same as PL V2\n",
"\n",
"\n",
"- time_appr_to_pl_hour (renamed from time_approve_to_pl_hour)\n",
" - --> same as PL V2\n",
"\n",
"\n",
"- util_non_pl\n",
" - --> same as PL V2\n",
"\n",
"\n",
"- util_pl\n",
" - --> same as PL V2\n",
"\n",
"\n",
"- pl_trx_jumbomini_suc_co_90 (in the code it's named pl_trx_co_jumbomini_4_90)\n",
" - new feature. No NaN values\n",
"\n",
"\n",
"- date_of_month\n",
" - get date of day\n",
"\n",
"\n",
"- payment_type\n",
" - get payment type\n",
"\n",
"\n",
"- nondgtl_nonpl_trx_co_180\n",
" - --> requires adjustments\n",
"\n",
"\n",
"- trx_denied_co_90\n",
" - --> new feature. No NaN values\n",
"\n",
"\n",
"- trx_sett_sum_30\n",
" - --> new feature. NaN value possible. How to handle this?\n",
"\n",
"\n",
"- pl_settle_to_due_last_180 (in the code it's named settle_to_due_last_pl_180)\n",
"- time_from_last_sett_pl_hour (in the code it's named time_from_prev_pl_settle)\n",
" - --> requires adjustments"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Base Query"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_base_pl_trx(tid):\n",
"    \"\"\"Fetch the base row (user, payment type, dates) for PL transaction `tid`.\"\"\"\n",
"    # NOTE(review): f-string interpolation into SQL is injection-prone; prefer\n",
"    # parameterized queries if `tid` can come from untrusted input.\n",
"    query_kr = f'''\n",
"    SELECT user_id, \n",
"           id AS trx_id,\n",
"           payment_type, \n",
"           transaction_type,\n",
"           transaction_date\n",
"    FROM l2alpha.`transaction`\n",
"    WHERE id = {tid}\n",
"    '''\n",
"    kr_df = pd.read_sql_query(query_kr, connect_sql())\n",
"    # Fixed: the original built the frame but never returned it.\n",
"    return kr_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# New Features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## pl_trx_jumbomini_suc_co_90"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def get_pl_trx(tids):\n",
"    # pl_trx_jumbomini_suc_co_90: count of distinct successful (status = 4)\n",
"    # purchase transactions (transaction_type = 2) in the 90 days strictly\n",
"    # before the PL transaction date of the given id.\n",
"    # NOTE(review): despite the plural name, `tids` is used as a single id\n",
"    # (`id = {tids}`), and f-string interpolation into SQL is injection-prone.\n",
"    pl_trx_query = f'''\n",
"    SELECT\n",
"        base.user_id,\n",
"        base.pl_trx_id AS trx_id,\n",
"        COUNT(DISTINCT(CASE WHEN datediff(DATE(pl_transaction_date), transaction_date) <= 90 \n",
"            AND datediff(DATE(pl_transaction_date), transaction_date) >= 0 \n",
"            THEN base.transaction_id ELSE NULL END)) AS pl_trx_jumbomini_suc_co_90\n",
"    FROM\n",
"        (\n",
"        SELECT\n",
"            tr1.user_id,\n",
"            tr1.id AS pl_trx_id,\n",
"            tr1.pl_transaction_date,\n",
"            tr2.id AS transaction_id,\n",
"            tr2.transaction_date,\n",
"            tr2.amount\n",
"        FROM\n",
"            (\n",
"            SELECT\n",
"                user_id,\n",
"                id,\n",
"                transaction_date AS pl_transaction_date\n",
"            FROM\n",
"                l2alpha.`transaction`\n",
"            WHERE\n",
"                id = {tids} ) AS tr1\n",
"        LEFT JOIN (\n",
"            SELECT\n",
"                *\n",
"            FROM\n",
"                l2alpha.`transaction`\n",
"            WHERE\n",
"                status = 4\n",
"                and transaction_type = 2 ) AS tr2 ON\n",
"            tr1.user_id = tr2.user_id\n",
"            AND tr2.transaction_date < tr1.pl_transaction_date ) AS base\n",
"    GROUP BY\n",
"        base.user_id,\n",
"        base.pl_transaction_date\n",
"    '''\n",
"    pl_trx = pd.read_sql(pl_trx_query, connect_sql())\n",
"    return pl_trx"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"get_pl_trx(tids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## nondgtl_nonpl_trx_co_180"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"def get_trx_history(base_df):\n",
"    # Pull each user's successful (status = 4) non-PL (transaction_type <> 2)\n",
"    # purchase history up to the newest transaction date in base_df, joined to\n",
"    # product, loan amount and merchant, then merge it back onto base_df per\n",
"    # user and compute the time gap to each past transaction.\n",
"    # NOTE(review): `IN {uids}` breaks for a one-element tuple -- MySQL rejects\n",
"    # the trailing comma in 'IN (123,)'; confirm base_df always has >1 user.\n",
"    # NOTE(review): the merchant join selects nothing from `m`, so it acts only\n",
"    # as an existence filter -- confirm that is intentional.\n",
"    uids = tuple(base_df['user_id'])\n",
"    max_trx_date = base_df['transaction_date'].max()\n",
"\n",
"    # Remove all promotional and service related fees\n",
"    excluded_product = ('biaya layanan'\n",
"                        ,'shipping fee'\n",
"                        ,'admin fee'\n",
"                        ,'biaya kirim yang dibebankan'\n",
"                        ,'biaya tambahan'\n",
"                        ,'discount'\n",
"                        ,'adminfee'\n",
"                        ,'shippingfee'\n",
"                        ,'additionalfee'\n",
"                        ,'diskon tambahan'\n",
"                        ,'donasi'\n",
"                        ,'donation'\n",
"                        ,'charge kredivo 2.3%'\n",
"                        ,'lainnya'\n",
"                        ,'user credit'\n",
"                        ,'deals')\n",
"\n",
"    query = f\"\"\"\n",
"    SELECT\n",
"        t.user_id,\n",
"        t.transaction_id AS trx_id,\n",
"        t.transaction_date,\n",
"        LOWER(p.name) AS product_name,\n",
"        LOWER(p.category) AS category,\n",
"        l.loan_amount,\n",
"        t.merchant_id\n",
"    FROM\n",
"        (\n",
"        SELECT\n",
"            user_id,\n",
"            id AS transaction_id,\n",
"            transaction_date,\n",
"            merchant_id\n",
"        FROM\n",
"            l2alpha.`transaction`\n",
"        WHERE\n",
"            user_id IN {uids}\n",
"            AND status = 4\n",
"            AND transaction_type <> 2\n",
"            AND transaction_date <= '{max_trx_date}') AS t\n",
"    JOIN (\n",
"        SELECT\n",
"            name,\n",
"            id AS product_id,\n",
"            transaction_id,\n",
"            category,\n",
"            sku\n",
"        FROM\n",
"            l2alpha.product\n",
"        WHERE\n",
"            sku_type = 0\n",
"            AND name NOT IN {excluded_product}\n",
"            AND sku NOT IN {excluded_product}) AS p ON\n",
"        t.transaction_id = p.transaction_id\n",
"    JOIN (\n",
"        SELECT\n",
"            transaction_id,\n",
"            loan_amount\n",
"        FROM\n",
"            l2alpha.loan\n",
"        WHERE\n",
"            is_active = 1\n",
"            AND loan_amount > 0) AS l ON\n",
"        p.transaction_id = l.transaction_id\n",
"    JOIN (\n",
"        SELECT\n",
"            id,\n",
"            name\n",
"        FROM\n",
"            l2alpha.merchant) AS m ON\n",
"        t.merchant_id = m.id\n",
"    \"\"\"\n",
"\n",
"    df = pd.read_sql(query, connect_sql())\n",
"    # The _x/_y suffixes come from the per-user merge of current vs. past rows.\n",
"    out_df = base_df[['user_id', 'trx_id', 'transaction_date']].merge(df, how='left', on='user_id')\\\n",
"             .rename(columns={'transaction_date_x':'trx_dt_curr',\n",
"                              'transaction_date_y':'trx_dt_past',\n",
"                              'trx_id_x':'trx_id',\n",
"                              'trx_id_y':'trx_id_ref'})\n",
"    # Seconds between the current transaction and each past transaction.\n",
"    out_df['time_diff'] = (out_df['trx_dt_curr'] - out_df['trx_dt_past']).dt.total_seconds()\n",
"    return out_df"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# def get_ticket(x):\n",
"# if int(x['merchant_id']) in [154,428,48,87,432]:\n",
"# if 'hotel' not in x['product_name'] and 'hotel' not in x['category']:\n",
"# return True\n",
"# elif x['merchant_id'] in [270]:\n",
"# if 'flight' in x['product_name'] or 'flight' in x['category']:\n",
"# return True\n",
"\n",
"# return False\n",
"\n",
"def get_utilities(x):\n",
"    \"\"\"Flag utility purchases (electricity, water, cable TV, internet).\"\"\"\n",
"    keywords = ('pdam', 'pln', 'tv kabel', 'electricity', 'electricity_postpaid',\n",
"                'listrik pln', 'internet & tv kabel', 'air pdam',\n",
"                '75061609', '75062176', 'listrik prabayar', 'listrik lainnya',\n",
"                'pulsa listrik')\n",
"    if any(w in x['product_name'] or w in x['category'] for w in keywords):\n",
"        return True\n",
"    # Catch phrasings such as 'tagihan listrik' or 'voucher token pln'.\n",
"    pattern = r'(tagihan).*(listrik|telkom)|(voucher|token).*(listrik|pln)|pln.*(prepaid|postpaid|token)|\\bpdam\\b'\n",
"    return bool(re.search(pattern, x['product_name']))\n",
"\n",
"def get_game(x):\n",
"    \"\"\"Flag game-related purchases; always returns a bool.\n",
"\n",
"    Fixed: the original fell through and returned None when the category had\n",
"    no 'voucher' match (falsy, but inconsistent with the other flaggers).\n",
"    \"\"\"\n",
"    if int(x['merchant_id']) in [426, 197]:\n",
"        return True\n",
"    if re.search(r'(voucher|data).*game|game.*(voucher|data)|\\bgamesmax\\b', x['product_name']):\n",
"        return True\n",
"    # Generic vouchers only count when the product itself mentions a game.\n",
"    return bool(re.search('voucher', x['category']) and re.search('game', x['product_name']))\n",
"\n",
"def get_pulsa(x):\n",
"    \"\"\"Flag phone-credit / data-package purchases; always returns a bool.\n",
"\n",
"    Fixed: every path now returns a bool (the original could fall through and\n",
"    return None, compared merchant_id inconsistently with/without int(), and\n",
"    ended with an unreachable else branch).\n",
"    \"\"\"\n",
"    if int(x['merchant_id']) == 8:\n",
"        # Merchant 8 sells mixed digital goods; exclude utilities/games first.\n",
"        for n in ['pln', 'listrik', 'pdam', 'game']:\n",
"            if n in x['product_name']:\n",
"                return False\n",
"        return 'rp' in x['product_name']\n",
"    # Other merchants: exclude non-pulsa digital and physical categories.\n",
"    for n in ['pln', 'listrik', 'pdam', 'game', 'flight', 'mobiles & tablets', 'komputer & aksesoris']:\n",
"        if n in x['product_name'] or n in x['category']:\n",
"            return False\n",
"    for w in ['mobile', 'digital products', 'pulsa',\n",
"              'virtual', '75061635', 'paket data', 'digital utilities', 'pasca bayar']:\n",
"        if w in x['category']:\n",
"            return True\n",
"    if re.search(r'\\bpulsa\\b', x['product_name']):\n",
"        return True\n",
"    for w in ['paket data', 'kuota', 'xl xtra', 'indosat', 'telkomsel', 'voucher 3',\n",
"              'smartfren', 'voucher tri', 'xl xtra combo', 'axis', 'paket internet']:\n",
"        if w in x['product_name']:\n",
"            return True\n",
"    return False\n",
"\n",
"def get_digital_product_status(df):\n",
"    \"\"\"Add boolean digital-category flags plus a combined trx_cat_digital flag.\"\"\"\n",
"    df['trx_cat_game'] = df.apply(lambda x: get_game(x), axis=1)\n",
"    df['trx_cat_pulsa'] = df.apply(lambda x: get_pulsa(x), axis=1)\n",
"    df['trx_cat_utilities'] = df.apply(lambda x: get_utilities(x), axis=1)\n",
"\n",
"    # Fixed: positional `axis` for DataFrame.any is deprecated (removed in\n",
"    # pandas 2.0); spell it out as axis=1.\n",
"    df['trx_cat_digital'] = df[['trx_cat_game', 'trx_cat_pulsa', 'trx_cat_utilities']].any(axis=1)\n",
"\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"def create_prev_digital_trx_per_type(df, keyword, window='all'):\n",
"    \"\"\"Aggregate prior digital transactions of one category per (user_id, trx_id).\n",
"\n",
"    keyword: one of ticket/game/pulsa/utilities/digital (uses column trx_cat_<keyword>).\n",
"    window:  lookback in days (int) or 'all' for the entire history.\n",
"    Returns counts plus sum/mean loan_amount, named per the feature convention.\n",
"    \"\"\"\n",
"    if keyword not in ['ticket', 'game', 'pulsa', 'utilities', 'digital']:\n",
"        raise ValueError(f'{keyword} not available. Try ticket, game, pulsa, utilities, digital.')\n",
"\n",
"    if isinstance(window, int):\n",
"        df = df[(df['time_diff'] <= window*(24*60*60)) & (df['time_diff'] > 0)]\n",
"        name = str(window)\n",
"    elif window == 'all':\n",
"        df = df[df['time_diff'] > 0]\n",
"        name = 'ever'\n",
"    else:\n",
"        # Simplified: the original's final elif condition was always true here.\n",
"        raise ValueError(f'The input \"{window}\" is wrong. Window value must be an integer or \"all\".')\n",
"\n",
"    key = 'trx_cat_' + keyword\n",
"    digital = df.groupby(['user_id', 'trx_id', key]).agg({key: 'count',\n",
"                                                          'loan_amount': ['sum', 'mean']})\\\n",
"                .unstack().fillna(0)\n",
"\n",
"    # Flatten the MultiIndex columns into 'measure-stat-flag' strings.\n",
"    digital.columns = ['-'.join(str(item) for item in groups)\n",
"                       for groups in digital.columns.values]\n",
"\n",
"    if keyword == 'digital':\n",
"        final = digital.reset_index()\n",
"        final.columns = ['user_id', 'trx_id',\n",
"                         f'nondgtl_nonpl_trx_co_{name}',\n",
"                         f'dgtl_trx_co_all_{name}',\n",
"                         f'nondgtl_nonpl_trx_sum_amt_{name}',\n",
"                         f'dgtl_trx_sum_amt_all_{name}',\n",
"                         f'nondgtl_nonpl_trx_avg_amt_{name}',\n",
"                         f'dgtl_trx_avg_amt_all_{name}']\n",
"    else:\n",
"        # Keep only the columns for rows where the category flag is True.\n",
"        relevant_columns = [col for col in digital.columns if 'True' in col]\n",
"        final = digital[relevant_columns].reset_index()\n",
"        final.columns = ['user_id', 'trx_id',\n",
"                         f'dgtl_trx_co_{keyword}_{name}',\n",
"                         f'dgtl_trx_sum_amt_{keyword}_{name}',\n",
"                         f'dgtl_trx_avg_amt_{keyword}_{name}']\n",
"\n",
"    return final"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"create_prev_digital_trx_per_type(trx_hist, 'digital', window=180)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## trx_denied_co_90"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"def get_denied_trx(tid):\n",
"    # trx_denied_co_90: count of distinct denied transactions (status = 5) in\n",
"    # the 90 days up to and including the PL transaction date of `tid`.\n",
"    # NOTE(review): unlike get_pl_trx, the join uses <= (same-timestamp rows\n",
"    # count) -- confirm the asymmetry is intentional.\n",
"    denied_trx_q = f'''\n",
"    SELECT\n",
"        base.user_id,\n",
"        base.pl_trx_id AS trx_id,\n",
"        COUNT(DISTINCT(CASE WHEN datediff(DATE(pl_transaction_date), transaction_date) <= 90 \n",
"            AND datediff(DATE(pl_transaction_date), transaction_date) >= 0 \n",
"            THEN base.transaction_id ELSE NULL END)) AS trx_denied_co_90\n",
"    FROM\n",
"        (\n",
"        SELECT\n",
"            tr1.user_id,\n",
"            tr1.id AS pl_trx_id,\n",
"            tr1.pl_transaction_date,\n",
"            tr2.id AS transaction_id,\n",
"            tr2.transaction_date,\n",
"            tr2.amount\n",
"        FROM\n",
"            (\n",
"            SELECT\n",
"                user_id,\n",
"                id,\n",
"                transaction_date AS pl_transaction_date\n",
"            FROM\n",
"                l2alpha.`transaction`\n",
"            WHERE\n",
"                id = {tid} ) AS tr1\n",
"        LEFT JOIN (\n",
"            SELECT\n",
"                *\n",
"            FROM\n",
"                l2alpha.`transaction`\n",
"            WHERE\n",
"                status = 5 ) AS tr2 ON\n",
"            tr1.user_id = tr2.user_id\n",
"            AND tr2.transaction_date <= tr1.pl_transaction_date ) AS base\n",
"    GROUP BY\n",
"        base.user_id,\n",
"        base.pl_transaction_date\n",
"    '''\n",
"    denied_trx = pd.read_sql(denied_trx_q, connect_sql())\n",
"    return denied_trx"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"get_denied_trx(tids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## trx_sett_sum_30"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"def get_settled_trx(tid):\n",
"    # trx_sett_sum_30: total loan_amount of active loans that started within\n",
"    # the 30 days before the PL transaction date of `tid` and were settled on\n",
"    # or before that date.\n",
"    paid_trx_q = f'''\n",
"    SELECT base.user_id,\n",
"           base.pl_trx_id AS trx_id,\n",
"           SUM(CASE WHEN datediff(DATE(pl_transaction_date),start_date) <= 30 AND \n",
"               datediff(DATE(pl_transaction_date),start_date) >=0 \n",
"               THEN base.loan_amount ELSE 0 END) AS trx_sett_sum_30\n",
"    FROM\n",
"        (SELECT tr.user_id,\n",
"                tr.id AS pl_trx_id,\n",
"                tr.pl_transaction_date, \n",
"                l.transaction_id,\n",
"                l.start_date,\n",
"                l.loan_amount,\n",
"                l.tenure\n",
"\n",
"         FROM \n",
"             (SELECT user_id, \n",
"                     id, \n",
"                     transaction_date AS pl_transaction_date\n",
"              FROM l2alpha.`transaction` WHERE id = {tid}\n",
"             ) AS tr\n",
"         INNER JOIN \n",
"             (SELECT user_id,\n",
"                     start_date, \n",
"                     settlement_date,\n",
"                     loan_amount,\n",
"                     tenure,\n",
"                     transaction_id \n",
"              FROM l2alpha.loan\n",
"              WHERE is_active = 1 AND loan_amount > 0\n",
"             ) AS l\n",
"         ON l.user_id = tr.user_id AND settlement_date <= pl_transaction_date\n",
"        ) AS base\n",
"    GROUP BY base.user_id, base.pl_transaction_date\n",
"    '''\n",
"    settled_trx = pd.read_sql(paid_trx_q, connect_sql())\n",
"    return settled_trx"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"get_settled_trx(tids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# pl_settle_to_due_last_180 (from settle_to_due_last_pl_180)\n",
"# time_from_last_sett_pl_hour (from time_from_prev_pl_settle)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"def get_trx_settle_data(base_df):\n",
"    # Build the per-loan settlement history used by the early-settlement and\n",
"    # time-from-last-settlement features: derives each loan's due_date from its\n",
"    # payment_type, merges the history back onto base_df per user, and computes\n",
"    # time_diff plus settle_to_due (days settled before the due date).\n",
"    # NOTE(review): `IN {uids}` breaks for a one-element tuple in MySQL.\n",
"    # NOTE(review): payment types outside 30_days/3_months/6_months/12_months\n",
"    # map to NULL payment_type_int, so due_date and settle_to_due become NULL.\n",
"    uids = tuple(base_df['user_id'])\n",
"    max_trx_date = base_df['transaction_date'].max()\n",
"\n",
"    early_settle_q = f'''\n",
"    SELECT *,\n",
"           DATE(transaction_date) + INTERVAL payment_type_int DAY AS due_date\n",
"    FROM\n",
"        (SELECT tr.user_id, tr.trx_id, transaction_date, payment_type, transaction_type, status,\n",
"                CASE WHEN payment_type = '30_days' THEN '30'\n",
"                     WHEN payment_type = '3_months' THEN '90'\n",
"                     WHEN payment_type = '6_months' THEN '180' \n",
"                     WHEN payment_type = '12_months' THEN '360' ELSE NULL END AS payment_type_int,\n",
"\n",
"                settlement_date, loan_amount, is_active\n",
"         FROM\n",
"             (SELECT user_id,\n",
"                     id AS trx_id,\n",
"                     transaction_date,\n",
"                     payment_type,\n",
"                     transaction_type,\n",
"                     status\n",
"              FROM l2alpha.`transaction`\n",
"              WHERE user_id IN {uids}\n",
"              AND transaction_type = 2\n",
"              AND status = 4\n",
"              AND transaction_date <= '{max_trx_date}'\n",
"             ) AS tr\n",
"\n",
"         LEFT JOIN\n",
"\n",
"             (SELECT transaction_id AS trx_id,\n",
"                     settlement_date,\n",
"                     loan_amount,\n",
"                     is_active\n",
"              FROM l2alpha.loan\n",
"             ) AS lo\n",
"\n",
"         ON tr.trx_id = lo.trx_id\n",
"        ) AS base\n",
"    WHERE loan_amount > 0 AND is_active = 1\n",
"    '''\n",
"    query_df = pd.read_sql(early_settle_q, connect_sql())\n",
"    # Days between due date and actual settlement (positive = settled early).\n",
"    # NOTE(review): assumes the driver returns due_date as a date-like and\n",
"    # settlement_date as datetime64 -- confirm dtypes from pd.read_sql.\n",
"    query_df['settle_to_due'] = (query_df['due_date'] - query_df['settlement_date'].dt.date).dt.days\n",
"    # The _x/_y suffixes come from the per-user merge of current vs. past rows.\n",
"    df = base_df[['user_id', 'trx_id', 'transaction_date']].merge(query_df, how='left', on='user_id')\\\n",
"         .rename(columns={'transaction_date_x':'trx_dt_current',\n",
"                          'transaction_date_y':'trx_dt_past',\n",
"                          'trx_id_x':'trx_id',\n",
"                          'trx_id_y':'trx_id_ref'})\n",
"    df['time_diff'] = (df['trx_dt_current'] - df['trx_dt_past']).dt.total_seconds()\n",
"    df = df.sort_values(['user_id', 'trx_dt_past']).reset_index(drop=True)\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"def create_early_settle_feats(df, base_df, window='all'):\n",
"    \"\"\"Build early-settlement count and settle-to-due stats per transaction.\n",
"\n",
"    window: lookback in days (int) or 'all' for the entire history.\n",
"    \"\"\"\n",
"    if isinstance(window, int):\n",
"        df = df[(df['time_diff'] <= window*(24*60*60)) & (df['time_diff'] > 0)]\n",
"        name = str(window)\n",
"    elif window == 'all':\n",
"        df = df[df['time_diff'] > 0]\n",
"        name = 'ever'\n",
"    else:\n",
"        # Fixed: an invalid window used to leave `name` undefined and crash\n",
"        # later with a NameError; fail fast like the other window-based helper.\n",
"        raise ValueError(f'The input \"{window}\" is wrong. Window value must be an integer or \"all\".')\n",
"    df = df.sort_values(['user_id', 'trx_id', 'trx_dt_past'])\n",
"\n",
"    # How many times the user settled >= 30 days early within the window\n",
"    # (NaN settle_to_due rows are excluded from the count).\n",
"    co_df = df.copy()\n",
"    co_df['flag_early_settle'] = np.where(co_df['settle_to_due'] >= 30, 1, 0)\n",
"    co_df['flag_early_settle'] = np.where(co_df['settle_to_due'].isna(), np.nan, co_df['flag_early_settle'])\n",
"    co_df = co_df.groupby('trx_id').agg({'flag_early_settle': 'sum'}).reset_index()\n",
"    co_df.columns = ['trx_id', f'early_settle_co_{name}']\n",
"\n",
"    # Min/max/mean/median days settled early, plus the most recent value.\n",
"    es_df = df.groupby('trx_id').agg({'settle_to_due': ['max', 'min', 'mean', 'median', 'last']})\n",
"    col_names = ['settle_to_due_max_{}', 'settle_to_due_min_{}', 'settle_to_due_avg_{}',\n",
"                 'settle_to_due_med_{}', 'settle_to_due_last_pl_{}']\n",
"    es_df.columns = [col.format(name) for col in col_names]\n",
"    es_df = es_df.reset_index()\n",
"    out_df = base_df[['user_id', 'trx_id']].merge(es_df, how='left', on='trx_id')\\\n",
"             .merge(co_df, how='left', on='trx_id')\n",
"    return out_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"create_early_settle_feats(trx_settle, raw_df, window=180)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def create_time_from_last_pl_feat(df):\n",
"    \"\"\"Hours between the current PL transaction and the previous PL settlement.\"\"\"\n",
"    # Fixed: work on a copy -- the boolean slice is a view, and assigning into\n",
"    # it triggers SettingWithCopyWarning and may not propagate the write.\n",
"    time_from_last_pl = df[df['time_diff'] >= 0].copy()\n",
"    # The current transaction row (time_diff == 0) must not contribute its own\n",
"    # settlement date.\n",
"    time_from_last_pl.loc[(time_from_last_pl['time_diff'] == 0) &\n",
"                          (time_from_last_pl['settlement_date'].notna()), 'settlement_date'] = np.nan\n",
"    time_from_last_pl = time_from_last_pl.sort_values(['user_id', 'trx_id', 'trx_dt_past'])\n",
"\n",
"    # Last two rows per trx: the previous settlement and the current row.\n",
"    time_from_last_pl_grp = time_from_last_pl.groupby('trx_id').tail(2).copy()\n",
"    time_from_last_pl_grp['settle_shift'] = time_from_last_pl_grp['settlement_date'].shift()\n",
"    time_from_last_pl_grp['time_from_prev_pl_settle'] = (time_from_last_pl_grp['trx_dt_current'] -\n",
"                                                         time_from_last_pl_grp['settle_shift']).dt.total_seconds()/3600\n",
"\n",
"    time_from_last_pl_final = time_from_last_pl_grp.groupby('trx_id').tail(1)\n",
"    time_from_last_pl_final = time_from_last_pl_final[['user_id', 'trx_id',\n",
"                                                       'time_from_prev_pl_settle']].reset_index(drop=True)\n",
"    return time_from_last_pl_final"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"create_time_from_last_pl_feat(trx_settle)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}