Loading exploratory_data_analysis.py +6 −0 Original line number Diff line number Diff line Loading @@ -260,6 +260,10 @@ num_cols = df.select_dtypes(include=[np.number]).columns zero_counts = (df[num_cols] == 0).sum().sort_values(ascending=False) print(zero_counts) print("\n" + "-"*60) print("Cleaning price and playtime columns... (using tags to determine free-to-play games and when playtime=0 change them to NaN)") print("-"*60) # --- 7. Handle price=0 (BEFORE comprehensive summary) --- def clean_price(row): """ Loading Loading @@ -428,6 +432,8 @@ for col in df_fully_complete.columns: if not any_missing: print("No standard missing values (NaN) in fully filtered dataset!") print(f'\ntags (empty dict): {df_fully_complete["tags"].apply(lambda x: len(x) == 0 if isinstance(x, dict) else True).sum()}') # Check that our filtering worked print(f"\n--- Verification of Filtering ---") print(f"Games with empty genres: {df_fully_complete['genres_parsed'].apply(lambda x: len(x) == 0 if isinstance(x, list) else True).sum()}") Loading Loading
exploratory_data_analysis.py +6 −0 Original line number Diff line number Diff line Loading @@ -260,6 +260,10 @@ num_cols = df.select_dtypes(include=[np.number]).columns zero_counts = (df[num_cols] == 0).sum().sort_values(ascending=False) print(zero_counts) print("\n" + "-"*60) print("Cleaning price and playtime columns... (using tags to determine free-to-play games and when playtime=0 change them to NaN)") print("-"*60) # --- 7. Handle price=0 (BEFORE comprehensive summary) --- def clean_price(row): """ Loading Loading @@ -428,6 +432,8 @@ for col in df_fully_complete.columns: if not any_missing: print("No standard missing values (NaN) in fully filtered dataset!") print(f'\ntags (empty dict): {df_fully_complete["tags"].apply(lambda x: len(x) == 0 if isinstance(x, dict) else True).sum()}') # Check that our filtering worked print(f"\n--- Verification of Filtering ---") print(f"Games with empty genres: {df_fully_complete['genres_parsed'].apply(lambda x: len(x) == 0 if isinstance(x, list) else True).sum()}") Loading