Commit 4dc3b81d authored by danielczinege's avatar danielczinege
Browse files

feat: better print

parent 788de70d
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -260,6 +260,10 @@ num_cols = df.select_dtypes(include=[np.number]).columns
# Per numeric column, count the cells that are exactly zero and list the
# columns with the most zeros first.
zero_counts = (
    df[num_cols]
    .eq(0)
    .sum()
    .sort_values(ascending=False)
)
print(zero_counts)

# Banner announcing the price/playtime cleaning stage.
print(f"\n{'-' * 60}")
print(
    "Cleaning price and playtime columns... "
    "(using tags to determine free-to-play games "
    "and when playtime=0 change them to NaN)"
)
print("-" * 60)

# --- 7. Handle price=0 (BEFORE comprehensive summary) ---
def clean_price(row):
    """
@@ -428,6 +432,8 @@ for col in df_fully_complete.columns:
if not any_missing:
    print("No standard missing values (NaN) in fully filtered dataset!")

# Count rows whose "tags" value is an empty dict or is not a dict at all.
_n_empty_tags = df_fully_complete["tags"].apply(
    lambda tags: not isinstance(tags, dict) or len(tags) == 0
).sum()
print(f"\ntags (empty dict): {_n_empty_tags}")

# Check that our filtering worked
print("\n--- Verification of Filtering ---")
# Count rows whose "genres_parsed" value is an empty list or is not a list.
_n_empty_genres = df_fully_complete["genres_parsed"].apply(
    lambda genres: not isinstance(genres, list) or len(genres) == 0
).sum()
print(f"Games with empty genres: {_n_empty_genres}")