Commit 3915a2bf authored by danielczinege's avatar danielczinege
Browse files

print out columns at the end

parent 9268ed9e
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -173,7 +173,7 @@ for col in two_week_cols:
    if col in df_complete.columns:
        missing_2week = df_complete[col].isnull().sum()
        missing_2week_pct = (missing_2week / len(df_complete)) * 100
        print(f"Remaining NaN in {col}: {missing_2week:,} ({missing_2week_pct:.1f}%) - OK to keep")
        print(f"Remaining NaN in {col}: {missing_2week:,} ({missing_2week_pct:.1f}%)")

# --- 7. ANALYSIS ON FILTERED DATASET ---
print("\n" + "="*60)
@@ -299,3 +299,6 @@ print(pd.Series(lang_counts).sort_values(ascending=False).head(10))
# Count how many times each tag key appears across all rows of df_complete['tags'].
# NOTE(review): assumes every entry in the 'tags' column supports .keys()
# (i.e. is dict-like, with no NaN/None entries) — confirm against how the column is built.
tag_counts = Counter([tag for tags in df_complete['tags'] for tag in tags.keys()])
print("\n--- Top tags (Filtered Dataset) ---")
# Show the ten most frequent tags, highest count first.
print(pd.Series(tag_counts).sort_values(ascending=False).head(10))

# Print the column labels of `df`.
# NOTE(review): this inspects `df`, whereas the statements above operate on
# `df_complete` — confirm which DataFrame is meant to be reported here.
print("which columns we have:")
print(df.columns)