Systematic loading and testing of individual files for the ASHRAE energy predictor competition.

Collecting the .csv files

get_csvs[source]

get_csvs(data_path:Path=Path('../data'), csv_names_map:dict={})

%%time
csvs = get_csvs()

Loading basic meter info

get_meter_data[source]

get_meter_data(path:Path, nrows:int=-1)

Get the core of the train and test datasets

%%time
df_meter_train = get_meter_data(csvs['train'], nrows=N_TRAIN)
display(df_meter_train.head(), df_meter_train.info())

%%time
df_meter_test = get_meter_data(csvs['test'], nrows=N_TEST)
display(df_meter_test.head(), df_meter_test.info())

NaNs

get_nan_stats[source]

get_nan_stats(df:DataFrame, col:str)

get_nan_stats(df_meter_train, 'meter_reading')

show_nans[source]

show_nans(df:DataFrame)

%%time
meter_train_nans = show_nans(df_meter_train)
meter_train_nans

test_meter_train_and_test_set[source]

test_meter_train_and_test_set(df_train:DataFrame, df_test:DataFrame)

Get building info

get_building_data[source]

get_building_data(path:Path=Path('../data/building_metadata.csv'))

%%time
df_building = get_building_data(csvs['building'])
df_building.head()

test_building[source]

test_building(df_building:DataFrame, df_core:DataFrame)

Get weather info

get_weather_data[source]

get_weather_data(path:Path=Path('../data/weather_train.csv'))

%%time
df_weather_train = get_weather_data(csvs['weather_train'])
df_weather_train.head()

test_weather[source]

test_weather(df_weather:DataFrame, df_building:DataFrame)

%%time
df_weather_test = get_weather_data(csvs['weather_test'])

Doing all of the above

load_all[source]

load_all(data_path:Path=Path('../data'))

Locates the .csv files, loads them, and performs basic sanity checks.

%%time
ashrae_data = load_all()