# Download the Numerai training data and load it as a pandas DataFrame.
TRAINING_DATAPATH = 'https://numerai-public-datasets.s3-us-west-2.amazonaws.com/latest_numerai_training_data.csv.xz'
df = pd.read_csv(TRAINING_DATAPATH)

# Collect the feature columns (every column whose name starts with "feature").
features = [c for c in df if c.startswith("feature")]

# Full column-name list: three string columns, the features, then the target.
col_list = ["id", "era", "data_type"] + features + ["target_kazutsugi"]

# Matching dtype list: str for the three leading columns, float32 for the
# features and the target. The float32 count is derived from the data instead
# of being hard-coded, because zip() silently truncates to the shorter input
# and would otherwise leave columns without a dtype if the feature count of
# the "latest" dataset ever changed.
dtype_list_front = [str, str, str]
dtype_list_back = [np.float32] * (len(features) + 1)
dtype_list = dtype_list_front + dtype_list_back

# Pair each column name with its dtype; the resulting {column: dtype} mapping
# is the form accepted by the `dtype` argument of pd.read_csv.
dtype_dict = dict(zip(col_list, dtype_list))

# Persist the mapping to 'dtype_dict.joblib' so later runs can reuse it.
dump(dtype_dict, 'dtype_dict.joblib')