# Listing metadata: Python, executable file (126 lines, 5.7 KiB in the original listing).
import csv
|
|
import pandas as pd
|
|
|
|
############
# read files
############

# Input locations: the "archive" CSVs sit under input/raw/postgres and the
# current system's "export" CSVs under input/raw/sqlite.
statistics_meta_archive_file = "input/raw/postgres/statistics_meta.csv"
statistics_archive_file = "input/raw/postgres/statistics.csv"
statistics_short_term_archive_file = "input/raw/postgres/statistics_short_term.csv"

statistics_meta_export_file = "input/raw/sqlite/statistics_meta-export.csv"
statistics_export_file = "input/raw/sqlite/statistics-export.csv"
statistics_short_term_export_file = "input/raw/sqlite/statistics_short_term-export.csv"

# Output locations for the files that are written at the end of the script.
statistics_import_file = "output/statistics-import.csv"
statistics_short_term_import_file = "output/statistics_short_term-import.csv"

# Metadata tables are loaded with the default index so that 'id' stays an
# ordinary column (export first, then archive).
meta_df, meta_archive_df = (
    pd.read_csv(meta_path)
    for meta_path in (statistics_meta_export_file, statistics_meta_archive_file)
)

# The statistics tables are indexed by their 'id' column; for each table the
# current export is read before the archive.
(
    statistics_df,
    statistics_archive_df,
    statistics_short_term_df,
    statistics_short_term_archive_df,
) = (
    pd.read_csv(table_path, index_col='id')
    for table_path in (
        statistics_export_file,
        statistics_archive_file,
        statistics_short_term_export_file,
        statistics_short_term_archive_file,
    )
)
#################
# statistics_meta
#################

# Only the id and the statistic name are needed to line the two systems up.
meta_df = meta_df[['id', 'statistic_id']]
meta_archive_df = meta_archive_df[['id', 'statistic_id']]

# Left-merge on statistic_id so that every statistic of the current export is
# kept: id_x is the id in the current export, id_y the id in the archive (NaN
# when the archive has no such statistic) and _merge records which side matched.
meta_merge = meta_df.merge(meta_archive_df, on=['statistic_id'], how='left', indicator=True)
meta_merge.set_index('id_x').to_csv("meta_merge.csv")

# Build the lookup {archive id -> current export id} from matched rows only,
# so the archive can be corrected using the ids of the current system.
# The previous transpose / to_dict('records')[0] / invert construction mapped
# each archive id to its row position in the merge instead of to id_x, and
# carried a NaN key for statistics that are missing from the archive.
meta_matched = meta_merge[meta_merge['_merge'] == 'both']
meta_lookup = dict(zip(
    # the merge turns id_y into float as soon as one row is unmatched;
    # cast back to the archive's own id dtype before using it as a key
    meta_matched['id_y'].astype(meta_archive_df['id'].dtype).tolist(),
    meta_matched['id_x'].tolist(),
))
print(meta_lookup)
############
# statistics
############

# meta_lookup is keyed by the archive-side metadata id and holds the id to use
# instead.  Entries with a missing key or value are ignored so they can never
# match (or be written into) a row.
statistics_id_map = {
    archive_id: current_id
    for archive_id, current_id in meta_lookup.items()
    if pd.notna(archive_id) and pd.notna(current_id)
}

# drop any statistics not in the existing system's metadata.
# This must happen BEFORE the ids are rewritten: the lookup keys are archive
# ids, so testing the already-remapped column against them compares ids from
# two different systems.
statistics_known = statistics_archive_df['metadata_id'].isin(list(statistics_id_map))
statistics_archive_df = statistics_archive_df[statistics_known].copy()

# correct the meta column (every remaining row has an entry in the map)
statistics_archive_df['metadata_id'] = statistics_archive_df['metadata_id'].map(statistics_id_map)

# make unique indexes: shift the archive ids past the largest id of the export.
# index.max() is used rather than last_valid_index(), which only returns the
# label of the last row and is wrong when the export is not sorted by id.
statistics_max_id = int(statistics_df.index.max()) if len(statistics_df.index) else 0
statistics_archive_df = statistics_archive_df.reset_index()
statistics_archive_df['id'] += statistics_max_id
statistics_archive_df = statistics_archive_df.set_index('id')

# find any duplicates where the pair (start_ts, metadata_id) exists in both
# export and archive, and drop them from the archive.
statistics_archive_df.info()  # info() prints by itself and returns None
statistics_key_columns = ['start_ts', 'metadata_id']
# The right side is de-duplicated, so the left merge yields exactly one row per
# archive row, in archive order; the _merge column can therefore be used as a
# positional mask on the archive frame.
unique_lookup = statistics_archive_df[statistics_key_columns].merge(
    statistics_df[statistics_key_columns].drop_duplicates(),
    on=statistics_key_columns,
    how='left',
    indicator=True,
)
statistics_is_duplicate = (unique_lookup['_merge'] == "both").to_numpy()
unique_lookup = unique_lookup[statistics_is_duplicate]
# debugging aid: the archive rows that were dropped as duplicates
unique_lookup.to_csv("unique_merge.csv")
statistics_archive_df = statistics_archive_df[~statistics_is_duplicate]
statistics_archive_df.info()
#######################
# statistics_short_term
#######################

# meta_lookup is keyed by the archive-side metadata id and holds the id to use
# instead.  Entries with a missing key or value are ignored so they can never
# match (or be written into) a row.
statistics_short_term_id_map = {
    archive_id: current_id
    for archive_id, current_id in meta_lookup.items()
    if pd.notna(archive_id) and pd.notna(current_id)
}

# drop any statistics not in the existing system's metadata.
# This must happen BEFORE the ids are rewritten: the lookup keys are archive
# ids, so testing the already-remapped column against them compares ids from
# two different systems.
statistics_short_term_known = statistics_short_term_archive_df['metadata_id'].isin(
    list(statistics_short_term_id_map)
)
statistics_short_term_archive_df = statistics_short_term_archive_df[statistics_short_term_known].copy()

# correct the meta column (every remaining row has an entry in the map)
statistics_short_term_archive_df['metadata_id'] = (
    statistics_short_term_archive_df['metadata_id'].map(statistics_short_term_id_map)
)

# make unique indexes: shift the archive ids past the largest id of the export.
# index.max() is used rather than last_valid_index(), which only returns the
# label of the last row and is wrong when the export is not sorted by id.
statistics_short_term_max_id = (
    int(statistics_short_term_df.index.max()) if len(statistics_short_term_df.index) else 0
)
statistics_short_term_archive_df = statistics_short_term_archive_df.reset_index()
statistics_short_term_archive_df['id'] += statistics_short_term_max_id
statistics_short_term_archive_df = statistics_short_term_archive_df.set_index('id')

# find any duplicates where the pair (start_ts, metadata_id) exists in both
# export and archive, and drop them from the archive.
statistics_short_term_archive_df.info()  # info() prints by itself and returns None
statistics_short_term_key_columns = ['start_ts', 'metadata_id']
# The right side is de-duplicated, so the left merge yields exactly one row per
# archive row, in archive order; the _merge column can therefore be used as a
# positional mask on the archive frame.
unique_lookup = statistics_short_term_archive_df[statistics_short_term_key_columns].merge(
    statistics_short_term_df[statistics_short_term_key_columns].drop_duplicates(),
    on=statistics_short_term_key_columns,
    how='left',
    indicator=True,
)
statistics_short_term_is_duplicate = (unique_lookup['_merge'] == "both").to_numpy()
unique_lookup = unique_lookup[statistics_short_term_is_duplicate]
statistics_short_term_archive_df = statistics_short_term_archive_df[~statistics_short_term_is_duplicate]
statistics_short_term_archive_df.info()
###########################
# write files for importing
###########################

# Each cleaned archive frame is written to its import file, index included
# (long-term statistics first, then short-term).
for import_frame, import_path in (
    (statistics_archive_df, statistics_import_file),
    (statistics_short_term_archive_df, statistics_short_term_import_file),
):
    import_frame.to_csv(import_path)