# main.py
import sys
import psycopg2
from config import parse_args, update_db_params, DB_PARAMS
from schema import create_tables, drop_tables
from csv_loader import check_csv_format, load_csv_into_table
from validators import run_tests
def main():
    """Check the input CSV files, load them into PostgreSQL and validate them.

    Steps, in order:

    1. Parse the command-line arguments and pass them to ``update_db_params``.
    2. Run ``check_csv_format`` on each of the four CSV files; if any errors
       are reported, print them and exit with status 1.
    3. Connect to PostgreSQL with ``DB_PARAMS`` and set the client encoding to
       UTF-8; on failure print the error and exit with status 1.
    4. Drop and recreate the tables, load each CSV file into its table, and
       run ``run_tests`` on the connection.
    5. Print the validation errors (exit status 1) or a success message
       (exit status 0).

    The function never returns normally: every path ends in ``sys.exit``.
    Once the connection has been opened it is closed on every path.

    Raises:
        SystemExit: always; code 0 when all validations pass, 1 otherwise.
    """
    args = parse_args()
    update_db_params(args)

    # One entry per table: (CSV path, table name, column names). Keeping the
    # column list in a single place lets the format check derive the expected
    # column count from it instead of repeating it as a separate literal.
    tables = [
        (
            args.spatial, "Spatial",
            ["ID", "LAT", "LON", "OFFNAME", "LANG", "CLASS", "TYPE", "DISTRICT",
             "DOUBT", "LANDMARK", "COMMENTS", "OTHER"],
        ),
        (
            args.linguistic, "Linguistic",
            ["ID", "SPATID", "DOUSPAT", "MAINID", "TOPONYM", "TOPFORMS", "DOUTOPO",
             "LANG", "DOULANG", "PRONUNC", "DOUPRON", "ETYM", "ORIGIN", "COMMENTS",
             "OTHER"],
        ),
        (
            args.temporal, "Temporal",
            ["ID", "LINGID", "LINGNAME", "STARTYEAR", "DOUSTART", "ENDYEAR",
             "DOUEND", "EVENT", "OBJID", "OBJNAME", "COMMENTS", "OTHER", "FULLTEXT"],
        ),
        (
            args.sources, "Sources",
            ["ID", "TYPE", "AUTHOR", "TITLE", "YEAR", "PUBLISHER", "CITATION",
             "COMMENTS", "PDF", "OTHER"],
        ),
    ]

    # CSV format checks. The second argument is the number of columns in the
    # table's column list (12, 15, 13 and 10 respectively, as before).
    format_errors = []
    for csv_path, _table_name, columns in tables:
        format_errors.extend(check_csv_format(csv_path, len(columns)))
    if format_errors:
        print("CSV Format/Parsing Errors Detected:")
        for err in format_errors:
            print(" -", err)
        sys.exit(1)

    # Connect to PostgreSQL
    try:
        conn = psycopg2.connect(**DB_PARAMS)
        conn.set_client_encoding('UTF8')  # Ensure client encoding is UTF-8
    except psycopg2.Error as e:
        print(f"Failed to connect to PostgreSQL: {e}")
        sys.exit(1)

    # Setup, load data and run validations
    try:
        drop_tables(conn)  # Ensure a clean slate
        create_tables(conn)
        for csv_path, table_name, columns in tables:
            load_csv_into_table(conn, csv_path, table_name, columns)
        errors = run_tests(conn)
    except psycopg2.Error as e:
        # Report database failures the same way as a failed connection
        # instead of letting a raw traceback escape; the exit status stays 1.
        print(f"Database error while loading or validating data: {e}")
        sys.exit(1)
    finally:
        conn.close()

    if errors:
        print("Validation Errors Detected:")
        for err in errors:
            print(" -", err)
        print(f"Total errors: {len(errors)}")
        sys.exit(1)

    print("All validations passed successfully!")
    sys.exit(0)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()