#!/usr/bin/env python3 # -*- coding: UTF-8 -*- #Nom : : taxref.py #Description : #Copyright : 2021, CEN38 #Auteur : Colas Geier #Version : 1.0 import pandas as pd import numpy as np from pycen import bdd from sqlalchemy import create_engine IMPORT = False # Parametres bdd user = 'postgres' pwd = 'tutu' adr = '192.168.60.10' base = 'bd_cen' schema = 'ref_habitats' con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False) # Connexion bdd bd = bdd.CEN( user = user, pwd = pwd, adr = adr, base = base # schema = schema ) # Parametres fichier d'entré PATHIN = '~/Documents/5_BDD/HABITAT/' CORINE = 'TYPO_CORINE_BIOTOPES_50.xlsx' EUNIS = 'TYPO_EUNIS_50.xlsx' CRSP = 'CRSP_CB_EUNIS_50.xlsx' # Name table tab_cb ='corine_biotope' tab_eunis = 'eunis' tab_param_crsp ='param_crsp_cb_eunis' tab_rCbEunis = 'r_cb_eunis' # Order columns order_cb_col = [ 'CD_HAB','CD_HAB_SUP','PATH_CD_HAB','CD_TYPO','NIVEAU', 'LB_NIVEAU','LB_HAB_FR','LB_HAB_FR_COMPLET', 'LB_HAB_EN','FRANCE','FG_VALIDITE','LB_AUTEUR', 'DESCRIPTIF_CB97_FR','LB_DESCRIPTION','CRSP_PHYTO_CB91'] rename_cb_col = {'DESCRIPTIF_CB97_FR': 'DESCRIPTIF_CB_FR', 'CRSP_PHYTO_CB91': 'CRSP_PHYTO_CB','LB_HAB_FR_COMPLET': 'LB_HAB_FR_COMP'} order_col_eunis = [ 'CD_HAB','CD_HAB_SUP','PATH_CD_HAB','CD_TYPO','NIVEAU', 'LB_NIVEAU','LB_HAB_FR','LB_HAB_FR_COMPLET', 'LB_HAB_EN','FRANCE','FG_VALIDITE','LB_AUTEUR', 'DESCRIPTIF_FR_2012','LB_DESCRIPTION','NOTE','LIEN_SITE'] rename_eunis_col = {'DESCRIPTIF_FR_2012': 'DESCRIPTIF_FR','LB_HAB_FR_COMPLET': 'LB_HAB_FR_COMP'} order_col_RcbEunis =['CODE_CB','RELATION','DEGRE_INFO','CODE_EUNIS'] rename_RcbEunis_col = {'CODE_CB': 'ID_CB','CODE_EUNIS': 'ID_EUNIS'} # INSERT INTO ref_habitats.corine_biotope # ("LB_CODE", "CD_HAB", "CD_HAB_SUP", "PATH_CD_HAB", "CD_TYPO", "NIVEAU", "LB_NIVEAU", "LB_HAB_FR", "LB_HAB_FR_COMPLET", "LB_HAB_EN", "FRANCE", "FG_VALIDITE", "LB_AUTEUR", "DESCRIPTIF_CB_FR", "LB_DESCRIPTION", "CRSP_PHYTO_CB") # VALUES # (%(LB_CODE)s, %(CD_HAB)s, %(CD_HAB_SUP)s, %(PATH_CD_HAB)s, %(CD_TYPO)s, %(NIVEAU)s, %(LB_NIVEAU)s, %(LB_HAB_FR)s, %(LB_HAB_FR_COMPLET)s, %(LB_HAB_EN)s, %(FRANCE)s, %(FG_VALIDITE)s, %(LB_AUTEUR)s, %(DESCRIPTIF_CB_FR)s, %(LB_DESCRIPTION)s, %(CRSP_PHYTO_CB)s) ############### ### Import data measurer = np.vectorize(len) # Corine Biotope corine_meta = pd.read_excel(PATHIN+CORINE, sheet_name = 0) corine_desc = pd.read_excel(PATHIN+CORINE, sheet_name = 1) corine_df = pd.read_excel(PATHIN+CORINE, sheet_name = 2, index_col = 'LB_CODE') corine_df = corine_df[order_cb_col] corine_df.rename(columns = rename_cb_col, inplace=True) corine_df.rename(str.lower, axis='columns', inplace=True) corine_df.index.name = 'id' # dict_cb = {} # for col in corine_df.columns: # if corine_df[col].dtype == int: # len_col = len(corine_df[col].max()) # print(corine_df[col].dtype == int) if IMPORT: corine_df.to_sql(name=tab_cb, con=con, schema=schema, index=True, if_exists='append') print('{0} importé !'.format(CORINE)) max_len_col_cb = dict(zip(corine_df, measurer(corine_df.values.astype(str)).max(axis=0))) # Eunis eunis_meta = pd.read_excel(PATHIN+EUNIS, sheet_name = 0) eunis_desc = pd.read_excel(PATHIN+EUNIS, sheet_name = 1) eunis_df = pd.read_excel(PATHIN+EUNIS, sheet_name = 2, index_col = 'LB_CODE') eunis_df = eunis_df[order_col_eunis] eunis_df = eunis_df.rename(columns = rename_eunis_col) eunis_df.rename(str.lower, axis='columns', inplace=True) eunis_df.index.name = 'id' if IMPORT: eunis_df.to_sql(name=tab_eunis, con=con, schema=schema, index=True, if_exists='append') print('{0} importé !'.format(EUNIS)) max_len_col_cb = dict(zip(eunis_df, measurer(eunis_df.values.astype(str)).max(axis=0))) print(max_len_col_cb) # CRSP_CB_EUNIS crsp_meta = pd.read_excel(PATHIN+CRSP, sheet_name = 0) crsp_desc = pd.read_excel(PATHIN+CRSP, sheet_name = 1) base_crsp_df = pd.read_excel(PATHIN+CRSP, sheet_name = 2) crsp_df = pd.read_excel(PATHIN+CRSP, sheet_name = 3) crsp_df.sort_values(['CODE_CB', 'CODE_EUNIS'], inplace = True) crsp_df.reset_index(drop = True, inplace = True) crsp_df = crsp_df[order_col_RcbEunis] crsp_df = crsp_df.rename(columns = rename_RcbEunis_col) crsp_df.rename(str.lower, axis='columns', inplace=True) crsp_df.index.name = 'id' if IMPORT: crsp_df.to_sql(name=tab_rCbEunis, con=con, schema=schema, index=False, if_exists='append') print('{0} importé !'.format(CRSP)) max_len_col_cb = dict(zip(crsp_df, measurer(crsp_df.values.astype(str)).max(axis=0))) # Param CRSP param_crsp_cb_eunis = bd.get_table( schema = schema, table = 'param_crsp_cb_eunis') param_crsp_cb_eunis = param_crsp_cb_eunis[param_crsp_cb_eunis.type == 'degre'] param_crsp_cb_eunis.drop(columns=['type'], inplace = True) # Pour les df à importer list_df = [corine_df, eunis_df, crsp_df] # for df in list_df: # # df.columns = df.columns.str.lower().tolist() # # Suppression des colonnes vides # for col in df.columns: # if df[~df[col].isnull()].empty : # df.drop(columns=col, inplace=True) base_crsp_df.sort_values(['CD_HAB_ENTRE','LB_CODE_SORTIE'], inplace=True) base_crsp_df['tmp'], base_crsp_df['value'] = base_crsp_df.LB_REMARQUES.str.split('Correspondance ', 1).str base_crsp_df.drop(columns=['LB_REMARQUES','tmp'], inplace=True) base_crsp_df['DEGRE'], base_crsp_df['tmp'] = base_crsp_df.value.str.split('.', 1).str base_crsp_df.drop(columns=['value','tmp'], inplace=True) param_crsp_cb_eunis.set_index('description', inplace = True) param_crsp_cb_eunis = param_crsp_cb_eunis.to_dict() base_crsp_df.DEGRE = base_crsp_df.DEGRE.map(param_crsp_cb_eunis['id_param']) for row in param_crsp_cb_eunis : print(row) crsp_df.degre.replace(to_replace=r'^ba.$', value='new', regex=True) # pd.merge(crsp_df,param_crsp_cb_eunis, left_on='degre', right_on='description', how="left") crsp_df.index.name = 'id' if IMPORT: crsp_df.to_sql(name='crsp_corine_eunis', con=con, schema=schema, index=True, index_label='id' ) measurer = np.vectorize(len) max_len_col_cb = dict(zip(corine_df, measurer(corine_df.values.astype(str)).max(axis=0)))