166 lines
6.0 KiB
Python
166 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: UTF-8 -*-
|
|
#Nom : : taxref.py
|
|
#Description :
|
|
#Copyright : 2021, CEN38
|
|
#Auteur : Colas Geier
|
|
#Version : 1.0
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
from pycen import bdd
|
|
from sqlalchemy import create_engine
|
|
|
|
IMPORT = False
|
|
# Parametres bdd
|
|
user = 'postgres'
|
|
pwd = 'tutu'
|
|
adr = '192.168.60.10'
|
|
base = 'bd_cen'
|
|
schema = 'ref_habitats'
|
|
|
|
con = create_engine('postgresql+psycopg2://{0}:{1}@{2}/{3}'.format(user,pwd,adr,base), echo=False)
|
|
# Connexion bdd
|
|
bd = bdd.CEN(
|
|
user = user,
|
|
pwd = pwd,
|
|
adr = adr,
|
|
base = base
|
|
# schema = schema
|
|
)
|
|
|
|
# Parametres fichier d'entré
|
|
PATHIN = '~/Documents/5_BDD/HABITAT/'
|
|
CORINE = 'TYPO_CORINE_BIOTOPES_50.xlsx'
|
|
EUNIS = 'TYPO_EUNIS_50.xlsx'
|
|
CRSP = 'CRSP_CB_EUNIS_50.xlsx'
|
|
|
|
# Name table
|
|
tab_cb ='corine_biotope'
|
|
tab_eunis = 'eunis'
|
|
tab_param_crsp ='param_crsp_cb_eunis'
|
|
tab_rCbEunis = 'r_cb_eunis'
|
|
|
|
|
|
# Order columns
|
|
order_cb_col = [
|
|
'CD_HAB','CD_HAB_SUP','PATH_CD_HAB','CD_TYPO','NIVEAU',
|
|
'LB_NIVEAU','LB_HAB_FR','LB_HAB_FR_COMPLET',
|
|
'LB_HAB_EN','FRANCE','FG_VALIDITE','LB_AUTEUR',
|
|
'DESCRIPTIF_CB97_FR','LB_DESCRIPTION','CRSP_PHYTO_CB91']
|
|
rename_cb_col = {'DESCRIPTIF_CB97_FR': 'DESCRIPTIF_CB_FR', 'CRSP_PHYTO_CB91': 'CRSP_PHYTO_CB','LB_HAB_FR_COMPLET': 'LB_HAB_FR_COMP'}
|
|
|
|
order_col_eunis = [
|
|
'CD_HAB','CD_HAB_SUP','PATH_CD_HAB','CD_TYPO','NIVEAU',
|
|
'LB_NIVEAU','LB_HAB_FR','LB_HAB_FR_COMPLET',
|
|
'LB_HAB_EN','FRANCE','FG_VALIDITE','LB_AUTEUR',
|
|
'DESCRIPTIF_FR_2012','LB_DESCRIPTION','NOTE','LIEN_SITE']
|
|
rename_eunis_col = {'DESCRIPTIF_FR_2012': 'DESCRIPTIF_FR','LB_HAB_FR_COMPLET': 'LB_HAB_FR_COMP'}
|
|
|
|
order_col_RcbEunis =['CODE_CB','RELATION','DEGRE_INFO','CODE_EUNIS']
|
|
rename_RcbEunis_col = {'CODE_CB': 'ID_CB','CODE_EUNIS': 'ID_EUNIS'}
|
|
|
|
|
|
|
|
# INSERT INTO ref_habitats.corine_biotope
|
|
# ("LB_CODE", "CD_HAB", "CD_HAB_SUP", "PATH_CD_HAB", "CD_TYPO", "NIVEAU", "LB_NIVEAU", "LB_HAB_FR", "LB_HAB_FR_COMPLET", "LB_HAB_EN", "FRANCE", "FG_VALIDITE", "LB_AUTEUR", "DESCRIPTIF_CB_FR", "LB_DESCRIPTION", "CRSP_PHYTO_CB")
|
|
# VALUES
|
|
# (%(LB_CODE)s, %(CD_HAB)s, %(CD_HAB_SUP)s, %(PATH_CD_HAB)s, %(CD_TYPO)s, %(NIVEAU)s, %(LB_NIVEAU)s, %(LB_HAB_FR)s, %(LB_HAB_FR_COMPLET)s, %(LB_HAB_EN)s, %(FRANCE)s, %(FG_VALIDITE)s, %(LB_AUTEUR)s, %(DESCRIPTIF_CB_FR)s, %(LB_DESCRIPTION)s, %(CRSP_PHYTO_CB)s)
|
|
###############
|
|
### Import data
|
|
measurer = np.vectorize(len)
|
|
|
|
# Corine Biotope
|
|
corine_meta = pd.read_excel(PATHIN+CORINE, sheet_name = 0)
|
|
corine_desc = pd.read_excel(PATHIN+CORINE, sheet_name = 1)
|
|
corine_df = pd.read_excel(PATHIN+CORINE, sheet_name = 2, index_col = 'LB_CODE')
|
|
corine_df = corine_df[order_cb_col]
|
|
corine_df.rename(columns = rename_cb_col, inplace=True)
|
|
corine_df.rename(str.lower, axis='columns', inplace=True)
|
|
corine_df.index.name = 'id'
|
|
# dict_cb = {}
|
|
# for col in corine_df.columns:
|
|
# if corine_df[col].dtype == int:
|
|
# len_col = len(corine_df[col].max())
|
|
# print(corine_df[col].dtype == int)
|
|
if IMPORT:
|
|
corine_df.to_sql(name=tab_cb, con=con, schema=schema, index=True, if_exists='append')
|
|
print('{0} importé !'.format(CORINE))
|
|
max_len_col_cb = dict(zip(corine_df, measurer(corine_df.values.astype(str)).max(axis=0)))
|
|
|
|
# Eunis
|
|
eunis_meta = pd.read_excel(PATHIN+EUNIS, sheet_name = 0)
|
|
eunis_desc = pd.read_excel(PATHIN+EUNIS, sheet_name = 1)
|
|
eunis_df = pd.read_excel(PATHIN+EUNIS, sheet_name = 2, index_col = 'LB_CODE')
|
|
eunis_df = eunis_df[order_col_eunis]
|
|
eunis_df = eunis_df.rename(columns = rename_eunis_col)
|
|
eunis_df.rename(str.lower, axis='columns', inplace=True)
|
|
eunis_df.index.name = 'id'
|
|
if IMPORT:
|
|
eunis_df.to_sql(name=tab_eunis, con=con, schema=schema, index=True, if_exists='append')
|
|
print('{0} importé !'.format(EUNIS))
|
|
max_len_col_cb = dict(zip(eunis_df, measurer(eunis_df.values.astype(str)).max(axis=0)))
|
|
print(max_len_col_cb)
|
|
|
|
# CRSP_CB_EUNIS
|
|
crsp_meta = pd.read_excel(PATHIN+CRSP, sheet_name = 0)
|
|
crsp_desc = pd.read_excel(PATHIN+CRSP, sheet_name = 1)
|
|
base_crsp_df = pd.read_excel(PATHIN+CRSP, sheet_name = 2)
|
|
crsp_df = pd.read_excel(PATHIN+CRSP, sheet_name = 3)
|
|
crsp_df.sort_values(['CODE_CB', 'CODE_EUNIS'], inplace = True)
|
|
crsp_df.reset_index(drop = True, inplace = True)
|
|
crsp_df = crsp_df[order_col_RcbEunis]
|
|
crsp_df = crsp_df.rename(columns = rename_RcbEunis_col)
|
|
crsp_df.rename(str.lower, axis='columns', inplace=True)
|
|
crsp_df.index.name = 'id'
|
|
if IMPORT:
|
|
crsp_df.to_sql(name=tab_rCbEunis, con=con, schema=schema, index=False, if_exists='append')
|
|
print('{0} importé !'.format(CRSP))
|
|
max_len_col_cb = dict(zip(crsp_df, measurer(crsp_df.values.astype(str)).max(axis=0)))
|
|
|
|
# Param CRSP
|
|
param_crsp_cb_eunis = bd.get_table(
|
|
schema = schema,
|
|
table = 'param_crsp_cb_eunis')
|
|
param_crsp_cb_eunis = param_crsp_cb_eunis[param_crsp_cb_eunis.type == 'degre']
|
|
param_crsp_cb_eunis.drop(columns=['type'], inplace = True)
|
|
|
|
|
|
# Pour les df à importer
|
|
list_df = [corine_df, eunis_df, crsp_df]
|
|
# for df in list_df:
|
|
# # df.columns = df.columns.str.lower().tolist()
|
|
|
|
# # Suppression des colonnes vides
|
|
# for col in df.columns:
|
|
# if df[~df[col].isnull()].empty :
|
|
# df.drop(columns=col, inplace=True)
|
|
|
|
|
|
base_crsp_df.sort_values(['CD_HAB_ENTRE','LB_CODE_SORTIE'], inplace=True)
|
|
base_crsp_df['tmp'], base_crsp_df['value'] = base_crsp_df.LB_REMARQUES.str.split('Correspondance ', 1).str
|
|
base_crsp_df.drop(columns=['LB_REMARQUES','tmp'], inplace=True)
|
|
base_crsp_df['DEGRE'], base_crsp_df['tmp'] = base_crsp_df.value.str.split('.', 1).str
|
|
base_crsp_df.drop(columns=['value','tmp'], inplace=True)
|
|
|
|
param_crsp_cb_eunis.set_index('description', inplace = True)
|
|
param_crsp_cb_eunis = param_crsp_cb_eunis.to_dict()
|
|
base_crsp_df.DEGRE = base_crsp_df.DEGRE.map(param_crsp_cb_eunis['id_param'])
|
|
|
|
|
|
|
|
for row in param_crsp_cb_eunis :
|
|
print(row)
|
|
|
|
crsp_df.degre.replace(to_replace=r'^ba.$', value='new', regex=True)
|
|
# pd.merge(crsp_df,param_crsp_cb_eunis, left_on='degre', right_on='description', how="left")
|
|
|
|
crsp_df.index.name = 'id'
|
|
if IMPORT:
|
|
crsp_df.to_sql(name='crsp_corine_eunis', con=con, schema=schema, index=True, index_label='id' )
|
|
|
|
|
|
measurer = np.vectorize(len)
|
|
max_len_col_cb = dict(zip(corine_df, measurer(corine_df.values.astype(str)).max(axis=0)))
|
|
|