#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Name        : foncier_insert_table.py
# Description : Insert cadastral data into the database after its creation.
# Copyright   : 2021, CEN38
# Author      : Colas Geier
# Version     : 1.0

import datetime as dt
import gc
import sys

import pandas as pd
import geopandas as gpd
from geoalchemy2 import Geometry, shape
from shapely import wkb
from shapely.geometry.multipolygon import MultiPolygon
from shapely.ops import unary_union
from sqlalchemy import create_engine

# --- Source database parameters: bd_cen38 (in) ---
# Credentials are now encapsulated in the pycen connection objects.
# user_in = 'cen_admin'
# pwd_in = '#CEN38@venir'
# adr_in = '192.168.0.3'
# port_in = '5432'
# base_in = 'bd_cen38'
from pycen import con_bdcen
schema_in = 'sites'
table_in = 'c_sites_zonages'

# --- Target database parameters: FONCIER NEW (out) ---
# user_out = 'postgres'
# pwd_out = 'tutu'
# adr_out = '192.168.60.9'
# port_out = '5432'
# base_out = 'bd_cen'
from pycen import con_fon
schema_out = 'sites'

# --- General parameters ---
crs = 'EPSG:2154'  # Lambert-93 projection
chunk = None       # chunksize for pandas SQL reads (None = read everything at once)

# --- Helper functions ---
start_time = dt.datetime.today()


def time_exec(init_time):
    """Return the elapsed time since *init_time* as a string."""
    return str(dt.datetime.today() - init_time)


def drop_exist(df, con, table, schema, chunk=None):
    """Return *df* without the rows already present in *schema.table*.

    BUGFIX: the original read the existing table into ``exist`` but never
    used it, returning *df* unchanged.  Rows equal to an existing row on
    every common (non-geometry) column are now removed via a left
    anti-join (merge with ``indicator=True``).

    :param df:     DataFrame of candidate rows to insert.
    :param con:    SQLAlchemy connectable for the target database.
    :param table:  target table name.
    :param schema: target schema name.
    :param chunk:  optional chunksize for the read (chunks are concatenated).
    """
    exist = pd.read_sql_table(
        table_name=table,
        con=con,
        schema=schema,
        chunksize=chunk,
    )
    if chunk:
        exist = pd.concat(exist, ignore_index=True)
    # Compare on the columns both frames share; geometry objects are
    # excluded because they are not reliable merge keys.
    common = [c for c in df.columns if c in exist.columns and c != 'geom']
    if exist.empty or not common:
        return df
    merged = df.merge(
        exist[common].drop_duplicates(),
        on=common,
        how='left',
        indicator=True,
    )
    return merged[merged['_merge'] == 'left_only'].drop(columns='_merge')


def give_z(x):
    """Collect the Z coordinate of every vertex of a (Multi)Polygon.

    Returns a list with one Z per exterior/interior-ring vertex (the
    closing vertex of each ring is excluded).
    FIX: use ``geom_type`` (``.type`` is deprecated) and iterate
    ``.geoms`` so MultiPolygons also work under Shapely 2.
    """
    polygons = [x] if x.geom_type == 'Polygon' else x.geoms
    zlist = []
    for polygon in polygons:
        zlist.extend([c[-1] for c in polygon.exterior.coords[:-1]])
        for inner_ring in polygon.interiors:
            zlist.extend([c[-1] for c in inner_ring.coords[:-1]])
    return zlist
    # return sum(zlist)/len(zlist)  # in your case, to get the mean
    # or just return zlist[0] if they are all the same


def get_data(table, schema, engine=con_bdcen, chunk=chunk):
    """Read *schema.table* and return it as a (Geo)DataFrame.

    If a non-empty ``geom`` column is present, its WKB elements are
    decoded with ``geoalchemy2.shape.to_shape`` and the result is
    returned as a GeoDataFrame in the module-level *crs*.
    """
    tmp = pd.read_sql_table(
        table_name=table,
        con=engine,
        schema=schema,
        chunksize=chunk,
    )
    if chunk:
        df = pd.concat(tmp, ignore_index=True)
    else:
        df = tmp.copy()
    if 'geom' in df.columns and not df[~df['geom'].isna()].empty:
        df = gpd.GeoDataFrame(df)
        df['geom'] = df.geom.apply(lambda x: shape.to_shape(x))
        df.set_geometry('geom', inplace=True, crs=crs)
    return df


########################
######   MAIN    #######
########################

# Reference table of habitat types ("milieux") from the target database.
tmi = get_data(table='d_milieux', schema='sites', engine=con_fon)

# Import data
print('INIT import data ........... %s sec' % (time_exec(start_time)))
# Read the source zonings table (the raw SQL string is kept for reference
# only; get_data() reads the table directly).
sql = "SELECT * FROM {0}.{1}".format(schema_in, table_in)
df = get_data(table=table_in, schema=schema_in, engine=con_bdcen)
# The 'ZO' zoning of site RNIP has no name: copy it from its 'ZI' zoning.
df.loc[(df.code_site=='RNIP')&(df.type_zonage=='ZO'),'nom_site'] = df.loc[(df.code_site=='RNIP')&(df.type_zonage=='ZI'),'nom_site'].values[0]
# Keep only zonings that are still active (no end date).
df = df[df.date_fin.isna()]
df.sort_values('code_site', inplace=True)

# --- 'sites' table ---
tab_out = 'sites'
# Column names of the target table (informational; not used further below).
name_col_out = con_fon.dialect.get_columns(con_fon, tab_out, schema=schema_out)
name_col_out = [sub['name'] for sub in name_col_out]
# Source column -> target column mapping.
dictio = {
    'code_site': 'site_id',
    'nom_site': 'site_nom',
    # 'id_mnhn' : 'mnhn_id',
    # 'idfcen' : 'fcen_id',
    'milieux': 'milieu_id',
    'date_ajout': 'annee_creation',
    'surface_ha': 'surf_m2',
}
df_sites = gpd.GeoDataFrame(df[dictio.keys()], geometry=df.geom, crs=crs)
df_sites.rename(columns=dictio, inplace=True)
df_sites.rename(columns={'geometry': 'geom'}, inplace=True)
# Sort by id then area so that, within a duplicated name, the first row is
# the larger pattern expected by the keep='first'/'last' logic below.
df_sites.sort_values(['site_id', 'surf_m2'], inplace=True)
df_sites.drop_duplicates(subset=['site_id', 'site_nom', 'surf_m2'], inplace=True)
# When a site name appears twice, the pair is a ZO/ZI (outer/inner zoning)
# couple: suffix the ids, then the names, to disambiguate them.
# NOTE(review): order matters — the id suffixes are applied before the
# name suffixes, which key off `site_id.str.endswith`.
df_sites.loc[df_sites.site_nom.duplicated(keep='first'),'site_id'] = df_sites.loc[df_sites.site_nom.duplicated(keep='first'),'site_id'] + '_ZO'
df_sites.loc[df_sites.site_nom.duplicated(keep='last'),'site_id'] = df_sites.loc[df_sites.site_nom.duplicated(keep='last'),'site_id'] + '_ZI'
df_sites.loc[df_sites.site_id.str.endswith('ZO'),'site_nom'] = df_sites.loc[df_sites.site_id.str.endswith('ZO'),'site_nom'] + ' - ZO'
df_sites.loc[df_sites.site_id.str.endswith('ZI'),'site_nom'] = df_sites.loc[df_sites.site_id.str.endswith('ZI'),'site_nom'] + ' - ZI'

# --- Site / site-type correspondence ---
# Regex alternations of the managed-site codes and the assisted-site codes.
site_gere = '|'.join(df[df.gestion].code_site.unique().tolist())
site_asst = '|'.join(df[df.assist_cnv|df.assist_col].code_site.unique().tolist())
df_sites['typsite_id'] = '0'  # default type
df_sites.loc[df_sites.site_id.str.contains(site_asst), 'typsite_id'] = '2-1'  # assisted
# Applied last so "managed" takes precedence over "assisted".
df_sites.loc[df_sites.site_id.str.contains(site_gere), 'typsite_id'] = '1-1'  # managed
# --- Site / simplified milieu label correspondence ---
# Normalise legacy labels to the simplified labels used in d_milieux.
df_sites.loc[df_sites['milieu_id'] == 'Tourbières et marais', 'milieu_id'] = 'Milieux humides'
df_sites.loc[df_sites['milieu_id'] == 'Gîte à chiroptères', 'milieu_id'] = 'Gîtes à chiroptères'
df_sites.loc[df_sites['milieu_id'] == 'Ecosystèmes alluviaux', 'milieu_id'] = 'Milieux alluviaux'
df_sites.loc[df_sites['milieu_id'] == 'Ecosystèmes aquatiques', 'milieu_id'] = 'Milieux aquatiques'
df_sites.loc[df_sites['milieu_id'] == 'Pelouses sèches', 'milieu_id'] = 'Pelouses sèches'
df_sites.loc[df_sites['milieu_id'].isna(), 'milieu_id'] = 'N.P.'
# Replace each label by its identifier from the d_milieux reference table.
# NOTE(review): raises IndexError if a label has no match in tmi — confirm
# the reference table covers every label produced above.
for m in df_sites['milieu_id'].unique():
    df_sites.loc[df_sites['milieu_id'] == m, 'milieu_id'] = tmi[
        (tmi['milieu_lib_simpl'] == m) | (tmi['milieu_lib'] == m)
    ]['milieu_id'].values[0]

# Field completion for tests...
# df_sites['annee_creation'] = '2021'
# Keep only the year part of the creation date; unknown years -> '9999'.
df_sites.loc[~df_sites.annee_creation.isna(), 'annee_creation'] = df_sites.loc[
    ~df_sites.annee_creation.isna(), 'annee_creation'
].astype(str).str[:4]
# BUGFIX: fill with the string '9999' (the original used the int 9999,
# mixing types in a column whose non-null values were just cast to str).
df_sites['annee_creation'].fillna('9999', inplace=True)

# Drop the Z dimension (the target geometry column is 2D).
# BUGFIX: the original tested `all(...has_z)`, which skipped the
# conversion entirely when 2D and 3D geometries were mixed.
if any(df_sites['geom'].has_z):
    # Round-trip through 2D WKB to strip Z (harmless on already-2D geoms).
    df_sites['geom'] = [
        wkb.loads(wkb.dumps(geom, output_dimension=2))
        for geom in df_sites['geom']
    ]
    df_sites.set_geometry('geom', drop=False, inplace=True, crs=crs)

# Write the sites into the target database.
# BUGFIX: GeoDataFrame.to_postgis() has no `geom_col` parameter (it uses
# the active geometry column); passing it raised a TypeError.
df_sites.to_postgis(
    name=tab_out,
    con=con_fon,
    schema=schema_out,
    index=False,
    if_exists='append',
    chunksize=chunk,
)

# Table d_typsite
# Table d_milieux
# print('INIT construction data for d_milieux table ...... %s sec' % (time_exec(start_time)))
# df = pd.DataFrame(DF['tmilieux'])
# df['milieu_lib_simpl'] = None
# df['milieu_descrip'] = None
# df.drop_duplicates(inplace=True)
# df.dropna(inplace=True, how='all', axis=0)
# df = drop_exist(df, con=con_fon, table='d_milieux', schema=schema_out)

# sql = "UPDATE {0}.{1} SET tmilieux = 'Ecosystèmes alluviaux' WHERE tmilieux = 'Ecosystemes alluviaux'".format(schema_in, table_in)
# print(sql)
# con_in.execute(sql)

# for index, d in df.iterrows():
#     tmp = df[df.id != d.id].copy()
#     tmp['intersect'] = tmp.geometry.intersects(d.geom)
#     if not tmp[tmp.intersect].empty:
#         print(tmp[tmp.intersect])
#     else:
#         print('No intersection for {}'.format(d.id))