#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# Name        : foncier_insert_table.py
# Description : Inserts the cadastral data into the <foncier> database once it has been created.
# Copyright   : 2021, CEN38
# Author      : Colas Geier
# Version     : 1.0

import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine
from geoalchemy2 import Geometry, shape
from shapely import wkb
from shapely.geometry.multipolygon import MultiPolygon
from shapely.ops import unary_union
import datetime as dt
import sys
import gc

# cen_38 database parameters ("bart", input)
# user_in = 'cen_admin'
# pwd_in = '#CEN38@venir'
# adr_in = '192.168.0.3'
# port_in = '5432'
# base_in = 'bd_cen38'

from pycen import con_bdcen
schema_in = 'sites'
table_in = 'c_sites_zonages'

# New FONCIER database parameters (output)
# user_out = 'postgres'
# pwd_out = 'tutu'
# adr_out = '192.168.60.9'
# port_out = '5432'
# base_out = 'bd_cen'

from pycen import con_fon
schema_out = 'sites'

# General parameters
crs = 'EPSG:2154'
chunk = None

# Function definitions
start_time = dt.datetime.today()

def time_exec(init_time):
    # Elapsed time since init_time, returned as a string.
    time = dt.datetime.today() - init_time
    return str(time)

def drop_exist(df, con, table, schema, chunk=None):
    # Read the rows already present in the target table. Note: as written,
    # the function returns df unchanged; dropping the rows that already
    # exist is not implemented yet.
    exist = pd.read_sql_table(
        table_name=table,
        con=con,
        schema=schema,
        chunksize=chunk,
    )
    if chunk:
        exist = pd.concat(exist, ignore_index=True)
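    # Sketch of the missing drop step (illustrative, kept commented out; it
    # assumes rows should be compared on the columns shared by df and exist):
    # common = [c for c in df.columns if c in exist.columns]
    # already = df[common].apply(tuple, axis=1).isin(exist[common].apply(tuple, axis=1))
    # df = df[~already]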
    return df

def give_z(x):
    # Collect the Z coordinate of every vertex of a (Multi)Polygon
    # (exterior ring and interior rings, closing point excluded).
    if x.geom_type == 'Polygon':
        polygons = [x]
    else:
        polygons = x.geoms
    zlist = []
    for polygon in polygons:
        zlist.extend([c[-1] for c in polygon.exterior.coords[:-1]])
        for inner_ring in polygon.interiors:
            zlist.extend([c[-1] for c in inner_ring.coords[:-1]])
    return zlist
    # return sum(zlist)/len(zlist)  # to get the mean instead, or zlist[0] if they are all the same
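
# Illustrative use of give_z (not called below), e.g. to inspect the Z values
# of the geometries loaded further down before the Z dimension is dropped:
# z_values = df['geom'].apply(give_z)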

def get_data(table, schema, engine=con_bdcen, chunk=chunk):
    # Read a table (optionally in chunks) and, if it has a 'geom' column,
    # convert it to a GeoDataFrame in the working CRS.
    tmp = pd.read_sql_table(
        table_name=table,
        # sql = sql,
        con=engine,
        # geom_col = 'geom',
        schema=schema,
        # crs = crs,
        chunksize=chunk,
    )

    # Format the data
    # start_time = dt.datetime.today()
    if chunk:
        df = pd.concat(tmp, ignore_index=True)
    else:
        df = tmp.copy()
    if 'geom' in df.columns and not df[~df['geom'].isna()].empty:
        df = gpd.GeoDataFrame(df)
        # Convert the geoalchemy2 WKBElement values to shapely geometries
        df['geom'] = df.geom.apply(lambda x: shape.to_shape(x))
        df.set_geometry('geom', inplace=True, crs=crs)
    return df
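
# Example use of get_data with chunked reads (illustrative only; the table and
# schema names below are placeholders):
# big = get_data(table='some_large_table', schema='some_schema', engine=con_bdcen, chunk=10000)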


# Connection setup: con_bdcen and con_fon are imported from pycen above; the
# manual create_engine() calls are kept for reference.
# con_bdcen = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_in,pwd_in,adr_in,port_in,base_in), echo=False)
# con_in = engine_in.connect()
# engine_out = create_engine('postgresql+psycopg2://{0}:{1}@{2}:{3}/{4}'.format(user_out,pwd_out,adr_out,port_out,base_out), echo=False)
# con_fon = engine_out.connect()


########################
######    MAIN    ######
########################
# Get the milieux reference table
tmi = get_data(table='d_milieux', schema='sites', engine=con_fon)

# Import the source data
print('INIT import data ........... %s sec' % (time_exec(start_time)))
sql = "SELECT * FROM {0}.{1}".format(schema_in, table_in)  # kept for reference; get_data() reads the table directly
df = get_data(table=table_in, schema=schema_in, engine=con_bdcen)
# Give the RNIP 'ZO' zoning the same site name as its 'ZI' zoning
df.loc[(df.code_site == 'RNIP') & (df.type_zonage == 'ZO'), 'nom_site'] = df.loc[(df.code_site == 'RNIP') & (df.type_zonage == 'ZI'), 'nom_site'].values[0]
# Keep only the zonings that are still current (no end date)
df = df[df.date_fin.isna()]
df.sort_values('code_site', inplace=True)


# Table sites
tab_out = 'sites'
name_col_out = con_fon.dialect.get_columns(con_fon, tab_out, schema=schema_out)
name_col_out = [sub['name'] for sub in name_col_out]
# Mapping between source columns and target columns
dictio = {
    'code_site' : 'site_id',
    'nom_site'  : 'site_nom',
    # 'id_mnhn' : 'mnhn_id',
    # 'idfcen'  : 'fcen_id',
    'milieux'   : 'milieu_id',
    'date_ajout': 'annee_creation',
    'surface_ha': 'surf_m2',
}
df_sites = gpd.GeoDataFrame(df[list(dictio.keys())], geometry=df.geom, crs=crs)
df_sites.rename(columns=dictio, inplace=True)
df_sites.rename(columns={'geometry': 'geom'}, inplace=True)
df_sites.sort_values(['site_id', 'surf_m2'], inplace=True)
df_sites.drop_duplicates(subset=['site_id', 'site_nom', 'surf_m2'], inplace=True)
# Rows sharing the same site name (ZO/ZI pairs) get suffixed ids and names to
# keep them distinct.
df_sites.loc[df_sites.site_nom.duplicated(keep='first'), 'site_id'] = df_sites.loc[df_sites.site_nom.duplicated(keep='first'), 'site_id'] + '_ZO'
df_sites.loc[df_sites.site_nom.duplicated(keep='last'), 'site_id'] = df_sites.loc[df_sites.site_nom.duplicated(keep='last'), 'site_id'] + '_ZI'
df_sites.loc[df_sites.site_id.str.endswith('ZO'), 'site_nom'] = df_sites.loc[df_sites.site_id.str.endswith('ZO'), 'site_nom'] + ' - ZO'
df_sites.loc[df_sites.site_id.str.endswith('ZI'), 'site_nom'] = df_sites.loc[df_sites.site_id.str.endswith('ZI'), 'site_nom'] + ' - ZI'


# Site / type_site correspondence
site_gere = '|'.join(df[df.gestion].code_site.unique().tolist())
site_asst = '|'.join(df[df.assist_cnv | df.assist_col].code_site.unique().tolist())
df_sites['typsite_id'] = '0'
df_sites.loc[df_sites.site_id.str.contains(site_asst), 'typsite_id'] = '2-1'
df_sites.loc[df_sites.site_id.str.contains(site_gere), 'typsite_id'] = '1-1'
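
# Note (illustrative): str.contains() treats the joined pattern as a regular
# expression, so site codes containing regex metacharacters would need
# escaping, e.g.:
# import re
# site_gere = '|'.join(re.escape(c) for c in df[df.gestion].code_site.unique())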


# Site / milieu_lib_simpl correspondence: harmonise the source labels with the
# d_milieux reference table
df_sites.loc[df_sites['milieu_id'] == 'Tourbières et marais', 'milieu_id'] = 'Milieux humides'
df_sites.loc[df_sites['milieu_id'] == 'Gîte à chiroptères', 'milieu_id'] = 'Gîtes à chiroptères'
df_sites.loc[df_sites['milieu_id'] == 'Ecosystèmes alluviaux', 'milieu_id'] = 'Milieux alluviaux'
df_sites.loc[df_sites['milieu_id'] == 'Ecosystèmes aquatiques', 'milieu_id'] = 'Milieux aquatiques'
df_sites.loc[df_sites['milieu_id'] == 'Pelouses sèches', 'milieu_id'] = 'Pelouses sèches'
df_sites.loc[df_sites['milieu_id'].isna(), 'milieu_id'] = 'N.P.'

# Replace each label by the corresponding milieu_id from d_milieux
for m in df_sites['milieu_id'].unique():
    df_sites.loc[df_sites['milieu_id'] == m, 'milieu_id'] = tmi[
        (tmi['milieu_lib_simpl'] == m) |
        (tmi['milieu_lib'] == m)
    ]['milieu_id'].values[0]
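
# Note (illustrative): if a label has no match in d_milieux, .values[0] raises
# an IndexError; a guarded lookup could be used inside the loop instead, e.g.:
# match = tmi[(tmi['milieu_lib_simpl'] == m) | (tmi['milieu_lib'] == m)]['milieu_id']
# if match.empty:
#     print('No milieu_id found for %s' % m)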

# Field completion for testing...
# df_sites['annee_creation'] = '2021'
# Keep only the year (first 4 characters) of the creation date
df_sites.loc[~df_sites.annee_creation.isna(), 'annee_creation'] = df_sites.loc[
    ~df_sites.annee_creation.isna(),
    'annee_creation'].astype(str).str[:4]
df_sites['annee_creation'].fillna(9999, inplace=True)

if all(df_sites['geom'].has_z):
    # Drop the Z dimension: round-trip each geometry through 2D WKB
    geom_type = df_sites['geom'].geom_type
    df_sites['geom'] = [wkb.loads(wkb.dumps(geom, output_dimension=2)) for geom in df_sites['geom']]
    df_sites.set_geometry('geom', drop=False, inplace=True, crs=crs)
    # df_sites.rename(columns= {'geometry': 'geom'}, inplace=True)
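
# Note (illustrative): the Z dimension is only stripped when every geometry is
# 3D; if mixed 2D/3D inputs are possible, testing with .any() would be safer:
# if df_sites['geom'].has_z.any():
#     df_sites['geom'] = [wkb.loads(wkb.dumps(g, output_dimension=2)) for g in df_sites['geom']]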

# Write the sites table to the foncier database
df_sites.to_postgis(
    name=tab_out,
    con=con_fon,
    schema=schema_out,
    index=False,
    if_exists='append',
    chunksize=chunk,
    # geom_col is not a to_postgis() argument; the geometry column written is
    # the active geometry of the GeoDataFrame ('geom', set above).
)
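
# Optional sanity check (illustrative): count the rows now present in the
# target table.
# n_rows = pd.read_sql_table(tab_out, con=con_fon, schema=schema_out).shape[0]
# print('%s rows in %s.%s' % (n_rows, schema_out, tab_out))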


# Table d_typsite

# Table d_milieux
# print('INIT construction data for d_milieux table ...... %s sec'%( time_exec(start_time) ))
# df = pd.DataFrame(DF['tmilieux'])
# df['milieu_lib_simpl'] = None
# df['milieu_descrip'] = None
# df.drop_duplicates(inplace=True)
# df.dropna(inplace=True, how='all', axis=0)
# df = drop_exist(df, con=con_fon, table='d_milieux', schema=schema_out)

# sql = "UPDATE {0}.{1} SET tmilieux = 'Ecosystèmes alluviaux' WHERE tmilieux = 'Ecosystemes alluviaux'".format(schema_in, table_in)
# print(sql)
# con_in.execute(sql)

# Commented-out check for overlapping geometries:
# for index, d in df.iterrows():
#     tmp = df[df.id != d.id].copy()
#     tmp['intersect'] = tmp.geometry.intersects(d.geom)
#     if not tmp[tmp.intersect].empty:
#         print(tmp[tmp.intersect])
#     else:
#         print('No intersection for {}'.format(d.id))