# Python_scripts/0_FONCIER/foncier_insert_cadastre_V3.py
#
# 540 lines
# 17 KiB
# Python

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
from pycen import con_fon
from sqlalchemy.sql import text
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
import pandas as pd
import geopandas as gpd
# user = 'cgeier'
# pwd = 'adm1n*bdCen'
# adr = '91.134.194.221'
# base = 'cadastre'
# Connection settings of the source cadastre database.
# NOTE(review): the password is hard-coded in the source file; consider
# loading the credentials from the environment or an untracked config file.
user = 'postgres'
pwd = 'foncier_test1'
adr = '172.17.0.2'
base = 'postgres'
# SQLAlchemy URL for the psycopg2 driver, built from the settings above.
url = URL.create('postgresql+psycopg2',
username=user,
password=pwd,
host=adr,
database=base,
)
# Engine used by the query helpers of this file (_where_parcelle and
# __get_parcelles__) to read the cadastre data.
con_cad = create_engine(url)
# from pycen import con_cad
# sql = '''SELECT * FROM "38_202207".suf WHERE parcelle = '3800740000B0705';'''
# df = pd.read_sql_query(sql,con_cad)
# df.drop_duplicates(inplace=True)
# df.annee = '2020'
# df.to_sql('suf',con_cad,"38_202207",if_exists='append',index=False)
# fadd = '/home/colas/Documents/tmp/FONCIER_FEDE/add_parc.gpkg'
# add = gpd.read_file(fadd)
def recup_cols_table(table, con, schema='38_202207'):
    """Return the column names of ``schema.table`` as a list.

    The column descriptions are obtained from ``con.dialect.get_columns``;
    only the ``'name'`` entry of each description is kept, in the order the
    dialect returned them.
    """
    names = []
    for description in con.dialect.get_columns(con, table, schema):
        names.append(description['name'])
    return names
def __get_pkey__(engine, table_name, schema):
    """Return the primary-key constraint description of ``schema.table_name``.

    The value is whatever ``engine.dialect.get_pk_constraint`` returns; the
    callers in this file read its ``'constrained_columns'`` entry.
    """
    return engine.dialect.get_pk_constraint(
        engine,
        table_name=table_name,
        schema=schema,
    )
def _where_parcelle(sql0, schema, list_parid):
    """Run ``sql0`` on ``con_cad``, optionally restricted to some parcels.

    Parameters
    ----------
    sql0 : str
        SELECT statement without its terminating semicolon. When
        ``list_parid`` is given it must expose a table alias ``p`` with a
        ``parcelle`` column, because the filter is ``p.parcelle IN (...)``.
    schema : str
        Not used by this function; kept so existing callers keep working.
    list_parid : str, iterable of str or None
        Parcel identifier(s) to keep. ``None`` means no filter.

    Returns
    -------
    pandas.DataFrame when ``list_parid`` is given (``chunksize=None``);
    otherwise the chunked result of ``pd.read_sql_query`` with
    ``chunksize=200000``, which has to be iterated by the caller.
    """
    if list_parid is None:
        chunk = 200000
        sql1 = ';'
    else:
        chunk = None
        if isinstance(list_parid, str):
            list_parid = [list_parid]
        # Build the SQL list by hand instead of relying on the repr of a
        # Python tuple: embedded apostrophes are doubled, and an empty
        # selection becomes "IN (NULL)" (matches no row) rather than the
        # invalid "IN ()".
        literals = [
            "'" + str(parid).replace("'", "''") + "'"
            for parid in list_parid
        ]
        in_list = '(' + ', '.join(literals) + ')' if literals else '(NULL)'
        sql1 = '''
WHERE p.parcelle IN {list_id}
;'''.format(list_id=in_list)
    sql = sql0 + sql1
    return pd.read_sql_query(text(sql), con=con_cad, chunksize=chunk)
def _get_chunk(df1, df2):
    """Merge two query results into a single de-duplicated DataFrame.

    Each argument may be either a DataFrame (query run without chunksize)
    or an iterable of DataFrame chunks (query run with chunksize). All
    frames are concatenated, first those of ``df1`` then those of ``df2``,
    and exact duplicate rows are dropped.

    Returns an empty DataFrame when neither argument yields any frame.
    """
    def _as_frames(result):
        # Iterating a DataFrame yields its column labels, not chunks, so a
        # whole frame must be wrapped instead of being looped over.
        if isinstance(result, pd.DataFrame):
            return [result]
        return list(result)

    frames = _as_frames(df1) + _as_frames(df2)
    if not frames:
        # pd.concat refuses an empty list of objects.
        return pd.DataFrame()
    return pd.concat(frames).drop_duplicates()
def __get_parcelles__(sql0, list_parid):
    """Run a parcel query on ``con_cad`` and return it as a GeoDataFrame.

    Parameters
    ----------
    sql0 : str
        SELECT statement without its terminating semicolon. When
        ``list_parid`` is given it must expose a table alias ``t1`` with a
        ``geo_parcelle`` column, because the filter is
        ``t1.geo_parcelle IN (...)``.
    list_parid : str, iterable of str or None
        Parcel identifier(s) to keep. ``None`` means no filter.

    Returns
    -------
    The result of ``gpd.read_postgis`` called with ``chunksize=None``.
    """
    if list_parid is None:
        sql1 = ';'
    else:
        if isinstance(list_parid, str):
            list_parid = [list_parid]
        # Build the SQL list by hand instead of relying on the repr of a
        # Python tuple: embedded apostrophes are doubled, and an empty
        # selection becomes "IN (NULL)" (matches no row) rather than the
        # invalid "IN ()".
        literals = [
            "'" + str(parid).replace("'", "''") + "'"
            for parid in list_parid
        ]
        in_list = '(' + ', '.join(literals) + ')' if literals else '(NULL)'
        sql1 = '''
WHERE t1.geo_parcelle IN {list_id}
;'''.format(list_id=in_list)
    sql = sql0 + sql1
    # The whole result is always read at once (no chunking).
    return gpd.read_postgis(sql=sql, con=con_cad, chunksize=None)
def _get_parcelles1(schema='38_202207', list_parid=None):
    """Read the parcels of ``schema`` starting from the ``parcelle`` table.

    ``parcelle`` (alias ``p``) is left-joined with ``parcelle_info``,
    ``geo_parcelle`` (alias ``t1``) and ``proprietaire``. One row is kept
    per ``t1.geo_parcelle`` (DISTINCT ON); the geometry of ``parcelle_info``
    is used when ``geo_parcelle`` has none. The query is executed by
    ``__get_parcelles__`` with the optional ``list_parid`` filter.
    """
    # NOTE(review): the select list gives the alias "ccogrm" to two
    # different expressions (the last two owner sub-selects before ddenom),
    # so the result carries a duplicated column name — confirm the intended
    # name of the second one.
    template = '''SELECT DISTINCT ON (t1.geo_parcelle)
t1.geo_parcelle,
case when t1.geom is null then t2.geom
else t1.geom
end geom,
substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 12) par_id,
substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3) codcom,
substring(p.parcelle from 1 for 2) ccodep,
substring(p.parcelle from 4 for 3) ccocom,
substring(p.parcelle from 7 for 3) ccopre,
substring(p.parcelle from 10 for 2) ccosec,
substring(p.parcelle from 12 for 4) dnupla,
p.annee annee_pci,
t1.update_dat,
p.dparpi,
p.dcntpa,
p.ccocomm,
p.ccoprem,
p.ccosecm,
p.dnuplam,
p.ccovoi,
p.ccoriv,
p.type_filiation "type",
substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id,
(SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) gtoper,
(SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm,
(SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm,
(SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ddenom
FROM "{sch}".{t1} p
LEFT JOIN "{sch}".parcelle_info t2 ON t2.geo_parcelle = p.parcelle
LEFT JOIN "{sch}"."geo_parcelle" t1 ON t1.geo_parcelle = p.parcelle
LEFT JOIN "{sch}".proprietaire t3 USING (ccodep,ccocom,dnupro)
'''
    query = template.format(t1='parcelle', sch=schema)
    return __get_parcelles__(query, list_parid)
def _get_parcelles2(schema='38_202207', list_parid=None):
    """Read the parcels of ``schema`` starting from the ``geo_parcelle`` table.

    ``geo_parcelle`` (alias ``t1``) is left-joined with ``parcelle_info``
    (itself left-joined with ``proprietaire`` on ``comptecommunal``) and
    with ``parcelle`` (alias ``p``). One row is kept per ``t1.geo_parcelle``
    (DISTINCT ON). The query is executed by ``__get_parcelles__`` with the
    optional ``list_parid`` filter.
    """
    # NOTE(review): the select list gives the alias "ccogrm" to two
    # different expressions (the last two owner sub-selects before ddenom),
    # so the result carries a duplicated column name — confirm the intended
    # name of the second one.
    template = '''SELECT DISTINCT ON (t1.geo_parcelle)
t1.geo_parcelle,
substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id,
substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3) codcom,
substring(t1.geo_parcelle from 1 for 2) ccodep,
substring(t1.geo_parcelle from 4 for 3) ccocom,
substring(t1.geo_parcelle from 7 for 3) ccopre,
substring(t1.geo_parcelle from 10 for 2) ccosec,
substring(t1.geo_parcelle from 12 for 4) dnupla,
t1.annee annee_pci,
t1.update_dat,
p.dparpi,
p.dcntpa,
p.ccocomm,
p.ccoprem,
p.ccosecm,
p.dnuplam,
p.ccovoi,
p.ccoriv,
p.type_filiation "type",
t1.geom,
substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id,
(SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) gtoper,
(SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm,
(SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm,
(SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ddenom
FROM "{sch}"."{t1}" t1
LEFT JOIN ("{sch}".parcelle_info t2
LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal))
USING (geo_parcelle)
LEFT JOIN "{sch}".parcelle p ON t1.geo_parcelle = p.parcelle
'''
    query = template.format(t1='geo_parcelle', sch=schema)
    return __get_parcelles__(query, list_parid)
def _get_parcelles(schema='38_202207', list_parid=None):
    """Return the parcels found through either parcel query, de-duplicated.

    Runs ``_get_parcelles1`` (from ``parcelle``) then ``_get_parcelles2``
    (from ``geo_parcelle``), printing a progress line after each, and merges
    both results with ``_get_chunk``.
    """
    # NOTE(review): both results come from read_postgis called with
    # chunksize=None, i.e. whole frames, while _get_chunk loops over its
    # arguments — confirm _get_chunk copes with whole frames.
    from_parcelle = _get_parcelles1(schema, list_parid)
    print('parcelles from parcelle .......... OK')
    from_geo_parcelle = _get_parcelles2(schema, list_parid)
    print('parcelles from geo_parcelle ...... OK')
    merged = _get_chunk(from_parcelle, from_geo_parcelle)
    return merged
def _get_voie2(schema='38_202207', list_parid=None):
    """Read the streets (``voie``) attached to the parcels of ``schema``.

    Selects ``vl_id`` (ccodep + ccocom of the street + ccovoi of the parcel)
    and ``libelle`` (``libvoi``) from ``voie`` joined with ``parcelle`` on
    ``voie``. The query is executed by ``_where_parcelle`` with the optional
    ``list_parid`` filter.
    """
    template = '''
SELECT
t1.ccodep||t1.ccocom||p.ccovoi vl_id,
t1.libvoi libelle
FROM "{sch}"."{t1}" t1
JOIN "{sch}".parcelle p USING (voie)
'''
    query = template.format(t1='voie', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_lots_natcult2(schema='38_202207', list_parid=None):
    """Read the land-use attributes of the ``suf`` rows of ``schema``.

    Each row gets a ``lot_id`` made of the parcel identifier (characters
    1-2 and 4-15 of ``parcelle``) followed by the trimmed ``dnulot``, or by
    '0000000' when ``dnulot`` is blank, plus ``par_id`` and the columns
    dsgrpf, cnatsp, dclssf, ccosub and dcntsf. ``suf`` is joined with
    ``parcelle`` and the query is executed by ``_where_parcelle`` with the
    optional ``list_parid`` filter.
    """
    template = '''
SELECT
CASE WHEN TRIM(dnulot) = ''
THEN substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||'0000000'
ELSE substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||TRIM(dnulot)
END lot_id,
parcelle,
substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12) par_id,
dsgrpf,
cnatsp,
dclssf,
ccosub,
dcntsf
FROM "{sch}"."{t1}"
JOIN "{sch}".parcelle p USING(parcelle)
'''
    query = template.format(t1='suf', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_lots2(schema='38_202207', list_parid=None):
    """Read the lots of ``schema`` from the ``suf`` table.

    Distinct rows with ``lot_id`` (parcel identifier + trimmed ``dnulot``,
    or '0000000' when blank), ``parcelle``, ``par_id``, ``dnulot`` (taken
    from ``lots`` when the ``suf`` value is blank or NULL), ``dcntlo``
    (0 when NULL) and the geometry of ``geo_subdfisc``. The query is
    executed by ``_where_parcelle`` with the optional ``list_parid`` filter.
    """
    template = '''
SELECT DISTINCT
CASE WHEN TRIM(t.dnulot) = ''
THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000'
ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot)
END lot_id,
t.parcelle,
substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12) par_id,
CASE WHEN TRIM(t.dnulot) = '' OR TRIM(t.dnulot) IS NULL
THEN TRIM(l.dnulot)
ELSE TRIM(t.dnulot)
END dnulot,
CASE WHEN l.dcntlo IS NULL
THEN 0
ELSE l.dcntlo
END dcntlo,
geo_sub.geom
FROM "{sch}"."{t1}" t
JOIN "{sch}".parcelle p USING (parcelle)
LEFT JOIN "{sch}".lots l USING (parcelle)
LEFT JOIN "{sch}".lotslocaux ll USING (lots)
LEFT JOIN "{sch}".geo_subdfisc_parcelle geo_sub_p ON p.parcelle = geo_sub_p.geo_parcelle
LEFT JOIN "{sch}".geo_subdfisc geo_sub USING (geo_subdfisc)
'''
    query = template.format(t1='suf', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_cadastre2(schema='38_202207', list_parid=None):
    """Read the distinct (lot, owner account) pairs of ``schema``.

    ``lot_id`` is the parcel identifier followed by the trimmed ``dnulot``
    (or '0000000' when blank); ``dnupro`` is prefixed with characters 1-2
    and 4-6 of ``parcelle``. Rows come from ``suf`` joined with ``parcelle``
    (itself left-joined with ``lots``). The query is executed by
    ``_where_parcelle`` with the optional ``list_parid`` filter.
    """
    template = '''
SELECT DISTINCT
CASE WHEN TRIM(t.dnulot) = ''
THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000'
ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot)
END lot_id,
substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro
FROM "{sch}"."{t1}" t
JOIN "{sch}".parcelle p
LEFT JOIN "{sch}".lots l USING (parcelle)
USING (parcelle)
'''
    query = template.format(t1='suf', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_cptprop1(schema='38_202207', list_parid=None):
    """Read the owner accounts of ``schema`` from the ``proprietaire`` table.

    Distinct ``dnupro`` (ccodep + ccocom + dnupro) and ``annee_matrice``
    for the ``proprietaire`` rows matching a parcel on
    (ccodep, ccocom, dnupro). The ``lot`` CTE is declared but not referenced
    by the active part of the statement. The query is executed by
    ``_where_parcelle`` with the optional ``list_parid`` filter.
    """
    template = '''
WITH lot as (
SELECT ccodep,ccocom,dnuprol dnupro
FROM "{sch}".lots
)
SELECT DISTINCT
t.ccodep||t.ccocom||t.dnupro dnupro,
t."annee" annee_matrice
FROM "{sch}"."{t1}" t
--JOIN "{sch}".suf s USING (ccodep,ccocom,dnupro)
--JOIN lot l USING (ccodep,ccocom,dnupro)
JOIN "{sch}".parcelle p USING (ccodep,ccocom,dnupro)
'''
    query = template.format(t1='proprietaire', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_cptprop2(schema='38_202207', list_parid=None):
    """Read the owner accounts of ``schema`` from the ``suf`` table.

    Distinct ``dnupro`` (characters 1-2 and 4-6 of ``parcelle`` + dnupro)
    and ``annee_matrice`` from ``suf`` joined with ``parcelle`` (itself
    left-joined with ``lots``). The query is executed by ``_where_parcelle``
    with the optional ``list_parid`` filter.
    """
    template = '''
SELECT DISTINCT
substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro,
t.annee annee_matrice
FROM "{sch}"."{t1}" t
JOIN "{sch}".parcelle p
LEFT JOIN "{sch}".lots l USING (parcelle)
USING (parcelle)
'''
    query = template.format(t1='suf', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_cptprop(schema='38_202207', list_parid=None):
    """Return the owner accounts found through either query, de-duplicated.

    Runs ``_get_cptprop1`` (from ``proprietaire``) then ``_get_cptprop2``
    (from ``suf``), printing a progress line after each, and merges both
    results with ``_get_chunk``.
    """
    from_proprietaire = _get_cptprop1(schema=schema, list_parid=list_parid)
    print('cptprop from proprietaire ... OK')
    from_suf = _get_cptprop2(schema=schema, list_parid=list_parid)
    print('cptprop from suf ............ OK')
    merged = _get_chunk(from_proprietaire, from_suf)
    return merged
def _get_r_prop_cptprop1(schema='38_202207', list_parid=None):
    """Read the owner / owner-account relations of ``schema``.

    Distinct ``dnupro`` (characters 1-2 and 4-6 of ``parcelle`` + dnupro),
    ``dnuper`` (characters 1-2 of ``parcelle`` + dnuper), ``ccodro`` and
    ``ccodem`` from ``proprietaire`` joined with ``parcelle`` on
    (ccodep, ccocom, dnupro). The query is executed by ``_where_parcelle``
    with the optional ``list_parid`` filter.
    """
    template = '''
SELECT DISTINCT
substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3)||t.dnupro dnupro,
substring(p.parcelle from 1 for 2)||t.dnuper dnuper,
ccodro,
ccodem
FROM "{sch}"."{t1}" t
JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro)
'''
    query = template.format(t1='proprietaire', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _get_proprios1(schema='38_202207', list_parid=None):
    """Read the owners of ``schema`` from the ``proprietaire`` table.

    Distinct rows with ``dnuper`` prefixed by ``ccodep``, the trimmed
    descriptive columns of the owner (``ccoqua``, ``gtoper`` and ``ccogrm``
    being cast to int) and ``annee_matrice``, for the ``proprietaire`` rows
    matching a parcel on (ccodep, ccocom, dnupro). The query is executed by
    ``_where_parcelle`` with the optional ``list_parid`` filter.
    """
    template = '''
SELECT DISTINCT
t.ccodep||t.dnuper dnuper,
TRIM(ccoqua)::int ccoqua,
TRIM(ddenom) ddenom,
TRIM(dqualp) dqualp,
TRIM(dnomlp) dnomlp,
TRIM(dnomus) dnomus,
TRIM(dprnus) dprnus,
TRIM(dprnlp) dprnlp,
TRIM(epxnee) epxnee,
TRIM(dnomcp) dnomcp,
TRIM(dprncp) dprncp,
TRIM(jdatnss) jdatnss,
TRIM(dldnss) dldnss,
TRIM(dlign3) dlign3,
TRIM(dlign4) dlign4,
TRIM(dlign5) dlign5,
TRIM(dlign6) dlign6,
TRIM(gtoper)::int gtoper,
TRIM(ccogrm)::int ccogrm,
TRIM(dnatpr) dnatpr,
TRIM(dsglpm) dsglpm,
t."annee" annee_matrice
FROM "{sch}"."{t1}" t
JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro)
'''
    query = template.format(t1='proprietaire', sch=schema)
    return _where_parcelle(query, schema, list_parid)
def _to_cadaste_table(df, con, pkey, table, schema):
    """Append to ``schema.table`` the rows of ``df`` whose key is not stored yet.

    Parameters
    ----------
    df : DataFrame, GeoDataFrame or iterable of DataFrame chunks
        Rows to insert. A chunked query result (as returned by
        ``pd.read_sql_query`` with a chunksize) is concatenated first.
        The caller's frame is left untouched.
    con : database connection/engine accepted by pandas
        Target database.
    pkey : list of str
        Column(s) identifying a row in the target table.
    table, schema : str
        Target table and its schema.

    Only the columns that exist in the target table are written. Prints
    the number of inserted rows, or 'NO news data to insert !' when there
    is nothing to insert. Returns None.
    """
    # The query helpers return an iterator of chunks when they run with a
    # chunksize; gather it into one frame before using it.
    if not isinstance(df, pd.DataFrame):
        chunks = list(df)
        df = pd.concat(chunks) if chunks else pd.DataFrame(columns=pkey)
    # Nothing to compare or insert; this also avoids building an invalid
    # "IN ()" clause below.
    if df.empty:
        print('NO news data to insert !')
        return

    keys = df[pkey].drop_duplicates()
    if len(pkey) > 1:
        # Composite primary key: compare row values, ("a", "b") IN ((..), (..))
        lstid = str(
            tuple(keys.itertuples(index=False, name=None))
        ).replace(',)', ')')
        pk = str(tuple(pkey)).replace("'", '"')
    else:
        # Single-column primary key
        pk = pkey[0]
        lstid = str(tuple(keys[pk])).replace(',)', ')')

    # Fetch the rows already stored in the database
    sql = '''
SELECT * FROM {sch}.{tab}
WHERE {id} IN {lst}
;'''.format(
        sch=schema,
        tab=table,
        id=pk,
        lst=lstid
    )
    indb = pd.read_sql_query(sql, con, index_col=pkey)

    # Keep only the rows whose key is not in the database yet; the mask is
    # computed on a re-indexed copy so that ``df`` itself is not modified.
    already_stored = df.set_index(pkey).index.isin(indb.index)
    todb = df[~already_stored].copy()
    indb.reset_index(drop=False, inplace=True)

    if todb.empty:
        print('NO news data to insert !')
        return

    # Adapt the column types when needed
    dtyp = {}
    if 'geom' in todb.columns:
        from geoalchemy2 import Geometry
        # NOTE(review): to_wkt() is called on the frame itself, so a plain
        # pandas frame carrying a 'geom' column (e.g. a result read with
        # pd.read_sql_query) would presumably fail here — TODO confirm.
        todb = todb.to_wkt()
        dtyp = {'geom': Geometry(geometry_type='MULTIPOLYGON', srid=2154)}
    cols = todb.columns[todb.columns.isin(indb.columns)]
    # ``schema`` is passed by keyword: recent pandas versions deprecate
    # positional arguments of to_sql other than ``name`` and ``con``.
    todb[cols].to_sql(
        table,
        con,
        schema=schema,
        if_exists='append',
        index=False,
        dtype=dtyp
    )
    print('INSERT %s news data ! OK'%todb.shape[0])
def to_vl(df, con):
    """Insert the new rows of ``df`` into ``cadastre.vl``.

    The key columns are read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'vl'
    key_columns = __get_pkey__(con, table, schema)['constrained_columns']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_parcelles(df, con):
    """Insert the new rows of ``df`` into ``cadastre.parcelles``.

    The key columns are read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'parcelles'
    key_columns = __get_pkey__(con, table, schema)['constrained_columns']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_lots(df, con):
    """Insert the new rows of ``df`` into ``cadastre.lots``.

    The key columns are read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'lots'
    key_columns = __get_pkey__(con, table, schema)['constrained_columns']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_lots_natcult(df, con):
    """Insert the new rows of ``df`` into ``cadastre.lots_natcult``.

    Rows are compared on ``lot_id``, which is hard-coded here instead of
    being read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'lots_natcult'
    key_columns = ['lot_id']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_cadastre(df, con):
    """Insert the new rows of ``df`` into ``cadastre.cadastre``.

    Rows are compared on (``lot_id``, ``dnupro``), which is hard-coded here
    instead of being read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'cadastre'
    key_columns = ['lot_id', 'dnupro']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_r_prop_cptprop(df, con):
    """Insert the new rows of ``df`` into ``cadastre.r_prop_cptprop``.

    The key columns are read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'r_prop_cptprop'
    key_columns = __get_pkey__(con, table, schema)['constrained_columns']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_proprios(df, con):
    """Insert the new rows of ``df`` into ``cadastre.proprios``.

    The key columns are read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'proprios'
    key_columns = __get_pkey__(con, table, schema)['constrained_columns']
    _to_cadaste_table(df, con, key_columns, table, schema)
def to_cptprop(df, con):
    """Insert the new rows of ``df`` into ``cadastre.cptprop``.

    The key columns are read from the primary-key constraint of the table.
    """
    schema, table = 'cadastre', 'cptprop'
    key_columns = __get_pkey__(con, table, schema)['constrained_columns']
    _to_cadaste_table(df, con, key_columns, table, schema)
if __name__ == "__main__":
    from pycen import update_to_sql

    # Set ``par`` to a parcel id (e.g. '3805050000E0523') or to a list of
    # ids to restrict the run; None applies no parcel filter.
    par = None
    sch = '38_202007'
    scope = {'schema': sch, 'list_parid': par}

    # 1) Extraction from the source cadastre schema
    cptprop = _get_cptprop(**scope)
    proprios = _get_proprios1(**scope)
    r_prop_cptprop = _get_r_prop_cptprop1(**scope)
    voie = _get_voie2(**scope)
    parcelles = _get_parcelles(**scope)
    lots = _get_lots2(**scope)
    lots_natcult = _get_lots_natcult2(**scope)
    cadastre = _get_cadastre2(**scope)

    # 2) Insertion into the target database, in the same order as before
    for insert, frame in (
        (to_cptprop, cptprop),
        (to_proprios, proprios),
        (to_r_prop_cptprop, r_prop_cptprop),
        (to_vl, voie),
        (to_parcelles, parcelles),
        (to_lots, lots),
        (to_lots_natcult, lots_natcult),
        (to_cadastre, cadastre),
    ):
        insert(frame, con_fon)