# Python_scripts/0_FONCIER/foncier_insert_cadastre_V3.py

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

from pycen import con_fon
from sqlalchemy.sql import text
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
import pandas as pd
import geopandas as gpd

# Connection settings of the cadastre source database.
# NOTE(review): credentials are hard-coded in this file; consider reading them
# from the environment or from a configuration file kept out of the repository.
# Alternative settings, kept commented out:
# user   = 'cgeier'
# pwd    = 'adm1n*bdCen'
# adr    = '91.134.194.221'
# base   = 'cadastre'
user = 'postgres'
pwd = 'foncier_test1'
adr = '172.17.0.2'
base = 'postgres'

# Engine used by every read query of this script.
url = URL.create(
    drivername='postgresql+psycopg2',
    username=user,
    password=pwd,
    host=adr,
    database=base,
)
con_cad = create_engine(url)
# from pycen import con_cad


# sql = '''SELECT * FROM "38_202207".suf WHERE parcelle = '3800740000B0705';'''
# df = pd.read_sql_query(sql,con_cad)
# df.drop_duplicates(inplace=True)
# df.annee = '2020'
# df.to_sql('suf',con_cad,"38_202207",if_exists='append',index=False)


# fadd = '/home/colas/Documents/tmp/FONCIER_FEDE/add_parc.gpkg'
# add = gpd.read_file(fadd)


def recup_cols_table(table,con,schema='38_202207'):
    """Return the column names of ``schema.table`` as a list.

    The column descriptions come from ``con.dialect.get_columns``; only the
    ``'name'`` entry of each description is kept, in the order received.
    """
    names = []
    for column in con.dialect.get_columns(con,table,schema):
        names.append(column['name'])
    return names


def __get_pkey__(engine,table_name,schema):
    """Return the primary-key description of ``schema.table_name``.

    The value is exactly what ``engine.dialect.get_pk_constraint`` returns;
    the callers in this file read its ``'constrained_columns'`` entry.
    """
    dialect = engine.dialect
    return dialect.get_pk_constraint(engine,table_name=table_name,schema=schema)


def _where_parcelle(sql0,schema,list_parid):
    """Run ``sql0`` on the cadastre source database, optionally filtered on parcels.

    Parameters
    ----------
    sql0 : str
        SELECT statement without its final ``;``. The filter is written on
        ``p.parcelle``, so the statement must expose a table under alias ``p``.
    schema : str
        Not used here (``sql0`` is already formatted by the caller); kept so
        that every caller can keep passing it.
    list_parid : str, iterable of str or None
        Parcel identifier(s) to keep. ``None`` disables the filter. An empty
        iterable selects no row.

    Returns
    -------
    pandas.DataFrame or iterator of pandas.DataFrame
        One frame when ``list_parid`` is given; an iterator of frames read
        with ``chunksize=200000`` when the whole table is read.
    """
    # Local import: the file header only imports ``text`` from sqlalchemy.
    from sqlalchemy import bindparam

    if list_parid is None:
        # Unfiltered extraction: stream the result chunk by chunk.
        return pd.read_sql_query(text(sql0 + ';'),con=con_cad,chunksize=200000)

    if isinstance(list_parid,str):
        list_parid = [list_parid]

    # The identifiers travel as an expanding bound parameter instead of being
    # pasted into the SQL text: quoting is left to the driver and an empty
    # list no longer produces the invalid "IN ()".
    sql = sql0 + '''
        WHERE p.parcelle IN :list_id
        ;'''
    stmt = text(sql).bindparams(bindparam('list_id',expanding=True))

    return pd.read_sql_query(
        stmt,
        con=con_cad,
        params={'list_id': list(list_parid)},
        chunksize=None)


def _get_chunk(df1,df2):
    """Concatenate two query results and drop the duplicated rows.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame or iterable of pandas.DataFrame
        Either a single frame (what the readers of this file return when a
        parcel filter is set) or a stream of chunks (what they return with
        ``chunksize``).

    Returns
    -------
    pandas.DataFrame
        Rows of ``df1`` followed by rows of ``df2``, original index labels
        kept, without duplicated rows.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when both inputs are empty streams.
    """
    frames = []
    for source in (df1,df2):
        if isinstance(source,pd.DataFrame):
            # Iterating a DataFrame yields its column labels, not its rows:
            # a whole frame must be taken as one single chunk.
            frames.append(source)
        else:
            frames.extend(source)

    return pd.concat(frames).drop_duplicates()


def __get_parcelles__(sql0,list_parid):
    """Run the parcel query ``sql0`` through ``gpd.read_postgis`` on ``con_cad``.

    ``sql0`` is a SELECT statement without its final ``;``. When
    ``list_parid`` (one identifier or an iterable of identifiers) is given,
    a ``WHERE t1.geo_parcelle IN (...)`` clause is appended; otherwise the
    statement is only terminated. The result is read with ``chunksize=None``.
    """
    if list_parid is None:
        tail = ';'
    else:
        ids = [list_parid] if isinstance(list_parid,str) else list_parid
        # Text of the tuple, minus the trailing comma Python adds to 1-tuples
        id_list = str(tuple(ids)).replace(',)',')')
        tail = '''
        WHERE t1.geo_parcelle IN {list_id}
        ;'''.format(list_id=id_list)

    return gpd.read_postgis(sql=sql0 + tail,con=con_cad,chunksize=None)


def _get_parcelles1(schema='38_202207',list_parid=None):
    """Read the parcels starting from the ``parcelle`` table of ``schema``.

    ``parcelle`` (alias ``p``) is left-joined to ``parcelle_info`` (``t2``),
    ``geo_parcelle`` (``t1``) and ``proprietaire`` (``t3``). The geometry of
    ``geo_parcelle`` is used, falling back to the one of ``parcelle_info``
    when it is null. Owner attributes are aggregated as comma-separated
    strings. The statement is handed to ``__get_parcelles__`` together with
    ``list_parid``.
    """
    # NOTE(review): two different select expressions are aliased "ccogrm";
    # confirm the name intended for the gtoper/ccogrm concatenation.
    # NOTE(review): DISTINCT ON is applied to a column of the left-joined
    # geo_parcelle table; rows without a match there share a NULL key --
    # verify this is the intended behaviour.
    template = '''SELECT DISTINCT ON (t1.geo_parcelle)
        t1.geo_parcelle,
        case when t1.geom is null then t2.geom
        	else t1.geom
        end geom,
        substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 12) par_id,
        substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3) codcom,
        substring(p.parcelle from 1 for 2) ccodep,
        substring(p.parcelle from 4 for 3) ccocom,
        substring(p.parcelle from 7 for 3) ccopre,
        substring(p.parcelle from 10 for 2) ccosec,
        substring(p.parcelle from 12 for 4) dnupla,
        p.annee annee_pci,
        t1.update_dat,
        p.dparpi,
        p.dcntpa,
        p.ccocomm,
        p.ccoprem,
        p.ccosecm,
        p.dnuplam,
        p.ccovoi,
        p.ccoriv,
        p.type_filiation "type",
        substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id,
        (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) gtoper,
        (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm,
        (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ccogrm,
        (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE (ccodep,ccocom,dnupro) = (t3.ccodep,t3.ccocom,t3.dnupro)) ddenom
    FROM "{sch}".{t1} p
        LEFT JOIN "{sch}".parcelle_info t2 ON t2.geo_parcelle = p.parcelle
        LEFT JOIN "{sch}"."geo_parcelle" t1 ON t1.geo_parcelle = p.parcelle
        LEFT JOIN "{sch}".proprietaire t3 USING (ccodep,ccocom,dnupro)
    '''
    names = {'sch': schema, 't1': 'parcelle'}
    query = template.format(**names)

    return __get_parcelles__(query,list_parid)


def _get_parcelles2(schema='38_202207',list_parid=None):
    """Read the parcels starting from the ``geo_parcelle`` table of ``schema``.

    ``geo_parcelle`` (alias ``t1``) is left-joined to ``parcelle_info``
    (``t2``, itself left-joined to ``proprietaire`` ``t3``) and to
    ``parcelle`` (``p``). Identifier parts are cut out of
    ``t1.geo_parcelle`` and owner attributes are aggregated as
    comma-separated strings. The statement is handed to
    ``__get_parcelles__`` together with ``list_parid``.
    """
    # NOTE(review): two different select expressions are aliased "ccogrm";
    # confirm the name intended for the gtoper/ccogrm concatenation.
    template = '''SELECT DISTINCT ON (t1.geo_parcelle)
        t1.geo_parcelle,
        substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 12) par_id,
        substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3) codcom,
        substring(t1.geo_parcelle from 1 for 2) ccodep,
        substring(t1.geo_parcelle from 4 for 3) ccocom,
        substring(t1.geo_parcelle from 7 for 3) ccopre,
        substring(t1.geo_parcelle from 10 for 2) ccosec,
        substring(t1.geo_parcelle from 12 for 4) dnupla,
        t1.annee annee_pci,
        t1.update_dat,
        p.dparpi,
        p.dcntpa,
        p.ccocomm,
        p.ccoprem,
        p.ccosecm,
        p.dnuplam,
        p.ccovoi,
        p.ccoriv,
        p.type_filiation "type",
        t1.geom,
        substring(t1.geo_parcelle from 1 for 2)||substring(t1.geo_parcelle from 4 for 3)||p.ccovoi vl_id,
        (SELECT STRING_AGG(DISTINCT gtoper::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) gtoper,
        (SELECT STRING_AGG(DISTINCT ccogrm::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm,
        (SELECT STRING_AGG(DISTINCT CONCAT(gtoper::text||COALESCE('_'||ccogrm::text,'')),',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ccogrm,
        (SELECT STRING_AGG(DISTINCT TRIM(ddenom)::text,',') FROM "{sch}".proprietaire WHERE comptecommunal = t3.comptecommunal) ddenom
    FROM "{sch}"."{t1}" t1
        LEFT JOIN ("{sch}".parcelle_info t2
            LEFT JOIN "{sch}".proprietaire t3 USING (comptecommunal))
        USING (geo_parcelle)
        LEFT JOIN "{sch}".parcelle p ON t1.geo_parcelle = p.parcelle
    '''
    names = {'sch': schema, 't1': 'geo_parcelle'}
    query = template.format(**names)

    return __get_parcelles__(query,list_parid)


def _get_parcelles(schema='38_202207',list_parid=None):
    """Read the parcels from both sources and merge the two results.

    ``_get_parcelles1`` is run first, then ``_get_parcelles2``; a progress
    line is printed after each one. Both results are combined by
    ``_get_chunk``.
    """
    from_parcelle = _get_parcelles1(schema,list_parid)
    print('parcelles from parcelle .......... OK')

    from_geo_parcelle = _get_parcelles2(schema,list_parid)
    print('parcelles from geo_parcelle ...... OK')

    merged = _get_chunk(from_parcelle,from_geo_parcelle)
    return merged


def _get_voie2(schema='38_202207',list_parid=None):
    """Read street identifiers and labels from the ``voie`` table of ``schema``.

    ``voie`` (alias ``t1``) is joined to ``parcelle`` (``p``) on ``voie``.
    The selected columns are ``vl_id`` (``ccodep``, ``ccocom`` and
    ``ccovoi`` concatenated) and ``libelle``. The statement is executed by
    ``_where_parcelle``, which applies the optional ``list_parid`` filter.
    """
    template = '''
    SELECT
        t1.ccodep||t1.ccocom||p.ccovoi vl_id,
        t1.libvoi libelle
    FROM "{sch}"."{t1}" t1
        JOIN "{sch}".parcelle p USING (voie)
    '''
    names = {'sch': schema, 't1': 'voie'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_lots_natcult2(schema='38_202207',list_parid=None):
    """Read the land-use attributes of the lots from the ``suf`` table.

    ``suf`` is joined to ``parcelle`` (alias ``p``) on ``parcelle``.
    ``par_id`` is the parcel code without its third character; ``lot_id`` is
    ``par_id`` followed by the trimmed ``dnulot``, or by ``'0000000'`` when
    ``dnulot`` is blank. The statement is executed by ``_where_parcelle``,
    which applies the optional ``list_parid`` filter.
    """
    template = '''
    SELECT
        CASE WHEN TRIM(dnulot) = ''
            THEN substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||'0000000'
            ELSE substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12)||TRIM(dnulot)
        END lot_id,
        parcelle,
        substring(parcelle from 1 for 2)||substring(parcelle from 4 for 12) par_id,
        dsgrpf,
        cnatsp,
        dclssf,
        ccosub,
        dcntsf
    FROM "{sch}"."{t1}"
        JOIN "{sch}".parcelle p USING(parcelle)
    '''
    names = {'sch': schema, 't1': 'suf'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_lots2(schema='38_202207',list_parid=None):
    """Read the lots (identifier, number, area, geometry) from the ``suf`` table.

    ``suf`` (alias ``t``) is joined to ``parcelle`` (``p``) and left-joined
    to ``lots`` (``l``), ``lotslocaux`` (``ll``), ``geo_subdfisc_parcelle``
    and ``geo_subdfisc``. ``dnulot`` falls back to the value of ``lots``
    when it is blank or null in ``suf``; ``dcntlo`` is 0 when null. The
    statement is executed by ``_where_parcelle``, which applies the optional
    ``list_parid`` filter.
    """
    template = '''
    SELECT DISTINCT
        CASE WHEN TRIM(t.dnulot) = ''
            THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000'
            ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot)
        END lot_id,
        t.parcelle,
        substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12) par_id,
        CASE WHEN TRIM(t.dnulot) = '' OR TRIM(t.dnulot) IS NULL
            THEN TRIM(l.dnulot)
            ELSE TRIM(t.dnulot)
        END dnulot,
        CASE WHEN l.dcntlo IS NULL
            THEN 0
            ELSE l.dcntlo
        END dcntlo,
        geo_sub.geom
    FROM "{sch}"."{t1}" t
        JOIN "{sch}".parcelle p USING (parcelle)
        LEFT JOIN "{sch}".lots l USING (parcelle)
        LEFT JOIN "{sch}".lotslocaux ll USING (lots)
        LEFT JOIN "{sch}".geo_subdfisc_parcelle geo_sub_p ON p.parcelle = geo_sub_p.geo_parcelle
        LEFT JOIN "{sch}".geo_subdfisc geo_sub USING (geo_subdfisc)
    '''
    names = {'sch': schema, 't1': 'suf'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_cadastre2(schema='38_202207',list_parid=None):
    """Read the distinct (``lot_id``, ``dnupro``) pairs from the ``suf`` table.

    ``suf`` (alias ``t``) is joined to ``parcelle`` (``p``), itself
    left-joined to ``lots`` (``l``). ``dnupro`` is prefixed with characters
    1-2 and 4-6 of the parcel code. The statement is executed by
    ``_where_parcelle``, which applies the optional ``list_parid`` filter.
    """
    template = '''
    SELECT DISTINCT
        CASE WHEN TRIM(t.dnulot) = ''
            THEN substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||'0000000'
            ELSE substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 12)||TRIM(t.dnulot)
        END lot_id,
        substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro
    FROM "{sch}"."{t1}" t
        JOIN "{sch}".parcelle p
            LEFT JOIN "{sch}".lots l USING (parcelle)
        USING (parcelle)
    '''
    names = {'sch': schema, 't1': 'suf'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_cptprop1(schema='38_202207',list_parid=None):
    """Read the distinct owner accounts from the ``proprietaire`` table.

    ``proprietaire`` (alias ``t``) is joined to ``parcelle`` (``p``) on
    ``(ccodep, ccocom, dnupro)``. The selected columns are ``dnupro``
    (``ccodep``, ``ccocom`` and ``dnupro`` concatenated) and
    ``annee_matrice``. The ``lot`` CTE is declared but not referenced by the
    active part of the statement. The statement is executed by
    ``_where_parcelle``, which applies the optional ``list_parid`` filter.
    """
    template = '''
    WITH lot as (
        SELECT ccodep,ccocom,dnuprol dnupro
        FROM "{sch}".lots
    )
    SELECT DISTINCT
        t.ccodep||t.ccocom||t.dnupro dnupro,
        t."annee" annee_matrice
    FROM "{sch}"."{t1}" t
        --JOIN "{sch}".suf s USING (ccodep,ccocom,dnupro)
        --JOIN lot l USING (ccodep,ccocom,dnupro)
        JOIN "{sch}".parcelle p USING (ccodep,ccocom,dnupro)
    '''
    names = {'sch': schema, 't1': 'proprietaire'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_cptprop2(schema='38_202207',list_parid=None):
    """Read the distinct owner accounts referenced by the ``suf`` table.

    ``suf`` (alias ``t``) is joined to ``parcelle`` (``p``), itself
    left-joined to ``lots`` (``l``). The selected columns are ``dnupro``
    (prefixed with characters 1-2 and 4-6 of the parcel code) and
    ``annee_matrice``. The statement is executed by ``_where_parcelle``,
    which applies the optional ``list_parid`` filter.
    """
    template = '''
    SELECT DISTINCT
        substring(t.parcelle from 1 for 2)||substring(t.parcelle from 4 for 3)||t.dnupro dnupro,
        t.annee annee_matrice
    FROM "{sch}"."{t1}" t
        JOIN "{sch}".parcelle p
            LEFT JOIN "{sch}".lots l USING (parcelle)
        USING (parcelle)
    '''
    names = {'sch': schema, 't1': 'suf'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_cptprop(schema='38_202207',list_parid=None):
    """Read the owner accounts from both sources and merge the two results.

    ``_get_cptprop1`` is run first, then ``_get_cptprop2``; a progress line
    is printed after each one. Both results are combined by ``_get_chunk``.
    """
    from_proprietaire = _get_cptprop1(schema=schema, list_parid=list_parid)
    print('cptprop from proprietaire ... OK')

    from_suf = _get_cptprop2(schema=schema, list_parid=list_parid)
    print('cptprop from suf ............ OK')

    merged = _get_chunk(from_proprietaire,from_suf)
    return merged


def _get_r_prop_cptprop1(schema='38_202207',list_parid=None):
    """Read the distinct links between owner accounts and owners.

    ``proprietaire`` (alias ``t``) is joined to ``parcelle`` (``p``) on
    ``(ccodep, ccocom, dnupro)``. The selected columns are ``dnupro`` and
    ``dnuper`` (both prefixed with parts of the parcel code), ``ccodro`` and
    ``ccodem``. The statement is executed by ``_where_parcelle``, which
    applies the optional ``list_parid`` filter.
    """
    template = '''
    SELECT DISTINCT
        substring(p.parcelle from 1 for 2)||substring(p.parcelle from 4 for 3)||t.dnupro dnupro,
        substring(p.parcelle from 1 for 2)||t.dnuper dnuper,
        ccodro,
        ccodem
    FROM "{sch}"."{t1}" t
        JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro)
    '''
    names = {'sch': schema, 't1': 'proprietaire'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _get_proprios1(schema='38_202207',list_parid=None):
    """Read the distinct owners from the ``proprietaire`` table of ``schema``.

    ``proprietaire`` (alias ``t``) is joined to ``parcelle`` (``p``) on
    ``(ccodep, ccocom, dnupro)``. ``dnuper`` is prefixed with ``ccodep``;
    the text attributes are trimmed, and ``ccoqua``, ``gtoper`` and
    ``ccogrm`` are cast to ``int``. The statement is executed by
    ``_where_parcelle``, which applies the optional ``list_parid`` filter.
    """
    template = '''
    SELECT DISTINCT
        t.ccodep||t.dnuper dnuper,
        TRIM(ccoqua)::int ccoqua,
        TRIM(ddenom) ddenom,
        TRIM(dqualp) dqualp,
        TRIM(dnomlp) dnomlp,
        TRIM(dnomus) dnomus,
        TRIM(dprnus) dprnus,
        TRIM(dprnlp) dprnlp,
        TRIM(epxnee) epxnee,
        TRIM(dnomcp) dnomcp,
        TRIM(dprncp) dprncp,
        TRIM(jdatnss) jdatnss,
        TRIM(dldnss) dldnss,
        TRIM(dlign3) dlign3,
        TRIM(dlign4) dlign4,
        TRIM(dlign5) dlign5,
        TRIM(dlign6) dlign6,
        TRIM(gtoper)::int gtoper,
        TRIM(ccogrm)::int ccogrm,
        TRIM(dnatpr) dnatpr,
        TRIM(dsglpm) dsglpm,
        t."annee" annee_matrice
    FROM "{sch}"."{t1}" t
        JOIN "{sch}".parcelle p USING (ccodep,ccocom, dnupro)
    '''
    names = {'sch': schema, 't1': 'proprietaire'}
    query = template.format(**names)

    return _where_parcelle(query,schema,list_parid)


def _to_cadaste_table(df,con,pkey,table,schema):
    """Append to ``schema.table`` the rows of ``df`` whose key is not stored yet.

    Parameters
    ----------
    df : pandas.DataFrame (or GeoDataFrame), or iterable of such frames
        Rows to insert. An iterable (e.g. the chunk stream returned by
        ``pd.read_sql_query(..., chunksize=...)``) is processed one frame at
        a time. The frame passed in is left untouched.
    con : connection or engine accepted by pandas
        Target database.
    pkey : list of str
        Column(s) identifying a row of the target table.
    table, schema : str
        Target table and its schema.

    Notes
    -----
    Rows already stored are looked up with ``SELECT * ... WHERE key IN
    (...)``; only the columns returned by that query are sent to the
    database. When the rows to insert have a ``geom`` column, they are
    converted with ``to_wkt()`` and ``geom`` is typed as a MULTIPOLYGON
    geometry with SRID 2154. One line is printed per processed frame.
    """
    # A chunk stream yields several frames: handle them one after the other
    if not isinstance(df,pd.DataFrame):
        for chunk in df:
            _to_cadaste_table(chunk,con,pkey,table,schema)
        return

    # Without any row the key list would be "()" and the lookup query invalid
    if df.empty:
        print('NO news data to insert !')
        return

    # NOTE(review): key values are still pasted into the SQL text through
    # str(tuple(...)); values containing quotes are not supported.
    # Primary key made of several columns
    if len(pkey) > 1 :
        keys = df[pkey]\
            .drop_duplicates()\
            .itertuples(index=False, name=None)
        lstid = str(tuple(keys)).replace(',)',')')
        pk = str(tuple(pkey)).replace("'",'"')
    # Primary key made of a single column
    else:
        pk = pkey[0]
        lstid = str(tuple(df[pk].drop_duplicates()))\
            .replace(',)',')')

    # Rows already stored in the database
    sql = '''
        SELECT * FROM {sch}.{tab}
        WHERE {id} IN {lst}
    ;'''.format(
        sch = schema,
        tab = table,
        id  = pk,
        lst = lstid
    )
    indb = pd.read_sql_query(sql,con,index_col=pkey)

    # Keep the rows whose key is unknown; set_index returns a new frame, so
    # the caller's ``df`` keeps its columns and their order
    indexed = df.set_index(pkey)
    todb = indexed[~indexed.index.isin(indb.index)].reset_index(drop=False)
    indb = indb.reset_index(drop=False)

    if todb.empty:
        print('NO news data to insert !')
        return

    # Column types are adapted only when a geometry is present
    dtyp = {}
    if 'geom' in todb.columns:
        from geoalchemy2 import Geometry
        todb = todb.to_wkt()
        dtyp = {'geom':Geometry(geometry_type='MULTIPOLYGON',srid=2154)}

    # Only the columns known by the target table are sent. ``schema`` is
    # passed by keyword: positional use is deprecated by pandas.
    cols = todb.columns[todb.columns.isin(indb.columns)]
    todb[cols]\
        .to_sql(
            table,
            con,
            schema=schema,
            if_exists='append',
            index=False,
            dtype = dtyp
        )
    print('INSERT %s news data ! OK'%todb.shape[0])


def to_vl(df,con):
    """Insert into ``cadastre.vl`` the rows of ``df`` that are not stored yet.

    The key columns are read from the primary-key constraint of the table.
    """
    target = ('vl','cadastre')
    key_cols = __get_pkey__(con,*target)['constrained_columns']
    _to_cadaste_table(df,con,key_cols,*target)


def to_parcelles(df,con):
    """Insert into ``cadastre.parcelles`` the rows of ``df`` that are not stored yet.

    The key columns are read from the primary-key constraint of the table.
    """
    target = ('parcelles','cadastre')
    key_cols = __get_pkey__(con,*target)['constrained_columns']
    _to_cadaste_table(df,con,key_cols,*target)


def to_lots(df,con):
    """Insert into ``cadastre.lots`` the rows of ``df`` that are not stored yet.

    The key columns are read from the primary-key constraint of the table.
    """
    target = ('lots','cadastre')
    key_cols = __get_pkey__(con,*target)['constrained_columns']
    _to_cadaste_table(df,con,key_cols,*target)


def to_lots_natcult(df,con):
    """Insert into ``cadastre.lots_natcult`` the rows of ``df`` that are not stored yet.

    The key is fixed to ``lot_id`` here instead of being read from the
    primary-key constraint of the table.
    """
    key_cols = ['lot_id']
    _to_cadaste_table(df,con,key_cols,'lots_natcult','cadastre')


def to_cadastre(df,con):
    """Insert into ``cadastre.cadastre`` the rows of ``df`` that are not stored yet.

    The key is fixed to ``(lot_id, dnupro)`` here instead of being read from
    the primary-key constraint of the table.
    """
    key_cols = ['lot_id','dnupro']
    _to_cadaste_table(df,con,key_cols,'cadastre','cadastre')


def to_r_prop_cptprop(df,con):
    """Insert into ``cadastre.r_prop_cptprop`` the rows of ``df`` that are not stored yet.

    The key columns are read from the primary-key constraint of the table.
    """
    target = ('r_prop_cptprop','cadastre')
    key_cols = __get_pkey__(con,*target)['constrained_columns']
    _to_cadaste_table(df,con,key_cols,*target)


def to_proprios(df,con):
    """Insert into ``cadastre.proprios`` the rows of ``df`` that are not stored yet.

    The key columns are read from the primary-key constraint of the table.
    """
    target = ('proprios','cadastre')
    key_cols = __get_pkey__(con,*target)['constrained_columns']
    _to_cadaste_table(df,con,key_cols,*target)


def to_cptprop(df,con):
    """Insert into ``cadastre.cptprop`` the rows of ``df`` that are not stored yet.

    The key columns are read from the primary-key constraint of the table.
    """
    target = ('cptprop','cadastre')
    key_cols = __get_pkey__(con,*target)['constrained_columns']
    _to_cadaste_table(df,con,key_cols,*target)


if __name__ == "__main__":

    # NOTE(review): update_to_sql is imported but not used below.
    from pycen import update_to_sql

    # Parcel filter: None reads everything; a parcel id restricts the run,
    # e.g. par = '3805050000E0523'
    par = None
    sch = '38_202007'

    # Step 1 -- read everything from the cadastre source schema
    cptprop = _get_cptprop(schema=sch, list_parid=par)
    proprios = _get_proprios1(schema=sch, list_parid=par)
    r_prop_cptprop = _get_r_prop_cptprop1(schema=sch, list_parid=par)
    voie = _get_voie2(schema=sch, list_parid=par)
    parcelles = _get_parcelles(schema=sch, list_parid=par)
    lots = _get_lots2(schema=sch, list_parid=par)
    lots_natcult = _get_lots_natcult2(schema=sch, list_parid=par)
    cadastre = _get_cadastre2(schema=sch, list_parid=par)

    # Step 2 -- insert table after table; the sequence below must be kept
    # (presumably referenced tables first -- confirm against the foreign keys)
    inserts = (
        (to_cptprop, cptprop),
        (to_proprios, proprios),
        (to_r_prop_cptprop, r_prop_cptprop),
        (to_vl, voie),
        (to_parcelles, parcelles),
        (to_lots, lots),
        (to_lots_natcult, lots_natcult),
        (to_cadastre, cadastre),
    )
    for insert, data in inserts:
        insert(data,con_fon)