pycen/pycen/tools.py

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
#Nom :  : tools.py
#Description :
#Copyright : 2021, CEN38
#Auteur    : Colas Geier
#Version : 1.0

from pandas import Series, Index, read_sql, merge


#####################################
###      Fonctions générales      ###
#####################################
def _aggr_cols(df, lst_col, sep=''):
  df['aggreg'] = ''
  for c,col in enumerate(lst_col):
    add = ''
    if c > 0:
      add = sep
    df.loc[~df[col].isna(),'aggreg'] = df.loc[~df[col].isna(),'aggreg'] + add + df.loc[~df[col].isna(),col]
  return df

def to_tuple(obj):
  '''
  Convert a value to a tuple.

  A list or a pandas Series becomes a tuple of its items, a single int or
  str becomes a one-element tuple. Any other object (None, an existing
  tuple, ...) is returned unchanged.
  '''
  if isinstance(obj, (list, Series)):
    return tuple(obj)
  if isinstance(obj, (int, str)):
    return (obj,)
  return obj

def to_colStringSQL(obj):
  '''
  Build the column list of a SQL SELECT.

  An int is converted to its string form, a list or a pandas Index is
  joined with commas (no space). Anything else is returned unchanged.
  '''
  if isinstance(obj, int):
    return str(obj)
  if isinstance(obj, (list, Index)):
    # NOTE(review): the original author noted this does not work with
    # `df.columns` -- unverified here.
    return ",".join(obj)
  return obj

def to_upper(obj):
  '''
  Upper-case a string or every string of a Series, tuple or list.

  The container type is preserved. A Series is rebuilt from a plain list,
  so the result carries a fresh default index. Objects of any other type
  are returned unchanged.
  '''
  if isinstance(obj, str):
    return obj.upper()
  if isinstance(obj, Series):
    return Series([item.upper() for item in obj])
  if isinstance(obj, tuple):
    return tuple(item.upper() for item in obj)
  if isinstance(obj, list):
    return [item.upper() for item in obj]
  return obj

def to_upperfirst(obj):
  '''
  Capitalise a string: first character upper-cased, the rest lower-cased.

  Works on a single str or on every item of a Series, tuple or list; the
  container type is preserved. A Series is rebuilt from a plain list, so
  the result carries a fresh default index. Objects of any other type are
  returned unchanged.

  Empty strings are returned as they are instead of raising IndexError.
  '''
  def _upperfirst(text):
    # Guard against '' : ''.upper()[0] would raise IndexError
    return text.upper()[0] + text.lower()[1:] if text else text

  if isinstance(obj, str):
    return _upperfirst(obj)
  if isinstance(obj, Series):
    return Series([_upperfirst(item) for item in obj])
  if isinstance(obj, tuple):
    return tuple(_upperfirst(item) for item in obj)
  if isinstance(obj, list):
    return [_upperfirst(item) for item in obj]
  return obj

def dropZ(df,geom_col='geom'):
  '''
  Return a copy of `df` whose 3D geometries are flattened to 2D.

  Parameters
  ----------
  df : frame whose `geom_col` column exposes `has_z`
    (presumably a GeoDataFrame -- confirm with callers).
  geom_col : str. Name of the geometry column.

  Only the geometries flagged by `has_z` are rewritten, through a WKB
  round-trip with `output_dimension=2`. The input frame is not modified.
  '''
  from shapely import wkb
  result = df.copy()
  with_z = result[geom_col].has_z
  flattened = [
    wkb.loads(wkb.dumps(shape, output_dimension=2))
    for shape in result.loc[with_z, geom_col]
  ]
  result.loc[with_z, geom_col] = flattened
  return result

def remove_empty_keys(d):
  '''
  Delete, in place, every key of `d` whose value is falsy
  (None, '', 0, empty container, ...).

  Parameters
  ----------
  d : dict. Modified in place; nothing is returned.
  '''
  # Iterate over a snapshot of the keys: deleting while iterating the live
  # view raises "dictionary changed size during iteration".
  for key in list(d):
    if not d[key]:
      del d[key]

def _get_table(con, schema, table, ids=None, nom=None, cols=None, params_col={}, statut='actif'):
  '''
  Select rows of a PostgreSQL table into a DataFrame.

  Parameters
  ----------
  con : sqlalchemy connection.
  schema : str. Name of the PostgreSQL schema.
  table : str. Name of the PostgreSQL table.
  ids : list. Identifiers to select.
    The table must have an 'id' column.
  nom : list. Names to select.
    The table must have a 'nom' column.
  cols : list. Columns of the table to select (all columns by default).
  params_col : dict. 'IN' selection parameters.
    query : {'column': [list]}
    sql   : 'column IN (list)'
    Entries with an empty value are ignored.
  statut : str. 'actif', 'history', 'all'.
    Only used when `table` is 'sites':
    'actif'; date_fin IS NULL
    'history'; date_fin IS NOT NULL
    'all'; every site

  Returns
  -------
  DataFrame, handed to `_set_geom` when the result has a 'geom' column.

  Raises
  ------
  ValueError
    If `table` is 'sites' and `statut` is not one of the three values
    above (this used to produce a malformed WHERE clause).
  '''
  if table == 'sites' and statut not in ('actif', 'history', 'all'):
    raise ValueError(
      "statut must be 'actif', 'history' or 'all', got {!r}".format(statut))

  # Keep only the filters that carry a value (also tolerates params_col=None)
  params_col = {k: v for k, v in (params_col or {}).items() if v}

  # NOTE: schema, table and column names are interpolated as-is (identifiers
  # cannot be bound as query parameters); they must come from trusted code.
  selected = to_colStringSQL(cols) if cols else '*'
  sql = 'SELECT {col} FROM {sch}.{tab}'.format(col=selected, sch=schema, tab=table)

  # Collect the conditions first, then join them: no dangling WHERE / AND
  conditions = []
  if ids:
    conditions.append('id IN %(ids)s')
  if nom:
    conditions.append('nom IN %(nom)s')
  if table == 'sites':
    if statut == 'actif':
      conditions.append('date_fin IS NULL')
    elif statut == 'history':
      conditions.append('date_fin IS NOT NULL')
  conditions.extend('{0} IN %({0})s'.format(col) for col in params_col)
  if conditions:
    sql += ' WHERE ' + ' AND '.join(conditions)

  # Values are bound by the driver; every value is passed as a tuple for IN
  params = {'ids': to_tuple(ids), 'nom': to_tuple(nom)}
  params.update({col: to_tuple(val) for col, val in params_col.items()})

  df = read_sql(sql=sql, con=con, params=params)
  if 'geom' in df.columns:
    df = _set_geom(df)
  return df

def _set_geom(df, hex=True):
  '''
  Declare the 'geom' column of `df` as the geometry, with CRS EPSG:2154.

  Parameters
  ----------
  df : frame holding a 'geom' column.
  hex : bool. If true, every 'geom' value is decoded with
    `shapely.wkb.loads(..., hex=hex)`, the 'geom' column is dropped from
    `df` IN PLACE and a new GeoDataFrame is built whose geometry column
    is renamed to 'geom'.
    If false, `df.set_geometry('geom', crs='EPSG:2154')` is returned.
  '''
  from shapely.wkb import loads
  import geopandas as gpd         # GeoDataFrame / set_geometry

  if not hex:
    return df.set_geometry('geom', crs='EPSG:2154')

  shapes = [loads(raw, hex=hex) for raw in df['geom']]
  # In-place drop: the caller's frame loses its raw 'geom' column too
  df.drop(columns=['geom'], inplace=True)
  gdf = gpd.GeoDataFrame(df, geometry=shapes, crs='EPSG:2154')
  gdf.rename_geometry('geom', inplace=True)
  return gdf


def _get_param(schema, param_table, type_table=None, type_court=True):
  '''
  Read a parameter table, optionally joined with its table of types.

  Parameters
  ----------
  schema : str. Schema holding both tables.
  param_table : str. Name of the parameter table.
  type_table : str. Name of the type table. When omitted, `param_table`
    is returned as read by `_get_table`.
  type_court : bool. If True the 'type' column comes from the short type
    name ('nom_court' of the type table), otherwise from its 'nom'.

  Returns
  -------
  DataFrame. With a type table, the columns start with 'id', followed by
  'type' when that column exists.
  '''
  from .params import con

  if not type_table:
    return _get_table(con, schema, table=param_table)

  types = _get_table(con, schema, table=type_table)
  params = _get_table(con, schema, table=param_table,
    params_col={'id_type': types.id.tolist()})

  # Columns of the type table that clash with the parameters get '_typ'
  joined = merge(params, types, left_on='id_type', right_on='id',
    how='left', suffixes=(None, '_typ'))
  joined = joined.drop(columns=['id_type', 'id_typ'])
  joined = joined.drop(columns=['description_typ'], errors='ignore')

  if type_court:
    joined = joined.drop(columns=['nom_typ']) \
      .rename(columns={'nom_court_typ': 'type'})
  else:
    joined = joined.drop(columns=['nom_court_typ'], errors='ignore') \
      .rename(columns={'nom_typ': 'type'})

  # set_index + reset_index moves these columns to the front
  leading = ['id', 'type'] if 'type' in joined.columns else ['id']
  return joined.set_index(leading).reset_index()


def _get_relation_tab(schema, tab, id_site=None, nom_site=None, last_update=False,
  geom=False,params_col={},milieu=None,statut='actif'):
  '''
  Read a relation table for the selected site geometries.

  Parameters
  ----------
  schema : str.
    Schema of the New_cen38 database.
  tab : str.
    Name of the relation table to read; it is filtered on its
    'id_geom_site' column.
  id_site : str,list. Default : None.
    Identifiers of the sites present in the 'sites' table.
  nom_site : str,list.
    Names of the sites present in the 'sites' table.
  last_update : bool. Default : False.
    If True, only the most recent data are kept.
    If False, all the data are returned.
  geom : bool. Default : False.
    Return the site geometries.
  params_col : dict. Default : {}.
    Selection conditions, as a dictionary {'column_name': conditions}.
  milieu : str. Default : None.
    Name of a habitat type referenced in the `sites.type_milieu` table.
    The list can be retrieved with `pyzh.sites._get_typ_milieux()`.
  statut : str. 'actif', 'history', 'all'.
    Status of the sites to retrieve,
    'actif'; Date_fin IS NULL
    'history'; has a Date_fin
    'all'; every site

  Return
  ----------
  df, or None (after printing a message) when no site geometry matches.
  '''
  from .params import con
  from .sites.sites import get_sitesGeom

  sites_geom = get_sitesGeom(columns='date', id_site=id_site, nom_site=nom_site,
    last_update=last_update, params_col=params_col, milieu=milieu, statut=statut)

  if not (geom or sites_geom.empty):
    sites_geom.drop(columns=['geom'], inplace=True)

  geom_ids = sites_geom.id.tolist()
  if not geom_ids:
    print('PAS de géometries de sites sélectionnées ...')
    return None

  relation = _get_table(con, schema, tab, params_col={'id_geom_site': geom_ids})

  if last_update:
    # Rows equal on every column but id/date/valid are duplicates:
    # keep the last one of each group, then only the valid rows.
    ignored = ['id', 'date', 'valid']
    keys = [name for name in relation.columns if name not in ignored]
    relation = relation.sort_values(keys) \
      .drop_duplicates(subset=keys, keep='last') \
      .reset_index(drop=True)
    relation = relation[relation.valid].copy()

  if 'date' in sites_geom.columns and 'date' in relation.columns:
    # Tell the two dates apart; the relation date is suffixed with the
    # first 5 characters of the last '_'-separated part of the table name.
    sites_geom.rename(columns={'date': 'date_geom'}, inplace=True)
    relation = relation.rename(
      columns={'date': 'date_' + tab.rsplit('_', 1)[1][:5]})

  if relation.empty:
    return relation

  merged = merge(sites_geom, relation, how='left',
    left_on='id', right_on='id_geom_site', suffixes=('_x', None))
  return merged.drop(columns=['id_x', 'id_geom_site']) \
    .set_index('id').reset_index()


def _get_relation_autor(df, relation_tab, schema, id_df, id_relation, id_rela_auth='id_auteur'):
  '''
  Join a relation table to `df` and attach its authors.

  Parameters
  ----------
  df : DataFrame to complete.
  relation_tab : str. Name of the relation table.
  schema : str. Schema of the relation table.
  id_df : join key on the `df` side (`left_id` of `_merge_relation`).
  id_relation : join key on the relation side (`right_id`).
  id_rela_auth : str. Author column handed to `_merge_author`.

  Returns
  -------
  DataFrame whose 'auteur' column is renamed 'auteur_<suffix>' when the
  table name contains 'site': <suffix> is what follows 'site' in the
  second '_'-separated part of the name. Otherwise the column keeps the
  name 'auteur' (this case used to fail on an unbound variable).
  '''
  from .pers.pers import _merge_relation, _merge_author
  suffixe = ''
  if 'site' in relation_tab:
    suffixe = '_' + relation_tab.split('_')[1].split('site')[1]
  df = _merge_relation(df=df,table=relation_tab,schema=schema,
    left_id  = id_df,
    right_id = id_relation)
  # presumably adds the 'auteur' column renamed below -- confirm in pers.pers
  df = _merge_author(df=df, col_aut=id_rela_auth, on_index=True)
  df.rename(columns={'auteur': 'auteur'+suffixe}, inplace=True)
  return df


def _get_relation_autor2(df, relation_tab, schema, id_df, id_relation, id_rela_auth='id_auteur'):
  '''
  Join a relation table to `df` and replace author identifiers by names.

  Parameters
  ----------
  df : DataFrame to complete.
  relation_tab : str. Name of the relation table.
  schema : str. Schema of the relation table.
  id_df : join key on the `df` side (`left_id` of `_merge_relation`).
  id_relation : join key on the relation side (`right_id`).
  id_rela_auth : str. Column holding the author identifiers as a string,
    several identifiers being separated by ' & '.

  Returns
  -------
  DataFrame where `id_rela_auth` is renamed 'auteur_<suffix>' when the
  table name contains 'site' (<suffix> is what follows 'site' in the
  second '_'-separated part of the name), 'auteur' otherwise (this case
  used to fail on an unbound variable).
  '''
  from .pers.pers import _merge_relation, get_auteur2
  suffixe = ''
  if 'site' in relation_tab:
    suffixe = '_' + relation_tab.split('_')[1].split('site')[1]
  df = _merge_relation(df=df,table=relation_tab,schema=schema,
    left_id  = id_df,
    right_id = id_relation)

  # presumably indexed by author id, given the index -> name mapping below
  aut = get_auteur2()[['nom_prenom']]
  # One column per author of the row: 0, 1, 2, ...
  aut_df = df[id_rela_auth].str.split(' & ',expand = True)
  # Swap every identifier (compared as text) for the matching 'nom_prenom'
  aut_df.replace([*aut.index.astype(str)],[*aut.nom_prenom], inplace=True)
  # Glue the names back together with ' & ', skipping missing authors
  lst_col = aut_df.columns.drop(0)
  aut_df[id_rela_auth] = aut_df[0]
  for col in lst_col:
    filled = ~aut_df[col].isna()
    aut_df.loc[filled,id_rela_auth] = aut_df.loc[filled,id_rela_auth] + \
      ' & ' + aut_df.loc[filled,col]
  aut_df.drop(columns=[0,*lst_col], inplace=True)
  aut_df.columns = [id_rela_auth + '_tmp']
  # Bring the names back on the row index and overwrite the identifiers
  df = df.merge(aut_df,how='left', left_index=True, right_index=True)
  df[id_rela_auth] = df[id_rela_auth+'_tmp']
  del df[id_rela_auth+'_tmp']

  df.rename(columns={id_rela_auth: 'auteur'+suffixe}, inplace=True)
  return df


def to_geoms(geometries):
  '''
  Flatten an iterable of geometries into its single parts.

  Polygon and LineString instances are yielded as they are. Any other
  item is expanded: through its `geoms` attribute when it has one
  (multi-part geometries are not iterable themselves in Shapely 2.x),
  otherwise by iterating the item directly.
  '''
  from shapely.geometry import Polygon,LineString
  for geometry in geometries:
    if isinstance(geometry, (Polygon,LineString)):
      yield geometry
    else:
      yield from getattr(geometry, 'geoms', geometry)


def union_polygons_geometry(df):
  '''
  Turn a GeoDataFrame of Polygons and/or MultiPolygons
  into one single MultiPolygon.

  Parameters
  ----------
  df : GeoDataFrame.

  Returns
  -------
  MultiPolygon made of the Polygons followed by the parts of every
  MultiPolygon. Geometries of any other type are ignored. An empty
  MultiPolygon is returned when `df` holds no (Multi)Polygon (this case
  used to fail on an unbound variable).
  '''
  from shapely.geometry import MultiPolygon
  name_geom = df.geometry.name

  parts = df.loc[df.geom_type=='Polygon',name_geom].tolist()
  # `.geoms` instead of iterating the MultiPolygon itself, which is no
  # longer possible in Shapely 2.x
  for multipoly in df.loc[df.geom_type=='MultiPolygon',name_geom]:
    parts.extend(multipoly.geoms)

  return MultiPolygon(parts)


def union_lines_geometry(df):
  '''
  Turn a GeoDataFrame of LineStrings and/or MultiLineStrings
  into one single MultiLineString.

  Parameters
  ----------
  df : GeoDataFrame.

  Returns
  -------
  MultiLineString made of the LineStrings followed by the parts of every
  MultiLineString. Geometries of any other type are ignored. An empty
  MultiLineString is returned when `df` holds no (Multi)LineString (this
  case used to fail on an unbound variable).
  '''
  from shapely.geometry import MultiLineString
  name_geom = df.geometry.name

  parts = df.loc[df.geom_type=='LineString',name_geom].tolist()
  # `.geoms` instead of iterating the MultiLineString itself, which is no
  # longer possible in Shapely 2.x
  for multiline in df.loc[df.geom_type=='MultiLineString',name_geom]:
    parts.extend(multiline.geoms)

  return MultiLineString(parts)


def calc_recouvrmt(df1,df2):
  '''
  Compute how much df2 overlaps df1,
  for every geometry of df1.

  Parameters
  ----------
  df1 : GeoDataFrame. Needs an 'id_site' column and a 'geom' geometry.
  df2 : GeoDataFrame. Needs a 'geom' geometry.

  Returns
  -------
  df1 with an extra 'perc_rcvmt' column: the intersection area with each
  intersecting df2 geometry, as a percentage of the df1 geometry area,
  summed per 'id_site' and rounded to 2 decimals; 0 without intersection.
  The percentages are added up, so df2 geometries overlapping each other
  can take the total above 100.
  '''
  from inspect import signature
  from geopandas import sjoin

  # geopandas renamed the `op` keyword to `predicate` and later removed
  # `op`: use whichever keyword the installed version knows.
  join_kw = 'predicate' if 'predicate' in signature(sjoin).parameters else 'op'
  tmp = sjoin(
    df1,
    df2[['geom']],
    how = 'left',
    **{join_kw: 'intersects'})
  # Keep only the df1 rows that intersect something
  tmp.dropna(subset=['index_right'],inplace=True)
  tmp['index_right'] = tmp['index_right'].astype(int)
  tmp.reset_index(inplace=True)
  # Fetch the matching df2 geometry next to every df1 geometry
  tmp = tmp.join(
    df2[['geom']].rename(columns={'geom': 'right_geom'}),
    on=['index_right'], how='left')
  tmp2 = tmp[['index_right','right_geom']].copy() \
    .rename(columns={'right_geom': 'geom'}) \
    .set_geometry('geom')
  tmp1 = tmp[['id_site','geom']].copy() \
    .set_geometry('geom')

  # buffer(0) rebuilds the invalid geometries before intersecting them
  invalid1 = ~tmp1.geom.values.is_valid
  if invalid1.any():
    tmp1.loc[invalid1,'geom'] = tmp1.loc[invalid1,'geom'].buffer(0)
  invalid2 = ~tmp2.geom.values.is_valid
  if invalid2.any():
    tmp2.loc[invalid2,'geom'] = tmp2.loc[invalid2,'geom'].buffer(0)

  # tmp1 and tmp2 share the index of tmp: row-wise intersection
  tmp['perc_rcvmt'] = (tmp1.intersection(tmp2).area/tmp1.area)*100
  # Sum the percentage only, so geometry columns are never aggregated
  tmp = tmp.groupby(['id_site'])['perc_rcvmt'].sum().reset_index()
  df1 = df1.merge(tmp[['id_site','perc_rcvmt']], on=['id_site'], how='left')
  # Plain assignment instead of an in-place fillna on an accessed column,
  # which is not guaranteed to reach df1 under pandas copy-on-write
  df1['perc_rcvmt'] = df1['perc_rcvmt'].fillna(0).round(2)
  return df1


def Polygons_to_MultiPolygon(df):
  '''
  Return a copy of `df` where every Polygon of the 'geom' column is
  wrapped into a one-part MultiPolygon.

  Rows already holding a MultiPolygon are kept as they are; rows of any
  other geometry type are left out of the result. The result is sorted
  by index and the input frame is not modified.
  '''
  from shapely.geometry import MultiPolygon
  from pandas import concat
  kinds = df.geom_type
  already_multi = df.loc[kinds=='MultiPolygon'].copy()
  singles = df.loc[kinds=='Polygon'].copy()
  singles['geom'] = [MultiPolygon([shape]) for shape in singles['geom']]
  return concat([already_multi, singles]).sort_index()