func_classif.py

import numpy as np
import math, time
import rasterio
from rasterio import features


def rasterize(in_situ_gdf, in_situ_cal_tif, img_temp_path, no_data = -10000):

    print(f'Raster template file : {img_temp_path}')

    src = rasterio.open(img_temp_path, "r")

    # Update metadata

    out_meta = src.meta
    out_meta.update(nodata=no_data)

    crs_shp = str(in_situ_gdf.crs).split(":",1)[1]
    crs_tif = str(src.crs).split(":",1)[1]

    print(f'The CRS of in situ data is    : {crs_shp}')
    print(f'The CRS of raster template is : {crs_tif}')

    if crs_shp == crs_tif:
        print("CRS are the same")

        print(f'Rasterize starts')

        # Burn the features into the raster and write it out

        dst = rasterio.open(in_situ_cal_tif, 'w+', **out_meta)
        dst_arr = dst.read(1)

        # This is where we create a generator of geom, value pairs to use in rasterizing

        geom_col = in_situ_gdf.geometry
        code_col = in_situ_gdf["labelID"].astype(int)

        shapes = ((geom,value) for geom, value in zip(geom_col, code_col))

        in_situ_arr = features.rasterize(shapes=shapes,
                                        fill=no_data,
                                        out=dst_arr,
                                        transform=dst.transform)
        dst.write_band(1, in_situ_arr)
        print(f'Rasterize is done : {in_situ_cal_tif}')

        # Close rasterio objects
        src.close()
        dst.close()

    else:
        print('CRS are different --> repoject in-situ data shapefile with "to_crs"')


def apply_rf_to_raster(rf, img, no_data = -10000):
    # Take our full image and reshape into long 2d array (nrow * ncol, nband) for classification
    new_shape = (img.shape[0] * img.shape[1], img.shape[2])
    img_as_array = img[:, :, :].reshape(new_shape)
    print(f'Reshaped from {img.shape} to {img_as_array.shape}')

    start_classification = time.time()

    # Now predict for each pixel
    class_prediction = rf.predict(img_as_array)

    # predictions with no_data in any of the input features are replaced by the no_data value
    class_prediction[np.any(img_as_array == no_data, axis=1)] = no_data

    # Reshape our classification map
    class_prediction = class_prediction.reshape(img[:, :, 0].shape)

    end_classification = time.time()

    hours, rem = divmod(end_classification-start_classification, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Random Forest prediction : {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

    return class_prediction


def smooth_classification(class_prediction, window_size, no_data = -10000):
    sizey = class_prediction.shape[0]
    sizex = class_prediction.shape[1]

    rad_wind = math.floor(window_size/2)

    X = np.pad(class_prediction, ((rad_wind,rad_wind),(rad_wind,rad_wind)), 'edge')

    majority = np.empty((sizey,sizex), dtype='int16')

    for i in range(sizey):
        for j in range(sizex):
            window = X[i:i+window_size , j:j+window_size]
            window = window.flatten()
            window = window[window != no_data]
            if len(window) == 0:
                majority[i,j] = no_data
            else:
                counts = np.bincount(window)
                maj = np.argmax(counts)
                majority[i,j]= maj


    majority = majority.reshape((1,sizey,sizex))

    return majority