25.3.13

Point Clustering w/ ArcGIS + Python



Here is a quick example of clustering an ArcGIS feature class of points. For each zoom level in a cache tiling scheme, the script snaps nearby points together with Integrate, collapses the coincident points into weighted clusters with Collect Events, and summarizes the numeric attributes onto each cluster with a Spatial Join; the whole thing is parallelized with multiprocessing, one worker per unique value of a partitioning field. WARNING: This is more of a "gist" than a production-ready version. I am releasing it to support my presentation at the Esri Developer Summit. Come back soon and it should be updated with additional error handling and much more efficient use of the tools.

import json
import multiprocessing
import os
import unittest

import arcpy

def cluster_worker(sourcePointFeatureClass, outputDirectory, levelInfos=None, whereClause=None, workerName=None, includeLevels=None):
    print 'cluster_worker({})'.format(workerName)
    arcpy.env.overwriteOutput = True
    outputGeodatabaseName = "temp{}.gdb".format(workerName)
    outputGeodatabase = os.path.join(outputDirectory, outputGeodatabaseName)
    if arcpy.Exists(outputGeodatabase):
        arcpy.Delete_management(outputGeodatabase)
    arcpy.CreateFileGDB_management(outputDirectory, outputGeodatabaseName)
    arcpy.env.workspace = outputGeodatabase

    points_feature_layer = 'temp_points'
    points_feature_class = 'source_points'
    
    arcpy.MakeFeatureLayer_management(sourcePointFeatureClass, points_feature_layer, whereClause)
    arcpy.CopyFeatures_management(points_feature_layer, points_feature_class)

    # Default LODs: the standard Web Mercator (wkid 102100) tiling scheme used by
    # ArcGIS Online / Bing Maps / Google Maps, levels 0-19.
    level_infos_json = """{"origin":{"x":-20037508.342787,"y":20037508.342787},"spatialReference":{"wkid":102100},"lods":[{"level":0,"resolution":156543.033928,"scale":591657527.591555},{"level":1,"resolution":78271.5169639999,"scale":295828763.795777},{"level":2,"resolution":39135.7584820001,"scale":147914381.897889},{"level":3,"resolution":19567.8792409999,"scale":73957190.948944},{"level":4,"resolution":9783.93962049996,"scale":36978595.474472},{"level":5,"resolution":4891.96981024998,"scale":18489297.737236},{"level":6,"resolution":2445.98490512499,"scale":9244648.868618},{"level":7,"resolution":1222.99245256249,"scale":4622324.434309},{"level":8,"resolution":611.49622628138,"scale":2311162.217155},{"level":9,"resolution":305.748113140558,"scale":1155581.108577},{"level":10,"resolution":152.874056570411,"scale":577790.554289},{"level":11,"resolution":76.4370282850732,"scale":288895.277144},{"level":12,"resolution":38.2185141425366,"scale":144447.638572},{"level":13,"resolution":19.1092570712683,"scale":72223.819286},{"level":14,"resolution":9.55462853563415,"scale":36111.909643},{"level":15,"resolution":4.77731426794937,"scale":18055.954822},{"level":16,"resolution":2.38865713397468,"scale":9027.977411},{"level":17,"resolution":1.19432856685505,"scale":4513.988705},{"level":18,"resolution":0.597164283559817,"scale":2256.994353},{"level":19,"resolution":0.298582141647617,"scale":1128.497176}],"units":"esriMeters"}"""
    # levelInfos, if supplied, is assumed to be a JSON string in the same format.
    level_infos = json.loads(levelInfos or level_infos_json)
    clustered_feature_classes = []
    for level_info in level_infos['lods']:
        if not includeLevels or int(level_info['level']) in includeLevels:
            cluster_feature_class = create_cluster_layer(points_feature_class, level_info)
            if cluster_feature_class:
                clustered_feature_classes.append(os.path.join(outputGeodatabase, cluster_feature_class))
    return (clustered_feature_classes, workerName, outputGeodatabase)

def create_cluster_layer(sourcePointFeatureClass, levelInfo):
    level, res = (levelInfo['level'], levelInfo['resolution'])

    integrate_name = 'integrate_{}'.format(level)
    cluster_name = 'clusters_{}'.format(level)
    output_name = 'clusters_{}_joined'.format(level)

    if not arcpy.Exists(output_name):
        arcpy.CopyFeatures_management(sourcePointFeatureClass, integrate_name)
        # Snap together points that fall within ~10 pixels of each other at this
        # level's resolution (meters per pixel); e.g. at level 10 that is
        # 152.874 * 10, or roughly 1529 meters.
        distance = "{} Meters".format(res * 10)
        try:
            arcpy.Integrate_management(integrate_name, distance)
        except arcpy.ExecuteError:
            message = arcpy.GetMessages(2)  # error-severity messages only
            if 'maximum tolerance exceeded' in message.lower():
                print 'maximum tolerance exceeded: {} - {}'.format(integrate_name, distance)
                return None
            else:
                raise

        # Collect Events collapses coincident points and adds an ICOUNT field
        # with the number of input points at each location.
        arcpy.CollectEvents_stats(integrate_name, cluster_name)
        add_attributes(integrate_name, cluster_name, output_name)

        arcpy.Delete_management(integrate_name)
        arcpy.Delete_management(cluster_name)

        arcpy.AddField_management(output_name, 'zoom_level', "SHORT")
        expression = "int({})".format(levelInfo['level'])
        arcpy.CalculateField_management(output_name, 'zoom_level', expression, "PYTHON")

    return output_name

def add_attributes(integratedFeatureClass, clustersFeatureClass, outputFeatureClass, mergeNumericFields=None, mergeTextFields=False):
    '''
    Summarizes attributes for clusters via a spatial join. The merge* arguments
    are placeholders for the TODOs below and are not used yet.

    TODO: Add a Join option for string fields (a sketch follows this function)
    TODO: Add customizable fields for summary

    Available merge rules:
    First - The first input value is used.
    Last - The last input value is used.
    Join - Merge the values together using the delimiter value to separate the values (only valid if the output field type is text).
    Min - The smallest input value is used (only valid if the output field type is numeric).
    Max - The largest input value is used (only valid if the output field type is numeric).
    Mean - The mean is calculated using the input values (only valid if the output field type is numeric).
    Median - The median is calculated using the input values (only valid if the output field type is numeric).
    Sum - The sum is calculated using the input values (only valid if the output field type is numeric).
    StDev - The standard deviation is calculated using the input values (only valid if the output field type is numeric).
    Count - The number of values included in statistical calculations. This counts each value except null values.
    '''
    fieldmappings = arcpy.FieldMappings()
    fieldmappings.addTable(clustersFeatureClass)

    integrated_fields = arcpy.ListFields(integratedFeatureClass)
    for f in integrated_fields:
        if f.type in ['Integer', 'SmallInteger', 'Double', 'Single']:
            template = f.name + '_{}'
            for m in ['Min', 'Max', 'Mean', 'Sum', 'Count', 'StDev', 'Median']:
                fieldmap = arcpy.FieldMap()
                fieldmap.addInputField(integratedFeatureClass, f.name)
                field = fieldmap.outputField
                field.name = template.format(m.lower())
                field.aliasName = template.format(m.lower())
                fieldmap.mergeRule = m
                fieldmap.outputField = field
                fieldmappings.addFieldMap(fieldmap)

    arcpy.SpatialJoin_analysis(clustersFeatureClass, integratedFeatureClass, outputFeatureClass, "#", "#", fieldmappings)
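
# A sketch of the 'Join' merge rule from the TODO above: for a text field, the
# clustered values can be concatenated with a delimiter. The '_joined' suffix and
# the 1000-character width are arbitrary choices, not requirements. Call this once
# per text field to extend the fieldmappings before the SpatialJoin_analysis call.
def add_text_join_fieldmap(fieldmappings, integratedFeatureClass, fieldName, delimiter=','):
    fieldmap = arcpy.FieldMap()
    fieldmap.addInputField(integratedFeatureClass, fieldName)
    field = fieldmap.outputField
    field.name = fieldName + '_joined'
    field.aliasName = fieldName + '_joined'
    field.length = 1000  # widen the output so the concatenated values fit
    fieldmap.outputField = field
    fieldmap.mergeRule = 'Join'
    fieldmap.joinDelimiter = delimiter
    fieldmappings.addFieldMap(fieldmap)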

def create_membership_where(featureClass, fieldName, values):
    # Builds e.g. STATE_CODE IN ('DE','MD'); values are quoted as strings, so
    # this assumes a text field.
    where_membership = ','.join(["'{}'".format(v) for v in values])
    return arcpy.AddFieldDelimiters(featureClass, fieldName) + " IN ({})".format(where_membership)

def get_unique_field_values(featureClass, fieldName, where=None, getValueFunction=None):
    unique_values = set()
    with arcpy.da.SearchCursor(featureClass, [fieldName], where) as cursor:
        for r in cursor:
            if getValueFunction:
                unique_values.add(getValueFunction(r[0]))
            else:
                unique_values.add(str(r[0]))
    return list(unique_values)

all_results = []
delete_files = []
def cluster_complete(results):
    # Callback; runs in the parent process as each worker finishes.
    global all_results
    clustered_feature_classes, workerName, delete_file = results
    print 'worker complete({})'.format(workerName)

    all_results += clustered_feature_classes
    delete_files.append(delete_file)

def run_clustering(featureClass, clusterField, outputDirectory, includeLevels=None):
    global all_results
    global delete_files
    arcpy.env.overwriteOutput = True
    # One worker per unique value of clusterField; each worker writes to its own
    # file geodatabase so the processes do not contend for a workspace. Leave one
    # core free for the parent process.
    pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
    for v in get_unique_field_values(featureClass, clusterField):
        where = create_membership_where(featureClass, clusterField, [v])
        worker_arguments = (featureClass, outputDirectory, None, where, v, includeLevels)
        pool.apply_async(cluster_worker, worker_arguments, callback=cluster_complete)
    pool.close()
    pool.join()

    final_output = featureClass + '_clustered'
    # Seed the output with one worker's result, then append the rest; the "TEST"
    # schema option is safe here because every worker ran the same tools.
    arcpy.CopyFeatures_management(all_results.pop(), final_output)
    arcpy.Append_management(all_results, final_output, "TEST")

    for d in delete_files:
        arcpy.Delete_management(d)
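
# A sketch of a single-process variant for debugging: exceptions raised inside
# pool workers are easy to lose, so running the same worker and callback inline
# can make troubleshooting simpler. The merge and cleanup steps from
# run_clustering still apply afterwards.
def run_clustering_serial(featureClass, clusterField, outputDirectory, includeLevels=None):
    for v in get_unique_field_values(featureClass, clusterField):
        where = create_membership_where(featureClass, clusterField, [v])
        cluster_complete(cluster_worker(featureClass, outputDirectory, None, where, v, includeLevels))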

#=============================================================================================================
# TESTING
#=============================================================================================================
class TestClustering(unittest.TestCase):
    def test_run_clustering(self):
        feature_class = r'D:\data\EPA\FRS_INTERESTS.gdb\FACILITY_INTERESTS_WM'
        clusterField = 'STATE_CODE'
        outputDirectory = r'D:\data\EPA'
        levels = range(4, 16)
        run_clustering(feature_class, clusterField, outputDirectory, includeLevels=levels)

    def _add_attributes(self):
        arcpy.env.overwriteOutput = True
        integrated_feature_class = r'C:\gis_working_directory\tempDE.gdb\integrate_10'
        clusters = r'C:\gis_working_directory\tempDE.gdb\clusters_10'
        output_feature_class = r'C:\gis_working_directory\tempDE.gdb\clusters_10_joined'
        add_attributes(integrated_feature_class, clusters, output_feature_class)

if __name__ == '__main__':
    unittest.main()
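
Running the script as-is kicks off the unit test, which runs the full multi-state clustering job against my data. To call it directly on your own data, something like this works (the paths and field name below are placeholders):

run_clustering(r'C:\data\points.gdb\facilities_wm', 'STATE_CODE', r'C:\data\output', includeLevels=range(4, 16))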
