Here is a quick example of clustering an ArcGIS feature class of points. WARNING: this is more of a "gist" than a production-ready version. I am releasing it to support my presentation at the Esri Developer Summit. Check back soon — it should be updated with additional error handling and much more efficient use of the tools.
import json
import multiprocessing
import os
import unittest
from collections import defaultdict
import arcpy
def cluster_worker(sourcePointFeatureClass, outputDirectory, levelInfos=None, whereClause=None, workerName=None, includeLevels=None):
    """Cluster one worker's subset of points into a per-worker scratch file GDB.

    Copies the points selected by whereClause into a temporary geodatabase
    named after workerName, then builds one clustered feature class per
    tile level via create_cluster_layer.

    Parameters:
        sourcePointFeatureClass: path to the input point feature class.
        outputDirectory: folder that will hold the temporary file GDB.
        levelInfos: optional tiling-scheme dict with a 'lods' list; when None
            the standard Web Mercator (wkid 102100) scheme below is used.
            (Previously this argument was accepted but silently ignored.)
        whereClause: SQL where clause selecting this worker's points.
        workerName: unique tag used to name the scratch GDB and log lines.
        includeLevels: optional iterable of int levels to process; falsy = all.

    Returns:
        (clustered_feature_classes, workerName, outputGeodatabase) — a list of
        full paths to the clustered outputs, the worker tag, and the scratch
        GDB path the caller is responsible for deleting.
    """
    print('cluster_worker({})'.format(workerName))
    arcpy.env.overwriteOutput = True
    outputGeodatabaseName = "temp{}.gdb".format(workerName)
    outputGeodatabase = os.path.join(outputDirectory, outputGeodatabaseName)
    # Start from a clean scratch workspace for this worker.
    if arcpy.Exists(outputGeodatabase):
        arcpy.Delete_management(outputGeodatabase)
    arcpy.CreateFileGDB_management(outputDirectory, outputGeodatabaseName)
    arcpy.env.workspace = outputGeodatabase
    points_feature_layer = 'temp_points'
    points_feature_class = 'source_points'
    arcpy.MakeFeatureLayer_management(sourcePointFeatureClass, points_feature_layer, whereClause)
    arcpy.CopyFeatures_management(points_feature_layer, points_feature_class)
    if levelInfos is None:
        # Default tiling scheme: ArcGIS Online / Web Mercator LODs 0-19.
        # json.loads accepts a plain str; the old unicode(..., 'latin-1')
        # round-trip was unnecessary (and Python-2-only).
        level_infos_json = """{"origin":{"x":-20037508.342787,"y":20037508.342787},"spatialReference":{"wkid":102100},"lods":[{"level":0,"resolution":156543.033928,"scale":591657527.591555},{"level":1,"resolution":78271.5169639999,"scale":295828763.795777},{"level":2,"resolution":39135.7584820001,"scale":147914381.897889},{"level":3,"resolution":19567.8792409999,"scale":73957190.948944},{"level":4,"resolution":9783.93962049996,"scale":36978595.474472},{"level":5,"resolution":4891.96981024998,"scale":18489297.737236},{"level":6,"resolution":2445.98490512499,"scale":9244648.868618},{"level":7,"resolution":1222.99245256249,"scale":4622324.434309},{"level":8,"resolution":611.49622628138,"scale":2311162.217155},{"level":9,"resolution":305.748113140558,"scale":1155581.108577},{"level":10,"resolution":152.874056570411,"scale":577790.554289},{"level":11,"resolution":76.4370282850732,"scale":288895.277144},{"level":12,"resolution":38.2185141425366,"scale":144447.638572},{"level":13,"resolution":19.1092570712683,"scale":72223.819286},{"level":14,"resolution":9.55462853563415,"scale":36111.909643},{"level":15,"resolution":4.77731426794937,"scale":18055.954822},{"level":16,"resolution":2.38865713397468,"scale":9027.977411},{"level":17,"resolution":1.19432856685505,"scale":4513.988705},{"level":18,"resolution":0.597164283559817,"scale":2256.994353},{"level":19,"resolution":0.298582141647617,"scale":1128.497176}],"units":"esriMeters"}"""
        levelInfos = json.loads(level_infos_json)
    clustered_feature_classes = []
    for level_info in levelInfos['lods']:
        if not includeLevels or int(level_info['level']) in includeLevels:
            cluster_feature_class = create_cluster_layer(points_feature_class, level_info)
            # create_cluster_layer returns None when Integrate's tolerance
            # was exceeded for this level; skip those.
            if cluster_feature_class:
                clustered_feature_classes.append(os.path.join(outputGeodatabase, cluster_feature_class))
    return (clustered_feature_classes, workerName, outputGeodatabase)
def create_cluster_layer(sourcePointFeatureClass, levelInfo):
    """Build one clustered feature class for a single tile level.

    Copies the source points, snaps them together with Integrate at a
    tolerance of 10x the level's resolution, collapses coincident points
    with CollectEvents, joins summary attributes back on, and stamps the
    result with a zoom_level field.

    Parameters:
        sourcePointFeatureClass: point feature class in the current workspace.
        levelInfo: dict with 'level' (int) and 'resolution' (meters/pixel).

    Returns:
        The output feature class name ('clusters_<level>_joined'), or None
        when Integrate refused the tolerance for this level.
    """
    level, res = levelInfo['level'], levelInfo['resolution']
    integrate_name = 'integrate_{}'.format(level)
    cluster_name = 'clusters_{}'.format(level)
    output_name = 'clusters_{}_joined'.format(level)
    if arcpy.Exists(output_name):
        # Already built on a previous run; reuse it.
        return output_name
    arcpy.CopyFeatures_management(sourcePointFeatureClass, integrate_name)
    # Cluster radius: 10 pixels at this level's resolution.
    distance = "{} Meters".format(res * 10)
    try:
        arcpy.Integrate_management(integrate_name, distance)
    except arcpy.ExecuteError:
        # Was a bare 'except:', which also swallowed KeyboardInterrupt etc.
        message = arcpy.GetMessage(0)
        if 'maximum tolerance exceeded' in message.lower():
            print('maximum tolerance exceeded: {} - {}'.format(integrate_name, distance))
            return None
        else:
            raise
    arcpy.CollectEvents_stats(integrate_name, cluster_name)
    add_attributes(integrate_name, cluster_name, output_name)
    # Intermediates are no longer needed once the join output exists.
    arcpy.Delete_management(integrate_name)
    arcpy.Delete_management(cluster_name)
    arcpy.AddField_management(output_name, 'zoom_level', "SHORT")
    expression = "int({})".format(levelInfo['level'])
    arcpy.CalculateField_management(output_name, 'zoom_level', expression, "PYTHON")
    return output_name
def add_attributes(integratedFeatureClass, clustersFeatureClass, outputFeatureClass, mergeNumericFields=None, mergeTextFields=False):
    """Spatially join summary statistics of the integrated points onto the clusters.

    For every numeric field of integratedFeatureClass, adds min/max/mean/sum/
    count/stdev/median output fields (named '<field>_<stat>') via FieldMap
    merge rules, then runs SpatialJoin into outputFeatureClass.

    Parameters:
        integratedFeatureClass: the pre-Integrate point copy (join features).
        clustersFeatureClass: CollectEvents output (target features).
        outputFeatureClass: name/path for the joined result.
        mergeNumericFields: reserved for a future customizable field list;
            currently unused. (Was a mutable default '[]' — now None.)
        mergeTextFields: reserved for a future string-join option; unused.

    Merge-rule reference (from the SpatialJoin documentation):
        First - the first input value is used.
        Last - the last input value is used.
        Join - merge values with a delimiter (text output fields only).
        Min/Max/Mean/Median/Sum/StDev - numeric output fields only.
        Count - number of non-null values included in the statistics.

    TODO: add a Join option for string fields.
    TODO: make the summarized field list customizable.
    """
    fieldmappings = arcpy.FieldMappings()
    fieldmappings.addTable(clustersFeatureClass)
    integrated_fields = arcpy.ListFields(integratedFeatureClass)
    for f in integrated_fields:
        if f.type in ['Integer', 'SmallInteger', 'Double', 'Single']:
            template = f.name + '_{}'
            for m in ['Min', 'Max', 'Mean', 'Sum', 'Count', 'StDev', 'Median']:
                # One FieldMap per statistic, all reading the same input field.
                fieldmap = arcpy.FieldMap()
                fieldmap.addInputField(integratedFeatureClass, f.name)
                field = fieldmap.outputField
                field.name = template.format(m.lower())
                field.aliasName = template.format(m.lower())
                fieldmap.mergeRule = m
                fieldmap.outputField = field
                fieldmappings.addFieldMap(fieldmap)
    arcpy.SpatialJoin_analysis(clustersFeatureClass, integratedFeatureClass, outputFeatureClass, "#", "#", fieldmappings)
def create_membership_where(featureClass, fieldName, values):
    """Return a SQL where clause of the form: <field> IN ('v1','v2',...).

    Each value is single-quoted; embedded single quotes are doubled so a
    value like "O'Brien" no longer produces a broken (or injectable) clause.
    """
    quoted = ["'{}'".format('{}'.format(v).replace("'", "''")) for v in values]
    where_membership = ','.join(quoted)
    return arcpy.AddFieldDelimiters(featureClass, fieldName) + " IN ({})".format(where_membership)
def get_unique_field_values(featureClass, fieldName, where=None, getValueFunction=None):
    """Return the distinct values of one field as a list.

    Parameters:
        featureClass: table or feature class to scan.
        fieldName: the single field to read.
        where: optional SQL where clause limiting the rows scanned.
        getValueFunction: optional callable applied to each raw value;
            when omitted each value is converted with str().

    Returns:
        A list of unique (transformed) values, in no guaranteed order.
    """
    convert = getValueFunction if getValueFunction else str
    unique_values = set()
    with arcpy.da.SearchCursor(featureClass, [fieldName], where) as cursor:
        for row in cursor:
            unique_values.add(convert(row[0]))
    return list(unique_values)
# Accumulated across worker callbacks. apply_async callbacks execute in the
# parent process, one at a time, so plain module globals are safe here.
all_results = []
delete_files = []


def cluster_complete(results):
    """Pool callback: record one finished worker's outputs.

    Parameters:
        results: the (clustered_feature_classes, workerName, outputGeodatabase)
            tuple returned by cluster_worker.
    """
    # 'global' is required here: without it the augmented assignment below
    # makes all_results a local and raises UnboundLocalError on the first
    # callback (delete_files.append alone would have been fine).
    global all_results
    clustered_feature_classes, workerName, delete_file = results
    print('worker complete({})'.format(workerName))
    all_results += clustered_feature_classes
    delete_files.append(delete_file)
def run_clustering(featureClass, clusterField, outputDirectory, includeLevels=None):
    """Fan clustering out across one worker per distinct clusterField value,
    then merge every worker's outputs into '<featureClass>_clustered'.

    Parameters:
        featureClass: input point feature class.
        clusterField: field whose distinct values partition the work.
        outputDirectory: folder for per-worker scratch geodatabases.
        includeLevels: optional iterable of int tile levels to build.

    Raises:
        RuntimeError: when no worker produced any output (apply_async
            silently discards worker exceptions when only a callback is
            attached, so an empty result set is the only visible symptom).
    """
    global all_results
    global delete_files
    arcpy.gp.overwriteOutput = True
    # Leave one core free for the parent; never ask for a 0-process pool
    # (cpu_count() - 1 is 0 on a single-core machine).
    pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))
    for value in get_unique_field_values(featureClass, clusterField):
        where = create_membership_where(featureClass, clusterField, [value])
        worker_arguments = (featureClass, outputDirectory, None, where, value, includeLevels)
        pool.apply_async(cluster_worker, worker_arguments, callback=cluster_complete)
    pool.close()
    pool.join()
    if not all_results:
        raise RuntimeError('clustering produced no outputs for {}'.format(featureClass))
    final_output = featureClass + '_clustered'
    # Seed the merged output with one result, then append the rest (Append
    # with an empty list would previously have been attempted regardless).
    arcpy.CopyFeatures_management(all_results.pop(), final_output)
    if all_results:
        arcpy.Append_management(all_results, final_output, "TEST")
    # Remove the per-worker scratch geodatabases.
    for d in delete_files:
        arcpy.Delete_management(d)
#=============================================================================================================
# TESTING
#=============================================================================================================
class TestClustering(unittest.TestCase):
    """Ad-hoc integration tests; paths point at local sample datasets."""

    def test_run_clustering(self):
        # End-to-end run over the EPA facilities sample, levels 4-15.
        source = r'D:\data\EPA\FRS_INTERESTS.gdb\FACILITY_INTERESTS_WM'
        partition_field = 'STATE_CODE'
        workspace_dir = r'D:\data\EPA'
        run_clustering(source, partition_field, workspace_dir, includeLevels=range(4, 16))

    def _add_attributes(self):
        # Leading underscore keeps this out of unittest's default discovery;
        # rename to test_* to run it directly against saved intermediates.
        arcpy.gp.overwriteOutput = True
        integrated = r'C:\gis_working_directory\tempDE.gdb\integrate_10'
        clusters = r'C:\gis_working_directory\tempDE.gdb\clusters_10'
        joined = r'C:\gis_working_directory\tempDE.gdb\clusters_10_joined'
        add_attributes(integrated, clusters, joined)
# Script entry point: run the unittest suite above.
if __name__ == '__main__':
    unittest.main()
ReplyDeleteThanks for your great and helpful presentation I like your good service. I always appreciate your post. That is very interesting I love reading and I am always searching for informative information like this.iot training institutes in chennai | industrial iot training chennai | iot training center in chennai | best iot training centre in chennai
great information.
ReplyDeletethank you for posting.
keep sharing.
Best python programming training in Bengaluru
I have read your blog and I gathered some needful information from your blog. Keep update your blog. Awaiting for your next update. Thanks
ReplyDeleteDedicatedHosting4u.com
thanks for sharing the nice article, keep posting or updating news article.
ReplyDeletePython training in bangalore
Python training in Bangalore
Data science with python training in Bangalore
Angular js training in bangalore
Hadoop training in bangalore
DevOPs training in bangalore
Agile and scrum training in bangalore
IntelliMindz is the best IT Training in Coimbatore with placement, offering 200 and more software courses with 100% Placement Assistance.
ReplyDeletePython Course In Coimbatore
Digital Marketing Course In Coimbatore
sap mm training In Coimbatore
sap-fico-training-in-coimbatore
sap hana trainingIn Coimbatore
selenium Course In Coimbatore
seo training In Coimbatore
I thoroughly enjoyed reading your article on point clustering with ArcGIS and Python. Your detailed explanation and step-by-step guide make it accessible for both beginners and experienced users. The use of visuals enhances the understanding of the clustering process, and your inclusion of Python code snippets is incredibly helpful.
ReplyDeleteAdditionally, I would like to highlight the relevance of such spatial analysis techniques in the field of data science. Mastering skills like point clustering is crucial for professionals seeking to excel in data science. Speaking of which, Imarticus Learning's Data Science Course stands out as an excellent resource for individuals eager to delve deeper into this field. Their comprehensive curriculum, hands-on approach, and industry-relevant projects make it a valuable investment for anyone aspiring to build a successful career in data science. I highly recommend checking out Imarticus Learning's Data Science Course for a well-rounded and practical education in this rapidly evolving domain.