-
Notifications
You must be signed in to change notification settings - Fork 0
/
Find Nearest Neighbors.step
executable file
·1 lines (1 loc) · 28.8 KB
/
Find Nearest Neighbors.step
1
{"type":"code","name":"Find Nearest Neighbors.step","displayName":"Find Nearest Neighbors.step","description":"","templates":{"SAS":"/* templated code goes here*/;\n/*-----------------------------------------------------------------------------------------*\n START MACRO DEFINITIONS.\n*------------------------------------------------------------------------------------------*/\n\n/* -----------------------------------------------------------------------------------------* \n Error flag for capture during code execution.\n*------------------------------------------------------------------------------------------ */\n\n%global _fnn_error_flag;\n%let _fnn_error_flag=0;\n\n/* -----------------------------------------------------------------------------------------* \n Global macro variable for the trigger to run this custom step. A value of 1 \n (the default) enables this custom step to run. A value of 0 (provided by upstream code)\n sets this to disabled.\n*------------------------------------------------------------------------------------------ */\n\n%global _fnn_run_trigger;\n\n%if %sysevalf(%superq(_fnn_run_trigger)=, boolean) %then %do;\n\n\t%put NOTE: Trigger macro variable _fnn_run_trigger does not exist. 
Creating it now.;\n %let _fnn_run_trigger=1;\n\n%end;\n\n/*-----------------------------------------------------------------------------------------*\n Macro variable to capture indicator of a currently active CAS session\n*------------------------------------------------------------------------------------------*/\n\n%global casSessionExists;\n%global _current_uuid_;\n\n/*-----------------------------------------------------------------------------------------*\n Macro to capture indicator and UUID of any currently active CAS session.\n UUID is not expensive and can be used in future to consider graceful reconnect.\n*------------------------------------------------------------------------------------------*/\n\n%macro _fnn_checkSession;\n %if %sysfunc(symexist(_SESSREF_)) %then %do;\n %let casSessionExists= %sysfunc(sessfound(&_SESSREF_.));\n %if &casSessionExists.=1 %then %do;\n proc cas;\n session.sessionId result = sessresults;\n call symputx(\"_current_uuid_\", sessresults[1]);\n %put NOTE: A CAS session &_SESSREF_. is currently active with UUID &_current_uuid_. ;\n quit;\n %end;\n %end;\n%mend _fnn_checkSession;\n\n/*-----------------------------------------------------------------------------------------*\n This macro creates a global macro variable called _usr_nameCaslib\n that contains the caslib name (aka. 
caslib-reference-name) associated with the libname \n and assumes that the libname is using the CAS engine.\n\n As sysvalue has a length of 1024 chars, we use the trimmed option in proc sql\n to remove leading and trailing blanks in the caslib name.\n*------------------------------------------------------------------------------------------*/\n\n%macro _usr_getNameCaslib(_usr_LibrefUsingCasEngine); \n\n %global _usr_nameCaslib;\n %let _usr_nameCaslib=;\n\n proc sql noprint;\n select sysvalue into :_usr_nameCaslib trimmed from dictionary.libnames\n where libname = upcase(\"&_usr_LibrefUsingCasEngine.\") and upcase(sysname)=\"CASLIB\";\n quit;\n\n%mend _usr_getNameCaslib;\n\n/*-----------------------------------------------------------------------------------------*\n This macro generates additional codepieces based on a condition provided.\n*------------------------------------------------------------------------------------------*/\n\n%macro _gac_generate_additional_code(conditionVar, conditionOperator, conditionVal, desiredVar, desiredVal);\n %global _gac_generated_string;\n %put &conditionVar. &conditionOperator. &conditionVal.;\n %if &conditionVar. &conditionOperator. &conditionVal. 
%then %do; \n %put NOTE: Hey mama no shoes;\n %let _gac_generated_string = &desiredVar.=&desiredVal.,;\n %end;\n %else %do;\n %let _gac_generated_string = ;\n %end;\n\n%mend;\n\n/*--------------------------------------------------------------------------------------*\n Macro variable to hold the selected input columns to use as matching criteria.\n*---------------------------------------------------------------------------------------*/\n\n%let blankSeparatedCols = %_flw_get_column_list(_flw_prefix=inputColumns);\n\n/*-----------------------------------------------------------------------------------------*\n EXECUTION CODE MACRO \n*------------------------------------------------------------------------------------------*/\n\n%macro _fnn_main_execution_code;\n\n/*-----------------------------------------------------------------------------------------*\n Check for an active CAS session\n*------------------------------------------------------------------------------------------*/\n\n %_fnn_checkSession;\n\n %if &casSessionExists. = 0 %then %do;\n %put ERROR: A CAS session does not exist. Connect to a CAS session upstream. ;\n %let _fnn_error_flag = 1;\n %end;\n %else %do;\n/*-----------------------------------------------------------------------------------------*\n Check Input (base) table libref to ensure it points to a valid caslib.\n*------------------------------------------------------------------------------------------*/\n\n %if &_fnn_error_flag. = 0 %then %do;\n\n %global baseCaslib;\n \n %_usr_getNameCaslib(&baseTable_lib.);\n %let baseCaslib=&_usr_nameCaslib.;\n %put NOTE: &baseCaslib. is the caslib for the base table.;\n %let _usr_nameCaslib=;\n\n %if \"&baseCaslib.\" = \"\" %then %do;\n %put ERROR: Base table caslib is blank. Check if Base table is a valid CAS table. 
;\n %let _fnn_error_flag=1;\n %end;\n\n %end;\n\n/*-----------------------------------------------------------------------------------------*\n Check Input (query) table libref to ensure it points to a valid caslib.\n*------------------------------------------------------------------------------------------*/\n\n %if &_fnn_error_flag. = 0 %then %do;\n\n %global queryCaslib;\n \n %_usr_getNameCaslib(&queryTable_lib.);\n %let queryCaslib=&_usr_nameCaslib.;\n %put NOTE: &queryCaslib. is the caslib for the query table.;\n %let _usr_nameCaslib=;\n\n %if \"&queryCaslib.\" = \"\" %then %do;\n %put ERROR: Query table caslib is blank. Check if Query table is a valid CAS table. ;\n %let _fnn_error_flag=1;\n %end;\n\n %end;\n\n/*-----------------------------------------------------------------------------------------*\n Check Output table libref to ensure it points to a valid caslib.\n*------------------------------------------------------------------------------------------*/\n\n %if &_fnn_error_flag. = 0 %then %do;\n\n %global outputCaslib;\n \n %_usr_getNameCaslib(&outputTable_lib.);\n %let outputCaslib=&_usr_nameCaslib.;\n %put NOTE: &outputCaslib. is the output caslib.;\n %let _usr_nameCaslib=;\n\n %if \"&outputCaslib.\" = \"\" %then %do;\n %put ERROR: Output table caslib is blank. Check if Output table is a valid CAS table. ;\n %let _fnn_error_flag=1;\n %end;\n\n %end;\n\n/*-----------------------------------------------------------------------------------------*\n Check Output (distance) table libref to ensure it points to a valid caslib.\n*------------------------------------------------------------------------------------------*/\n\n %if &_fnn_error_flag. = 0 %then %do;\n\n %global outputDistCaslib;\n \n %_usr_getNameCaslib(&outputDistTable_lib.);\n %let outputDistCaslib=&_usr_nameCaslib.;\n %put NOTE: &outputDistCaslib. 
is the output distance table caslib.;\n %let _usr_nameCaslib=;\n\n %if \"&outputDistCaslib.\" = \"\" %then %do;\n %put ERROR: Output distance table caslib is blank. Check if Output distance table is a valid CAS table. ;\n %let _fnn_error_flag=1;\n %end;\n\n %end;\n\n/*-----------------------------------------------------------------------------------------*\n Run CAS statements\n*------------------------------------------------------------------------------------------*/\n\n %if &_fnn_error_flag. = 0 %then %do;\n \n %local mTreesString;\n %local maxPointsString;\n\n %let desiredVar=mTrees;\n %_gac_generate_additional_code(&searchMethod.,=,\"APPROXIMATE\",&desiredVar., &mTrees.);\n %let mTreesString=&_gac_generated_string.;\n %let _gac_generated_string=;\n\n %let desiredVar=maxPoints;\n %_gac_generate_additional_code(&searchMethod.,=,\"APPROXIMATE\",&desiredVar., &maxPoints.);\n %let maxPointsString=&_gac_generated_string.;\n %let _gac_generated_string=;\n \n proc cas; \n\n/*-----------------------------------------------------------------------------------------*\n Obtain inputs from UI.\n*------------------------------------------------------------------------------------------*/\n\n baseTableName = symget(\"baseTable_name_base\");\n baseTableLib = symget(\"baseCaslib\");\n queryTableName = symget(\"queryTable_name_base\");\n queryTableLib = symget(\"queryCaslib\");\n outputTableName = symget(\"outputTable_name_base\");\n outputTableLib = symget(\"outputCaslib\");\n outputDistTableName = symget(\"outputDistTable_name_base\");\n outputDistTableLib = symget(\"outputDistCaslib\");\n\n idCol = symget(\"idCol\");\n numMatches = symget(\"numMatches\");\n thresholdDistance = symget(\"thresholdDistance\");\n searchMethod = symget(\"searchMethod\");\n mTreesString = symget(\"mTreesString\");\n maxPointsString = symget(\"maxPointsString\");\n\n\n/*-----------------------------------------------------------------------------------------*\n Run Fast KNN action\n Note: We are 
currently keeping the default parallelization setting for the QUERY\n table currently, due to the chances of some session hangups when running with\n PARALLELIZATION=INPUT. This is temporary and will be revisited.\n*------------------------------------------------------------------------------------------*/\n \n fastknn.fastknn result=r / \n table = {name=baseTableName, caslib=baseTableLib},\n query = {name=queryTableName, caslib=queryTableLib},\n inputs = ${&blankSeparatedCols.},\n id = idCol,\n k = numMatches,\n method = searchMethod,\n &mTreesString.\n &maxPointsString.\n output = { casout= {name=outputTableName, caslib=outputTableLib, replace=True}},\n outDist = { name=outputDistTableName, caslib=outputDistTableLib, replace=True},\n threshDist = thresholdDistance\n ;\n\n/*-----------------------------------------------------------------------------------------*\n Print summary results to output window;\n*------------------------------------------------------------------------------------------*/\n \n print r;\n\n quit;\n\n %end;\n %end;\n\n%mend _fnn_main_execution_code;\n\n\n/*-----------------------------------------------------------------------------------------*\n END MACRO DEFINITIONS.\n*------------------------------------------------------------------------------------------*/\n\n/*-----------------------------------------------------------------------------------------*\n EXECUTION CODE\n The execution code is controlled by the trigger variable defined in this custom step. This\n trigger variable is in an \"enabled\" (value of 1) state by default, but in some cases, as \n dictated by logic, could be set to a \"disabled\" (value of 0) state.\n*------------------------------------------------------------------------------------------*/\n\n%if &_fnn_run_trigger. = 1 %then %do;\n %_fnn_main_execution_code;\n%end;\n%if &_fnn_run_trigger. = 0 %then %do;\n %put NOTE: This step has been disabled. 
Nothing to do.;\n%end;\n\n\n\n/*-----------------------------------------------------------------------------------------*\n Clean up existing macro variables and macro definitions.\n*------------------------------------------------------------------------------------------*/\n%if %symexist(_fnn_error_flag) %then %do;\n %symdel _fnn_error_flag;\n%end;\n%if %symexist(outputDistCaslib) %then %do;\n %symdel outputDistCaslib;\n%end;\n%if %symexist(queryCaslib) %then %do;\n %symdel queryCaslib;\n%end;\n%if %symexist(baseCaslib) %then %do;\n %symdel baseCaslib;\n%end;\n%if %symexist(_fnn_run_trigger) %then %do;\n %symdel _fnn_run_trigger;\n%end;\n%if %symexist(casSessionExists) %then %do;\n %symdel casSessionExists;\n%end;\n%if %symexist(_current_uuid_) %then %do;\n %symdel _current_uuid_;\n%end;\n%if %symexist(_usr_nameCaslib) %then %do;\n %symdel _usr_nameCaslib;\n%end;\n%if %symexist(outputCaslib) %then %do;\n %symdel outputCaslib;\n%end;\n%if %symexist(_gac_generated_string) %then %do;\n %symdel _gac_generated_string;\n%end;\n%if %symexist(blankSeparatedCols) %then %do;\n %symdel blankSeparatedCols;\n%end;\n\n%sysmacdelete _fnn_checkSession;\n%sysmacdelete _usr_getNameCaslib;\n%sysmacdelete _fnn_main_execution_code;\n%sysmacdelete _gac_generate_additional_code;"},"properties":{},"ui":"{\n\t\"showPageContentOnly\": true,\n\t\"pages\": [\n\t\t{\n\t\t\t\"id\": \"params\",\n\t\t\t\"type\": \"page\",\n\t\t\t\"label\": \"Parameters\",\n\t\t\t\"children\": [\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"inputSelectionSection\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Input Selection\",\n\t\t\t\t\t\"open\": true,\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"baseTable\",\n\t\t\t\t\t\t\t\"type\": \"inputtable\",\n\t\t\t\t\t\t\t\"label\": \"Provide base table:\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": 
\"queryTable\",\n\t\t\t\t\t\t\t\"type\": \"inputtable\",\n\t\t\t\t\t\t\t\"label\": \"Provide query table:\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"idCol\",\n\t\t\t\t\t\t\t\"type\": \"columnselector\",\n\t\t\t\t\t\t\t\"label\": \"Select ID column:\",\n\t\t\t\t\t\t\t\"order\": false,\n\t\t\t\t\t\t\t\"columntype\": \"n\",\n\t\t\t\t\t\t\t\"max\": 1,\n\t\t\t\t\t\t\t\"min\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"table\": \"queryTable\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"inputColumns\",\n\t\t\t\t\t\t\t\"type\": \"columnselector\",\n\t\t\t\t\t\t\t\"label\": \"Select input columns:\",\n\t\t\t\t\t\t\t\"order\": false,\n\t\t\t\t\t\t\t\"columntype\": \"n\",\n\t\t\t\t\t\t\t\"max\": null,\n\t\t\t\t\t\t\t\"min\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"table\": \"queryTable\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"settingsSection\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Settings\",\n\t\t\t\t\t\"open\": false,\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"searchMethod\",\n\t\t\t\t\t\t\t\"type\": \"dropdown\",\n\t\t\t\t\t\t\t\"label\": \"Select search method:\",\n\t\t\t\t\t\t\t\"items\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"value\": \"APPROXIMATE\",\n\t\t\t\t\t\t\t\t\t\"label\": \"Approximate\"\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"value\": \"EXACT\",\n\t\t\t\t\t\t\t\t\t\"label\": \"Exact\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"maxPoints\",\n\t\t\t\t\t\t\t\"type\": \"numstepper\",\n\t\t\t\t\t\t\t\"label\": \"Number of observations to search in each leaf node:\",\n\t\t\t\t\t\t\t\"required\": false,\n\t\t\t\t\t\t\t\"integer\": 
false,\n\t\t\t\t\t\t\t\"min\": 0,\n\t\t\t\t\t\t\t\"max\": null,\n\t\t\t\t\t\t\t\"stepsize\": 1,\n\t\t\t\t\t\t\t\"enabled\": [\n\t\t\t\t\t\t\t\t\"$searchMethod\",\n\t\t\t\t\t\t\t\t\"=\",\n\t\t\t\t\t\t\t\t\"APPROXIMATE\"\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"visible\": [\n\t\t\t\t\t\t\t\t\"$searchMethod\",\n\t\t\t\t\t\t\t\t\"=\",\n\t\t\t\t\t\t\t\t\"APPROXIMATE\"\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"mTrees\",\n\t\t\t\t\t\t\t\"type\": \"numstepper\",\n\t\t\t\t\t\t\t\"label\": \"Number of binary trees:\",\n\t\t\t\t\t\t\t\"required\": false,\n\t\t\t\t\t\t\t\"integer\": false,\n\t\t\t\t\t\t\t\"min\": 0,\n\t\t\t\t\t\t\t\"max\": null,\n\t\t\t\t\t\t\t\"stepsize\": 1,\n\t\t\t\t\t\t\t\"enabled\": [\n\t\t\t\t\t\t\t\t\"$searchMethod\",\n\t\t\t\t\t\t\t\t\"=\",\n\t\t\t\t\t\t\t\t\"APPROXIMATE\"\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"visible\": [\n\t\t\t\t\t\t\t\t\"$searchMethod\",\n\t\t\t\t\t\t\t\t\"=\",\n\t\t\t\t\t\t\t\t\"APPROXIMATE\"\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"thresholdDistance\",\n\t\t\t\t\t\t\t\"type\": \"numstepper\",\n\t\t\t\t\t\t\t\"label\": \"Specify threshold for distance computation:\",\n\t\t\t\t\t\t\t\"required\": false,\n\t\t\t\t\t\t\t\"integer\": true,\n\t\t\t\t\t\t\t\"min\": null,\n\t\t\t\t\t\t\t\"max\": null,\n\t\t\t\t\t\t\t\"stepsize\": 1\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parallelTable\",\n\t\t\t\t\t\t\t\"type\": \"dropdown\",\n\t\t\t\t\t\t\t\"label\": \"Select table to be parallelized:\",\n\t\t\t\t\t\t\t\"items\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"value\": \"INPUT\",\n\t\t\t\t\t\t\t\t\t\"label\": \"Input\"\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"value\": \"QUERY\",\n\t\t\t\t\t\t\t\t\t\"label\": \"Query\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t],\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"enabled\": 
[\n\t\t\t\t\t\t\t\t\"true\",\n\t\t\t\t\t\t\t\t\"=\",\n\t\t\t\t\t\t\t\t\"false\"\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"outputSpecificationSection\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Output Specifications\",\n\t\t\t\t\t\"open\": false,\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"numMatches\",\n\t\t\t\t\t\t\t\"type\": \"numstepper\",\n\t\t\t\t\t\t\t\"label\": \"Provide number of matches to return:\",\n\t\t\t\t\t\t\t\"required\": false,\n\t\t\t\t\t\t\t\"integer\": false,\n\t\t\t\t\t\t\t\"min\": null,\n\t\t\t\t\t\t\t\"max\": null,\n\t\t\t\t\t\t\t\"stepsize\": 1\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"outputTable\",\n\t\t\t\t\t\t\t\"type\": \"outputtable\",\n\t\t\t\t\t\t\t\"label\": \"Provide output table:\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"outputDistTable\",\n\t\t\t\t\t\t\t\"type\": \"outputtable\",\n\t\t\t\t\t\t\t\"label\": \"Provide output distance table:\",\n\t\t\t\t\t\t\t\"required\": true,\n\t\t\t\t\t\t\t\"placeholder\": \"\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t}\n\t\t\t]\n\t\t},\n\t\t{\n\t\t\t\"id\": \"about\",\n\t\t\t\"type\": \"page\",\n\t\t\t\"label\": \"About\",\n\t\t\t\"children\": [\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_description\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"Find Nearest Neighbors\\n==================\\nThe Find Nearest Neighbors Custom Step searches a base table to identify nearest neighbors to observations in an input query table, based on a distance formula.\\n\\nThis step facilitates applications in recommendation engines, similarity analysis, search and others. 
With an increased focus on generative AI, this step can be used to match embeddings to find similar documents and augment semantic search and large language models.\\n\\nThis custom step makes use of the fastknn.fastknn SAS Cloud Analytics Services (CAS) action.\\n\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_parameters\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Parameters\",\n\t\t\t\t\t\"open\": 0,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_text\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"This custom step runs on data loaded to a SAS Cloud Analytics Services (CAS) library (known as a caslib). Ensure you are connected to CAS before running this step. \\n\\nThis custom step requires a license for SAS Viya or higher to be able to run the fastknn.fastknn action.\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_input\",\n\t\t\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\t\t\"label\": \"Input Parameters\",\n\t\t\t\t\t\t\t\"open\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"id\": \"input_parameters_text\",\n\t\t\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\t\t\"text\": \"1. Base table (input port, required): attach a CAS table to this port containing data to be searched against.\\n\\n2. Query table (input port, required): attach a CAS table to this port containing data for which you would like to search nearest neighbors.\\n\\n3. ID column (column selector, required): select a numeric column to serve as the unique identifier for each observation.\\n\\n4. 
Input columns (column selector, required): select one or more numeric columns which will be used to calculate the distance measure which serves as the basis for selecting nearest neighbors.\",\n\t\t\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_settings\",\n\t\t\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\t\t\"label\": \"Settings\",\n\t\t\t\t\t\t\t\"open\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"id\": \"settings_text\",\n\t\t\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\t\t\"text\": \"1. Distance threshold (stepper, default 100): select a threshold which will act as the maximum limit to calculate distance.\\n\\n2. Search Method (drop-down list, default is Approximate): select whether to use the Exact or Approximate search method. Refer to the documentation for details.\\n\\n3. Number of binary trees (stepper, upon selection of Approximate search method, default 10): select the number of adjacent binary trees to which the search method will confine its search.\\n\\n4. Number of points (stepper, upon selection of Approximate search method, default 100): select the maximum number of points to evaluate in a leaf node.\\n\\n5. Parallelization method (set to current default of Query, not modifiable in this version): this setting specifies whether the query table or the base table will be parallelized while conducting the search. 
We have set this to the default value of Query for this initial version of the step and will explore providing the Input (base table) option in a future version.\",\n\t\t\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t},\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"parameters_output_specs\",\n\t\t\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\t\t\"label\": \"Output Specifications\",\n\t\t\t\t\t\t\t\"open\": 1,\n\t\t\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\"id\": \"output_parameters_text\",\n\t\t\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\t\t\"text\": \"1. Number of matches (stepper, default of 4): select maximum number of neighbors (similar observations) you would like for each observation. Note that selecting a higher number leads to longer computation time, wider set of columns for the output tables, and a larger distance table.\\n\\n2. Output table (output port, required): attach a CAS table to this port. This will hold the output data containing each ID column of the query table along with additional columns containing IDs of the neighbors identified.\\n\\n3. Output Distance table (output port, required): attach a CAS table to this port. This will hold a long dataset containing the ID from the query table, the neighbor ID from the base table and the Euclidean distance measure.\",\n\t\t\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_runtimecontrol\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Run-time Control\",\n\t\t\t\t\t\"open\": 0,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"runtimecontrol_text\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"Note: Run-time control is optional. 
You may choose whether to execute the main code of this step or not, based on upstream conditions set by earlier SAS programs. This includes nodes run prior to this custom step earlier in a SAS Studio Flow, or a previous program in the same session.\\n\\nRefer to this blog (https://communities.sas.com/t5/SAS-Communities-Library/Switch-on-switch-off-run-time-control-of-SAS-Studio-Custom-Steps/ta-p/885526) for more details on the concept.\\n\\nThe following macro variable,\\n\\n_fnn_run_trigger\\n\\nwill initialize with a value of 1 by default, indicating an \\\"enabled\\\" status and allowing the custom step to run.\\n\\nIf you wish to control execution of this custom step, include code in an upstream SAS program to set this variable to 0. This \\\"disables\\\" execution of the custom step.\\n\\nTo \\\"disable\\\" this step, run the following code upstream:\\n\\n%global _fnn_run_trigger;\\n%let _fnn_run_trigger =0;\\n\\nTo \\\"enable\\\" this step again, run the following (it's assumed that this has already been set as a global variable):\\n\\n%let _fnn_run_trigger =1;\\n\\nIMPORTANT: Be aware that disabling this step means that none of its main execution code will run, and any downstream code which was dependent on this code may fail. Change this setting only if it aligns with the objective of your SAS Studio program.\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"about_documentation\",\n\t\t\t\t\t\"type\": \"section\",\n\t\t\t\t\t\"label\": \"Documentation\",\n\t\t\t\t\t\"open\": 0,\n\t\t\t\t\t\"visible\": \"\",\n\t\t\t\t\t\"children\": [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\"id\": \"documentation_text\",\n\t\t\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\t\t\"text\": \"1. The fastknn.fastknn Cloud Analytics Service (CAS) action: https://go.documentation.sas.com/doc/en/pgmsascdc/default/casactml/cas-fastknn-fastknn.htm\\n\\n2. 
Useful SAS Communities article on the K-Nearest Neighbors algorithm: http://communities.sas.com/t5/SAS-Communities-Library/A-Simple-Introduction-to-K-Nearest-Neighbors-Algorithm/ta-p/565402\\n\\n3. Details on the optional run-time trigger control: http://communities.sas.com/t5/SAS-Communities-Library/Switch-on-switch-off-run-time-control-of-SAS-Studio-Custom-Steps/ta-p/885526\\n\",\n\t\t\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t\t\t}\n\t\t\t\t\t]\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"version_text\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"Version: 1.0 (29NOV2023)\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\t\"id\": \"contact_text\",\n\t\t\t\t\t\"type\": \"text\",\n\t\t\t\t\t\"text\": \"Created/contact: \\n\\n- Sundaresh Sankaran ([email protected])\\n\",\n\t\t\t\t\t\"visible\": \"\"\n\t\t\t\t}\n\t\t\t]\n\t\t}\n\t],\n\t\"syntaxversion\": \"1.3.0\",\n\t\"values\": {\n\t\t\"baseTable\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t},\n\t\t\"queryTable\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t},\n\t\t\"idCol\": [],\n\t\t\"inputColumns\": [],\n\t\t\"searchMethod\": {\n\t\t\t\"value\": \"APPROXIMATE\",\n\t\t\t\"label\": \"Approximate\"\n\t\t},\n\t\t\"maxPoints\": 100,\n\t\t\"mTrees\": 10,\n\t\t\"thresholdDistance\": 100,\n\t\t\"parallelTable\": {\n\t\t\t\"value\": \"QUERY\",\n\t\t\t\"label\": \"Query\"\n\t\t},\n\t\t\"numMatches\": 4,\n\t\t\"outputTable\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t},\n\t\t\"outputDistTable\": {\n\t\t\t\"library\": \"\",\n\t\t\t\"table\": \"\"\n\t\t}\n\t},\n\t\"columnExclusions\": 
[\n\t\t[\n\t\t\t\"inputColumns\",\n\t\t\t\"idCol\"\n\t\t]\n\t]\n}","flowMetadata":{"inputPorts":[{"name":"baseTable","displayName":"baseTable","minEntries":1,"maxEntries":1,"type":"table"},{"name":"queryTable","displayName":"queryTable","minEntries":1,"maxEntries":1,"type":"table"}],"outputPorts":[{"name":"outputTable","displayName":"outputTable","minEntries":1,"maxEntries":1,"columnDelta":null,"type":"table"},{"name":"outputDistTable","displayName":"outputDistTable","minEntries":1,"maxEntries":1,"columnDelta":null,"type":"table"}]}}