-- Register Universal Address Module [UAM] BDQ Hive Loqate UDAF Jar
ADD JAR <Directory path>/uam.loqate.hive.${project.version}.jar;
-- Provide alias to UDAF class (optional).
String in quotes represent class names needed for this job to run.
CREATE TEMPORARY FUNCTION loqatevalidation as 'com.pb.bdq.uam.process.hive.loqate.LoqateAddressingUDF';
-- Adding required files to distributed cache.
ADD FILES <Directory Path>/loqate-core.car;
ADD FILES <Directory Path>/LoqateVerificationLevel.csv;
ADD FILES <Directory Path>/Loqate.csv;
ADD FILES <Directory Path>/countryTables.csv;
ADD FILES <Directory Path>/countryNameTables.csv;
set hive.map.aggr = false;
-- set process configuration
set set hivevar:processconf='{"processType":"VALIDATE","includeMatchedAddressElements":
false,"includeStandardizedInputAddressElements":false,"returnAddressDataBlocks":false,
"casing":"Mixed","outputReportSummary":false,"includeResultCodesforIndividualFields":false,
"returnMultipleAddresses":false,"failedOnMultiMatchFound":false,"countryFormat":"ENGLISH",
"defaultCountry":"USA","scriptAlphabet":"InputScript","returnGeocodedAddressFields":false,
"acceptanceLevel":"Level0","minimumMatchScore":0,"formatDataUsingAMASConventions":false,
"singleFieldDuplicateHandling":false,"multiFieldDuplicateHandling":false,
"nonStandardFieldDuplicateHandling":false,"outputFieldDuplicateHandling":false,
"returnMultipleAddressCount":10,"duplicateHandling":false,"includeStandardAddress":true}';
-- set general configuration
set hivevar:generalconf='{"maxIdle":null,"minIdle":16,"maxActive":16,"maxWait":null,
"whenExhaustedAction":null,"testOnBorrow":null,"testOnReturn":null,"testWhileIdle":null,
"timeBetweenEvictionRunsMillis":null,"numTestsPerEvictionRun":null,"minEvictableIdleTimeMillis":null}';
-- set engine configuration
set hivevar:engineconf='{"verbose":true,"toolInfo":true,"outputAddressFormat":false,
"logInput":false,"logOutput":false,"logFileName":null,"matchScoreAbsoluteThreshold":60,
"matchScoreThresholdFactor":95,"postalCodeMaxResults":10,"strictReferenceMatch":false}';
-- set reference directory path
set hivevar:location='/home/hduser/ReferenceData/AddressQuality/Loqate';
-- set process type
set hivevar:processtype='VALIDATE';
-- set input header
set hivevar:header='InputKeyValue,AddressLine1,City,StateProvince,PostalCode,Country';
select SELECT tmp2.record["HouseNumber"],tmp2.record["Confidence"],tmp2.record["AddressLine1"],
tmp2.record["StreetName"],tmp2.record["PostalCode"],tmp2.record["DPID"],tmp2.record["Barcode"]
FROM ( SELECT loqatevalidation(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},
${hivevar:processconf},${hivevar:location},${hivevar:header},inputkeyvalue,addressline1,city,stateprovince,
postalcode,country) as mygp from address) as <TABLE_NAME> LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
-- Query to dump output data to a file
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/loqate/' row format delimited
FIELDS TERMINATED BY ',' lines terminated by '\n' STORED AS TEXTFILE SELECT * FROM
( SELECT tmp2.record["HouseNumber"],tmp2.record["Confidence"],tmp2.record["AddressLine1"]
,tmp2.record["StreetName"],tmp2.record["PostalCode"],tmp2.record["DPID"],tmp2.record["Barcode"]
FROM ( SELECT loqatevalidation(${hivevar:engineconf},${hivevar:generalconf},${hivevar:processtype},
${hivevar:processconf},${hivevar:location},${hivevar:header},inputkeyvalue,addressline1,city,
stateprovince,postalcode,country) as mygp from address) as <TABLE_NAME>
LATERAL VIEW explode(addressgroup.mygp) tmp2 as record ;
--Sample Input
+----------------+------------------------------------+----------------+-------------+---------------+
| inputkeyvalue | addressline1 | stateprovince | postalcode | country |
+----------------+------------------------------------+----------------+-------------+---------------+
| 1 | 80 Quan Su | | | Vietnam |
| 2 | Final Av. Panteón Foro Libertador | | 1010 | Venezuela |
| 3 | P O Box 834 | | | St Vincent |
| 4 | Colonia 2066 | | | Uruguay |
| 5 | Ave de la Resistance BP127 | | | Burkina Faso |
| 6 | Buyuk Turon Street, 41 | | | Uzbekistan |
| 7 | Empire State Building | NY | 10118 | US |
| 8 | 3 Leontovycha St | | | Ukraine |
| 9 | | Ceredigion | | Wales |
| 10 | 5 Main Street | Ballindalloch | | Scotland |
+----------------+------------------------------------+----------------+-------------+---------------+
-- Sample Output
+-----------+---------------+--------+---+------------------------------------+
|Match Score|StreetName |HouseNumber | addressline1 |
+-----------+---------------+------------+------------------------------------+
| 100.00 | MERIVALE | 80 | 80 Quan Su |
| 100.00 | SERPENTINE | | Final Av. Panteón Foro Libertador |
| 0.00 | VICTORIA | 0 | P O Box 834 |
| 75.00 | O'CONNELL | 2066 | Colonia 2066 |
| 83.33 | BRYGON CREEK | 470 | Ave de la Resistance BP127 |
| 100.00 | GREENE | | Buyuk Turon Street, 41 |
| 96.8254 | BLAIRMOUNT | 41 | Empire State Building |
| 83.950 | FRANCESCO | 350 | 3 Leontovycha St |
| 50.00 | RYANS | 3 | |
| 100 | CHRISTMAS | 5 | 5 Main Street |
+-----------+---------------+------------+------------------------------------+
!quit