-- Load the Universal Addressing Module [UAM-Global] BDQ Hive UDAF jar into the session.
-- Replace <Directory path> with the folder that contains the jar.
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;
-- Give the UDAF class a session-scoped alias (optional). The quoted string is
-- the class name this job needs in order to run.
CREATE TEMPORARY FUNCTION uamvalidation
    AS 'com.pb.bdq.uam.process.hive.universaladdress.UAMUSAddressingUDAF';
-- Environment handed to the MapReduce tasks. This setting must define:
--   LD_LIBRARY_PATH                path to the module's lib, runtime/lib and runtime/bin
--   ACU_RUNCBL_JNI_ONLOAD_DISABLE  set to 1
--   G1RTS                          path containing the COBOL runtime
-- Keep the whole value on ONE line: when a `set` value wraps onto a second
-- line, the line break becomes part of the value and ends up inside
-- LD_LIBRARY_PATH. The spaces after the commas are removed as well, because
-- some Hadoop releases may not trim the individual NAME=value entries.
set mapreduce.admin.user.env = LD_LIBRARY_PATH=/home/hduser/~/runtime/lib:/home/hduser/~/runtime/bin:/home/hduser/~/server/modules/universaladdress/lib,ACU_RUNCBL_JNI_ONLOAD_DISABLE=1,G1RTS=/home/hduser/~/;
-- Disable Hive map-side aggregation (hive.map.aggr).
set hive.map.aggr = false;
-- set engine configuration (JSON): the reference data directory and its
-- location type (referenceData), the COBOL runtime path, the modules
-- directory, and the DPV / SuiteLink / EWS / RDI / LACS database paths,
-- which are left null in this sample.
-- NOTE(review): all paths are sample values; presumably cobolRuntimePath and
-- modulesDir must agree with G1RTS / LD_LIBRARY_PATH in
-- mapreduce.admin.user.env — confirm for the target cluster.
set pb.bdq.uam.universaladdress.engine.configurations={ "referenceData":{ "dataDir":"/home/hduser/resources/uam/universaladdress/UAM_universaladdress4.0_Feb15/", "referenceDataPathLocation":"LocaltoDataNodes"}, "cobolRuntimePath":"/home/hduser/tapan/addressquality/", "modulesDir":"/home/hduser/tapan/addressquality/modules", "dpvDbPath":null, "suiteLinkDBPath":null, "ewsDBPath":null, "rdiDBPath":null, "lacsDBPath":null};
-- set input configuration (JSON). The value is one object holding the
-- validation options plus a nested "properties" object that repeats the
-- options as single-letter / string codes. It therefore closes with exactly
-- two braces ("}}"); a third brace makes the JSON malformed.
-- The value spans three physical lines; each break falls between two JSON
-- members, where whitespace is insignificant.
set pb.bdq.uam.universaladdress.input.configuration={"outputStandardAddress":true, "outputPostalData":false, "outputParsedInput":false, "outputAddressBlocks":true, "performUSProcessing":true, "performCanadianProcessing":false, "performInternationalProcessing":false, "outputFormattedOnFail":false, "outputCasing":"MIXED", "outputPostalCodeSeparator":true, "outputMultinationalCharacters":false, "performDPV":false, "performRDI":false, "performESM":false, "performASM":false, "performEWS":false, "performLACSLink":false, "performLOT":false, "failOnCMRAMatch":false, "extractFirm":false, "extractUrb":false, "outputReport3553":false, "outputReportSERP":false, "outputReportSummary":true, "outputCASSDetail":false, "outputFieldLevelReturnCodes":false, "keepMultimatch":false, "maximumResults":10, "standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT", "standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE", "cityNameFormat":"CITY_FORMAT_STANDARD", "vanityCityFormatLong":true, "outputCountryFormat":"ENGLISH", "homeCountry":"United States", "streetMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM", "firmMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM", "directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM", "dualAddressLogic":"DUAL_NORMAL", "dpvSuccessfulStatusCondition":"A", "reportListFileName":"", "reportlistProcessorName":"", "reportlistNumber":1, "reportMailerAddress":"", "reportMailerName":"", "reportMailerCityLine":"", "canReportMailerCPCNumber":"", "canReportMailerAddress":"", "canReportMailerName":"", "canReportMailerCityLine":"", "internationalCityStreetSearching":100, "addressLineSearchOnFail":true, "outputStreetAlias":true, "outputVeriMoveBlock":false, "dpvDetermineNoStat":false, "dpvDetermineVacancy":false, "outputAbbreviatedAlias":false, "outputPreferredAlias":false, "outputPreferredCity":"CITY_OVERRIDE_NAME_ZIP4", "performSuiteLink":false, "suppressZplusPhantomCarrierR777":false, "canStandardAddressFormat":"D", "canEnglishApartmentLabel":"APT", 
"canFrenchApartmentLabel":"APP", "canFrenchFormat":"C", "canOutputCityFormat":"D", "canOutputCityAlias":true, "canDualAddressLogic":"D", "canPreferHouseNum":false, "canSSLVRFLG":false, "canRuralRouteFormat":"A", "canNonCivicFormat":"A", "canDeliveryOfficeFormat":"I", "canEnableSERP":false, "canSwitchManagedPostalCodeConfidence":false, "stats":null, "counts":null, "z3seg":null, "serpStats":null, "dpvSeedList":null, "lacsSeedList":null, "zipInputSet":null, "reportName":null, "currentUser":null, "jobName":null, "jobId":null, "jobRequest":false, "properties":{"DPVDetermineVacancy":"N", "DualAddressLogic":"N", "ExtractUrb":"N", "CanFrenchFormat":"C", "AddressLineSearchOnFail":"Y", "OutputFieldLevelReturnCodes":"N", "OutputFormattedOnFail":"N", "OutputStreetNameAlias":"Y", "OutputReportSERP":"N", "OutputAddressBlocks":"Y", "ExtractFirm":"N", "CanEnglishApartmentLabel":"APT", "OutputPreferredCity":"Z", "FirmMatchingStrictness":"M", "CanFrenchApartmentLabel":"APP", "KeepMultimatch":"N", "StandardAddressPMBLine":"N", "PerformSuiteLink":"N", "CanStandardAddressFormat":"D", "DPVSuccessfulStatusCondition":"A", "PerformLACSLink":"N", "PerformUSProcessing":"Y", "PerformEWS":"N", "StandardAddressFormat":"C", "SuppressZplusPhantomCarrierR777":"N", "HomeCountry":"United States", "ReportMailerAddress":"", "OutputReport3553":"N", "OutputVeriMoveDataBlock":"N", "CanDeliveryOfficeFormat":"I", "OutputAbbreviatedAlias":"N", "PerformCanadianProcessing":"N", "PerformDPV":"N", "PerformInternationalProcessing":"N", "CanSSLVRFlg":"N", "StreetMatchingStrictness":"M", "InternationalCityStreetSearching":"100", "canSwitchManagedPostalCodeConfidence":"N", "CanDualAddressLogic":"D", "PerformASM":"N", "OutputCasing":"M", "ReportListFileName":"", "CanReportMailerAddress":"", "ReportMailerCityLine":"", "CanReportMailerCPCNumber":"", "ReportListProcessorName":"", "CanOutputCityAlias":"Y", "DirectionalMatchingStrictness":"M", "CanRuralRouteFormat":"A", "CanOutputCityFormat":"D", "ReportListNumber":"1", 
"CanReportMailerCityLine":"", "OutputMultinationalCharacters":"N", "EnableSERP":"N", "CanNonCivicFormat":"A", "OutputShortCityName":"S", "OutputPostalCodeSeparator":"Y", "FailOnCMRAMatch":"N", "PerformLOT":"N", "OutputCountryFormat":"E", "CanPreferHouseNum":"N", "CanReportMailerName":"", "PerformRDI":"N", "ReportMailerName":"", "PerformESM":"N", "OutputReportSummary":"Y", "OutputVanityCityFormatLong":"Y", "OutputPreferredAlias":"N", "DPVDetermineNoStat":"N", "MaximumResults":"10"}};
-- set general configuration (JSON): the file type ("dFileType") and the
-- memory models used for the main data ("dMemoryModel"), LACSLink and
-- SuiteLink.
set pb.bdq.uam.universaladdress.general.configuration = {"dFileType":"SPLIT", "dMemoryModel":"MEDIUM", "lacsLinkMemoryModel":"MEDIUM", "suiteLinkMemoryModel":"MEDIUM"};
-- set reference path: local location of the reference data.
-- NOTE(review): this sample path contains a space ("New Volume") and differs
-- from "dataDir" in pb.bdq.uam.universaladdress.engine.configurations —
-- confirm which location is correct for the target cluster.
set pb.bdq.reference.data.local.location=/media/New Volume/hduser/resources/uam/universaladdress/UAM_universaladdress4.0_Feb15;
-- set process type: VALIDATE selects address validation.
set pb.bdq.uam.universaladdress.process.type=VALIDATE;
-- set header: comma-separated names of the input fields, in the same order
-- as the columns passed to the UDAF in the queries.
-- Keep the list on ONE line: if the value wraps, the line break becomes part
-- of the value and the field after it is no longer named "StateProvince".
set pb.bdq.header=InputKeyValue,FirmName,AddressLine1,AddressLine2,City,StateProvince,PostalCode,Text;
-- Execute the query on the desired table to display the job output on the
-- console. The UDAF result is exploded into one `record` per row; each record
-- is a map of key/value pairs holding the output fields, from which the
-- Confidence and AddressLine1 entries are selected.
-- The UDAF result must be aliased as `mygp`, because the LATERAL VIEW below
-- refers to it as addressgroup.mygp.
SELECT
    tmp2.record["Confidence"],
    tmp2.record["AddressLine1"]
FROM (
    SELECT uamvalidation(inputkeyvalue, firmname, addressline1, addressline2, city, stateprovince, postalcode, text) AS mygp
    FROM uam_us
) AS addressgroup
LATERAL VIEW explode(addressgroup.mygp) tmp2 AS record;
-- Query to dump the output data to a local directory as comma-delimited text,
-- one record per line.
-- The UDAF result must be aliased as `mygp`, because the LATERAL VIEW below
-- refers to it as addressgroup.mygp.
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
SELECT
    tmp2.record["Confidence"],
    tmp2.record["AddressLine1"]
FROM (
    SELECT uamvalidation(inputkeyvalue, firmname, addressline1, addressline2, city, stateprovince, postalcode, text) AS mygp
    FROM uam_us
) AS addressgroup
LATERAL VIEW explode(addressgroup.mygp) tmp2 AS record;
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid | address.addressline1 | address.city | address.stateprovince | address.postalcode | address.country |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1 | 18 Merivale St | South Brisbane | QLD | 4101 | AUS |
| 2 | 19 Serpentine Rd | Albany | WA | 6330 | AUS |
| 3 | 317 VICTORIA ST GR | BRUNSWICK | VIC | 3056 | AUS |
| 4 | DUPLEX 6/16-18 O'CONNELL ST | AINSLIE | ACT | 2602 | AUS |
| 5 | LOT 154 470 BRYGON CREEK DR | UPPER COOMERA | QLD | 4209 | AUS |
| 6 | 16 GREENE ST | WARRAWONG | ACT | 2502 | AUS |
| 7 | UNIT 47/16 BLAIRMOUNT ST | PARKINSON | QLD | 4115 | AUS |
| 8 | 13-15 FRANCESCO CRES | BELLA VISTA | NSW | 2153 | AUS |
| 9 | 4 RYANS LANE | HEATHCOTE | VIC | 3523 | AUS |
| 10 | 1 CHRISTMAS LN | NORTH POLE | VIC | 1111 | AUS |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
+-----------+---------------+------------+------------------------------+---------------+
| Confidence | StreetName | HouseNumber | AddressLine1 | AddressType |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00 | MERIVALE | 18 | 18 MERIVALE ST | S |
| 99.42 | SERPENTINE | 19 | 19 SERPENTINE RD E | S |
| 97.95 | VICTORIA | 317 | 317 VICTORIA ST | S |
| 100.00 | O'CONNELL | 16-18 | DUP 6 16-18 O'CONNELL ST | S |
| 0.00 | BRYGON CREEK | 470 | LOT 154 470 BRYGON CREEK DR | U |
| 76.99 | GREENE | 16 | 16 GREENE ST | S |
| 100.00 | BLAIRMOUNT | 16 | U 47 16 BLAIRMOUNT ST | S |
| 100.00 | FRANCESCO | 13-15 | 13-15 FRANCESCO CRES | S |
| 100.00 | RYANS | 4 | 4 RYANS LANE | S |
| 0.00 | CHRISTMAS | 1 | 1 CHRISTMAS LN | U |
+-----------+---------------+------------+------------------------------+---------------+