サンプル Hive スクリプト

-- Load the Universal Addressing Module [UAM-Global] BDQ Hive UDAF jar into the session.
-- <Directory path> is a placeholder for the folder that holds the jar.
ADD JAR <Directory path>/uam.universaladdress.hive.${project.version}.jar;

-- Optional: bind a short function name to the UDAF class. The quoted string is the
-- class name this job needs in order to run.
CREATE TEMPORARY FUNCTION uamvalidation AS 'com.pb.bdq.uam.process.hive.universaladdress.UAMUSAddressingUDAF';

-- Environment handed to the MapReduce tasks, given as a comma-separated list:
--   LD_LIBRARY_PATH                path to the modules lib, runtime/lib and runtime/bin
--   ACU_RUNCBL_JNI_ONLOAD_DISABLE  set to 1
--   G1RTS                          path containing the COBOL runtime
-- The whole value must stay on ONE line: a line break inside it would become part of
-- LD_LIBRARY_PATH and corrupt the library search path.
set mapreduce.admin.user.env = LD_LIBRARY_PATH=/home/hduser/~/runtime/lib:/home/hduser/~/runtime/bin:/home/hduser/~/server/modules/universaladdress/lib, ACU_RUNCBL_JNI_ONLOAD_DISABLE=1, G1RTS=/home/hduser/~/ ;

-- Turn off Hive map-side aggregation for this session.
set hive.map.aggr = false;

-- Set the engine configuration as a single JSON object. It names the reference data
-- directory and its location type, the COBOL runtime path and the modules directory.
-- The DPV, SuiteLink, EWS, RDI and LACS database paths are all null in this sample.
-- NOTE(review): cobolRuntimePath/modulesDir use a different base directory than the
-- LD_LIBRARY_PATH/G1RTS values set earlier in this script; confirm which applies.
set pb.bdq.uam.universaladdress.engine.configurations={ "referenceData":{ "dataDir":"/home/hduser/resources/uam/universaladdress/UAM_universaladdress4.0_Feb15/", "referenceDataPathLocation":"LocaltoDataNodes"}, "cobolRuntimePath":"/home/hduser/tapan/addressquality/", "modulesDir":"/home/hduser/tapan/addressquality/modules", "dpvDbPath":null, "suiteLinkDBPath":null, "ewsDBPath":null, "rdiDBPath":null, "lacsDBPath":null};

-- Set the input configuration as a single JSON object. The top-level keys carry the
-- typed options (booleans, numbers, enum-like strings); the nested "properties" object
-- repeats options as short string codes ("Y"/"N", "M", "C", ...).
-- The JSON must be balanced: exactly one closing brace for "properties" and one for the
-- outer object. Line breaks fall only between members (after a comma), never inside a value.
set pb.bdq.uam.universaladdress.input.configuration={"outputStandardAddress":true, "outputPostalData":false, "outputParsedInput":false, "outputAddressBlocks":true, "performUSProcessing":true, "performCanadianProcessing":false, "performInternationalProcessing":false, "outputFormattedOnFail":false, "outputCasing":"MIXED", "outputPostalCodeSeparator":true, "outputMultinationalCharacters":false, "performDPV":false, "performRDI":false, "performESM":false, "performASM":false, "performEWS":false, "performLACSLink":false, "performLOT":false, "failOnCMRAMatch":false, "extractFirm":false, "extractUrb":false, "outputReport3553":false, "outputReportSERP":false, "outputReportSummary":true, "outputCASSDetail":false, "outputFieldLevelReturnCodes":false, "keepMultimatch":false, "maximumResults":10, "standardAddressFormat":"STANDARD_ADDRESS_FORMAT_COMBINED_UNIT", "standardAddressPMBLine":"STANDARD_ADDRESS_PMB_LINE_NONE", "cityNameFormat":"CITY_FORMAT_STANDARD", "vanityCityFormatLong":true, "outputCountryFormat":"ENGLISH", "homeCountry":"United States", "streetMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM", "firmMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM", "directionalMatchingStrictness":"MATCHING_STRICTNESS_MEDIUM", "dualAddressLogic":"DUAL_NORMAL", "dpvSuccessfulStatusCondition":"A", "reportListFileName":"", "reportlistProcessorName":"", "reportlistNumber":1, "reportMailerAddress":"", "reportMailerName":"", "reportMailerCityLine":"", "canReportMailerCPCNumber":"", "canReportMailerAddress":"", "canReportMailerName":"", "canReportMailerCityLine":"", "internationalCityStreetSearching":100, "addressLineSearchOnFail":true, "outputStreetAlias":true, "outputVeriMoveBlock":false, "dpvDetermineNoStat":false, "dpvDetermineVacancy":false, "outputAbbreviatedAlias":false, "outputPreferredAlias":false, "outputPreferredCity":"CITY_OVERRIDE_NAME_ZIP4", "performSuiteLink":false, "suppressZplusPhantomCarrierR777":false, "canStandardAddressFormat":"D", "canEnglishApartmentLabel":"APT", 
"canFrenchApartmentLabel":"APP", "canFrenchFormat":"C", "canOutputCityFormat":"D", "canOutputCityAlias":true, "canDualAddressLogic":"D", "canPreferHouseNum":false, "canSSLVRFLG":false, "canRuralRouteFormat":"A", "canNonCivicFormat":"A", "canDeliveryOfficeFormat":"I", "canEnableSERP":false, "canSwitchManagedPostalCodeConfidence":false, "stats":null, "counts":null, "z3seg":null, "serpStats":null, "dpvSeedList":null, "lacsSeedList":null, "zipInputSet":null, "reportName":null, "currentUser":null, "jobName":null, "jobId":null, "jobRequest":false, "properties":{"DPVDetermineVacancy":"N", "DualAddressLogic":"N", "ExtractUrb":"N", "CanFrenchFormat":"C", "AddressLineSearchOnFail":"Y", "OutputFieldLevelReturnCodes":"N", "OutputFormattedOnFail":"N", "OutputStreetNameAlias":"Y", "OutputReportSERP":"N", "OutputAddressBlocks":"Y", "ExtractFirm":"N", "CanEnglishApartmentLabel":"APT", "OutputPreferredCity":"Z", "FirmMatchingStrictness":"M", "CanFrenchApartmentLabel":"APP", "KeepMultimatch":"N", "StandardAddressPMBLine":"N", "PerformSuiteLink":"N", "CanStandardAddressFormat":"D", "DPVSuccessfulStatusCondition":"A", "PerformLACSLink":"N", "PerformUSProcessing":"Y", "PerformEWS":"N", "StandardAddressFormat":"C", "SuppressZplusPhantomCarrierR777":"N", "HomeCountry":"United States", "ReportMailerAddress":"", "OutputReport3553":"N", "OutputVeriMoveDataBlock":"N", "CanDeliveryOfficeFormat":"I", "OutputAbbreviatedAlias":"N", "PerformCanadianProcessing":"N", "PerformDPV":"N", "PerformInternationalProcessing":"N", "CanSSLVRFlg":"N", "StreetMatchingStrictness":"M", "InternationalCityStreetSearching":"100", "canSwitchManagedPostalCodeConfidence":"N", "CanDualAddressLogic":"D", "PerformASM":"N", "OutputCasing":"M", "ReportListFileName":"", "CanReportMailerAddress":"", "ReportMailerCityLine":"", "CanReportMailerCPCNumber":"", "ReportListProcessorName":"", "CanOutputCityAlias":"Y", "DirectionalMatchingStrictness":"M", "CanRuralRouteFormat":"A", "CanOutputCityFormat":"D", "ReportListNumber":"1", 
"CanReportMailerCityLine":"", "OutputMultinationalCharacters":"N", "EnableSERP":"N", "CanNonCivicFormat":"A", "OutputShortCityName":"S", "OutputPostalCodeSeparator":"Y", "FailOnCMRAMatch":"N", "PerformLOT":"N", "OutputCountryFormat":"E", "CanPreferHouseNum":"N", "CanReportMailerName":"", "PerformRDI":"N", "ReportMailerName":"", "PerformESM":"N", "OutputReportSummary":"Y", "OutputVanityCityFormatLong":"Y", "OutputPreferredAlias":"N", "DPVDetermineNoStat":"N", "MaximumResults":"10"}};

-- Set the general configuration as a JSON object: the file type ("dFileType") and the
-- memory models for the d-file, LACSLink and SuiteLink data (all MEDIUM in this sample).
set pb.bdq.uam.universaladdress.general.configuration = {"dFileType":"SPLIT", "dMemoryModel":"MEDIUM", "lacsLinkMemoryModel":"MEDIUM", "suiteLinkMemoryModel":"MEDIUM"};

-- Set the local reference data path.
-- NOTE(review): this path contains a space ("New Volume") and differs from the
-- "dataDir" given in the engine configuration; confirm both point at the intended data.
set pb.bdq.reference.data.local.location=/media/New Volume/hduser/resources/uam/universaladdress/UAM_universaladdress4.0_Feb15;

-- Set the process type (VALIDATE in this sample).
set pb.bdq.uam.universaladdress.process.type=VALIDATE; 

-- Set the header: a comma-separated list of input field names, in the same order as the
-- columns passed to uamvalidation in the queries of this script.
-- The list must stay on ONE line; a line break inside it would become part of a field name.
set pb.bdq.header=InputKeyValue,FirmName,AddressLine1,AddressLine2,City,StateProvince,PostalCode,Text;

-- Execute the query on the desired table to display the job output on the console.
-- The inner query runs the UDAF and aliases its result column as mygp, so that the
-- LATERAL VIEW can reference addressgroup.mygp (without the alias that column does not
-- exist). explode() then yields, for each row, a map of key/value pairs containing the
-- output fields; the outer SELECT picks individual fields from that map.
SELECT
    tmp2.record["Confidence"],
    tmp2.record["AddressLine1"]
FROM (
    SELECT uamvalidation(inputkeyvalue, firmname, addressline1, addressline2, city, stateprovince, postalcode, text) AS mygp
    FROM uam_us
) AS addressgroup
LATERAL VIEW explode(addressgroup.mygp) tmp2 AS record;

-- Dump the output data to files in a local directory as comma-delimited text.
-- The inner query runs the UDAF and aliases its result column as mygp, so that the
-- LATERAL VIEW can reference addressgroup.mygp (without the alias that column does not
-- exist). Each exploded record is a map of output fields; two of them are written out.
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/GlobalAddressing/'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
SELECT
    tmp2.record["Confidence"],
    tmp2.record["AddressLine1"]
FROM (
    SELECT uamvalidation(inputkeyvalue, firmname, addressline1, addressline2, city, stateprovince, postalcode, text) AS mygp
    FROM uam_us
) AS addressgroup
LATERAL VIEW explode(addressgroup.mygp) tmp2 AS record;

+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| address.recordid  |     address.addressline1     |  address.city   | address.stateprovince  | address.postalcode  | address.country  |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+
| 1                 | 18 Merivale St               | South Brisbane  | QLD                    | 4101                | AUS              |
| 2                 | 19 Serpentine Rd             | Albany          | WA                     | 6330                | AUS              |
| 3                 | 317 VICTORIA ST GR           | BRUNSWICK       | VIC                    | 3056                | AUS              |
| 4                 | DUPLEX 6/16-18 O'CONNELL ST  | AINSLIE         | ACT                    | 2602                | AUS              |
| 5                 | LOT 154 470 BRYGON CREEK DR  | UPPER COOMERA   | QLD                    | 4209                | AUS              |
| 6                 | 16 GREENE ST                 | WARRAWONG       | ACT                    | 2502                | AUS              |
| 7                 | UNIT 47/16 BLAIRMOUNT ST     | PARKINSON       | QLD                    | 4115                | AUS              |
| 8                 | 13-15 FRANCESCO CRES         | BELLA VISTA     | NSW                    | 2153                | AUS              |
| 9                 | 4 RYANS LANE                 | HEATHCOTE       | VIC                    | 3523                | AUS              |
| 10                | 1 CHRISTMAS LN               | NORTH POLE      | VIC                    | 1111                | AUS              |
+-------------------+------------------------------+-----------------+------------------------+---------------------+------------------+



+-----------+---------------+------------+------------------------------+---------------+
|Confidence |StreetName     |HouseNumber |     AddressLine1             | AddressType   |
+-----------+---------------+------------+------------------------------+---------------+
| 100.00    | MERIVALE      | 18         | 18 MERIVALE ST               | S             |
| 99.42     | SERPENTINE    | 19         | 19 SERPENTINE RD E           | S             |
| 97.95     | VICTORIA      | 317        | 317 VICTORIA ST              | S             |
| 100.00    | O'CONNELL     | 16-18      | DUP 6 16-18 O'CONNELL ST     | S             |
| 0.00      | BRYGON CREEK  | 470        | LOT 154 470 BRYGON CREEK DR  | U             |
| 76.99     | GREENE        | 16         | 16 GREENE ST                 | S             |
| 100.00    | BLAIRMOUNT    | 16         | U 47 16 BLAIRMOUNT ST        | S             |
| 100.00    | FRANCESCO     | 13-15      | 13-15 FRANCESCO CRES         | S             |
| 100.00    | RYANS         | 4          | 4 RYANS LANE                 | S             |
| 0.00      | CHRISTMAS     | 1          | 1 CHRISTMAS LN               | U             |
+-----------+---------------+------------+------------------------------+---------------+