-- Sample Hive script

-- Load the Universal Name Module (UNM) BDQ Hive UDF jar into the session.
-- "<Directory path>" is a placeholder for the folder that holds the jar.
-- NOTE(review): ${project.version} looks like a build-time token; confirm it is replaced with the real jar version.
ADD JAR <Directory path>/unm.hive.${project.version}.jar;

-- Bind the session-scoped function name "opennameparser" to the UDF class shipped in that jar.
-- The quoted string is the fully qualified class name the job needs at run time.
-- Open Name Parser is a UDF (user-defined function): it handles one row at a time
-- and produces a map of key/value pairs for each row.
CREATE TEMPORARY FUNCTION opennameparser
    AS 'com.pb.bdq.unm.process.hive.opennameparser.OpenNameParserUDF';

-- Parser rule, written as a JSON object. The queries in this script pass it to
-- opennameparser as the first argument through ${hiveconf:rule}.
-- The value is written with its single quotes so that it can be substituted into the query as a string literal.
-- NOTE(review): every parse* option in this sample rule is false; confirm the parser
-- still produces output with this configuration, or enable the options the job needs.
set rule='{"name":"name", "culture":"", "splitConjoinedNames":false, "shortcutThreshold":0, "parseNaturalOrderPersonalNames":false, "naturalOrderPersonalNamesPriority":1, "parseReverseOrderPersonalNames":false, "reverseOrderPersonalNamesPriority":2, "parseConjoinedNames":false, "naturalOrderConjoinedPersonalNamesPriority":3, "reverseOrderConjoinedPersonalNamesPriority":4, "parseBusinessNames":false, "businessNamesPriority":5}';

-- Reference data directory, passed to opennameparser as the second argument through ${hiveconf:refdir}.
-- This must be a local path on cluster machines and must be present at the same path on each node of the cluster.
set refdir='/home/hadoop/reference/';

-- Comma-separated list of input field names, passed to opennameparser as the third argument
-- through ${hiveconf:header}. The queries pass the columns inputrecordid, name, nametype
-- right after it, in the same order as the names listed here.
set header='inputrecordid,Name,nametype';

-- Run the parser over every row of the nameparser table and show the result on the console.
-- The inner query calls the UDF once per row; LATERAL VIEW explode() then unpacks the UDF
-- result so that each exploded element can be indexed by output field name.
-- Only the Name, NameScore and CultureCode output fields are displayed.
SELECT
    name_table.name_fields["Name"],
    name_table.name_fields["NameScore"],
    name_table.name_fields["CultureCode"]
FROM (
    SELECT
        opennameparser(
            ${hiveconf:rule},
            ${hiveconf:refdir},
            ${hiveconf:header},
            inputrecordid,
            name,
            nametype
        ) AS parser_output
    FROM nameparser
) AS parsed
LATERAL VIEW explode(parser_output) name_table AS name_fields;


-- Same parser query as the console version, but the rows are written to a local directory
-- as a comma-delimited text file (one record per line). Existing contents of the target
-- directory are overwritten.
INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/opennameparser/'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
SELECT
    name_table.name_fields["Name"],
    name_table.name_fields["NameScore"],
    name_table.name_fields["CultureCode"]
FROM (
    SELECT
        opennameparser(
            ${hiveconf:rule},
            ${hiveconf:refdir},
            ${hiveconf:header},
            inputrecordid,
            name,
            nametype
        ) AS parser_output
    FROM nameparser
) AS parsed
LATERAL VIEW explode(parser_output) name_table AS name_fields;



-- Sample input data
-- +---------------+---------------------------+-------------+
-- | inputrecordid | name                      | nametype    |
-- +---------------+---------------------------+-------------+
-- | 1             | JOHN VAN DER LINDEN-JONES | Simple Name |
-- | 2             | RYAN JOHN SMITH           | Simple Name |
-- +---------------+---------------------------+-------------+

-- Sample output data
-- +---------------------------+-----------+-------------+
-- | Name                      | NameScore | CultureCode |
-- +---------------------------+-----------+-------------+
-- | JOHN VAN DER LINDEN-JONES | 75        | True        |
-- | RYAN JOHN SMITH           | 100       | True        |
-- +---------------------------+-----------+-------------+