Using JSON SerDe in Hive

Datetime:2016-08-22 21:50:26          Topic: Hive           Share

Using JsonSerDe in Hive

  1. Download JSON Serde – https://github.com/rcongiu/Hive-JSON-Serde
  2. Compile command for hive 1.2.1 – “mvn -Pcdh5 -Dcdh5.hive.version=1.2.1 clean package” . change hive version per the environment
  3. Copy json-serde/target/json-serde-1.3.8-SNAPSHOT-jar-with-dependencies.jar (or similar) to hive/lib
  4. Restart hive

Sample JSON with test HiveQLs

-- json sample: {"k2":"v2","k1":"v1"}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select * from json_test;
-- json sample: {"$k2":"v2","k1":"v1"}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("mapping.k2" = "$k2");
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select * from json_test;
-- json sample: {"$k2":"k2_v","k1":"k1_v","k3":{"k3_k1":"k3_k1_v"}}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string, k3 struct)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("mapping.k2" = "$k2");
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select * from json_test;
-- json sample: {"$k2":"k2_v","k1":"k1_v","k3":{"$k3_k1":"$k3_k1_v"}}
drop table json_test;
CREATE TABLE json_test ( k1 string, k2 string, k3 struct)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("mapping.k2" = "$k2", "mapping.k3_k1" = "$k3_k1");
LOAD DATA local INPATH '/private/tmp/t.json' OVERWRITE INTO TABLE json_test;
select *,k3.k3_k1 from json_test;




About List