#!/usr/bin/env bash
#
# Driver for the two-step export job:
#   1. run preprocess.hql through hive, passing target_date as a hiveconf
#      variable;
#   2. run es_loader.pig through pig, passing that same date as both the
#      start_date and the target_date parameter.
#
# target_date is whatever `date -d last-day` prints in YYYY-MM-DD form.
# NOTE(review): -d with a relative date string is a GNU date feature; confirm
# the hosts running this script ship GNU coreutils.

# Stop on the first failing command, on any unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail
# Trace each command as it runs.
set -x

target_date=$(date -d last-day +%Y-%m-%d)

echo 'it works!'
echo "target_date = " "${target_date}"

hive -hiveconf "target_date=${target_date}" -f preprocess.hql

# The pig options live in an array so that every option stays exactly one
# argument, whatever characters it contains.
pig_args=(
  -useHCatalog
  # Quoted: the * must reach pig literally instead of being globbed.
  '-Dmapreduce.job.acl-view-job=*'
  -Dmapreduce.job.queuename=root.bdp_jmart_tbi_union.bdp_jmart_tbi_dev
  # The trailing space inside the value is kept exactly as it was.
  '-Dmapred.child.java.opts=-Xmx8192m '
  -p "start_date=${target_date}"
  -p "target_date=${target_date}"
  es_loader.pig
)

# stderr is folded into stdout so both streams end up in the same place.
pig "${pig_args[@]}" 2>&1

echo 'DONE!'
******************************************************************************
-- Make sure the destination table exists. It is a text table partitioned by
-- the string column dt, with fields separated by the Ctrl-A (U+0001) character.
CREATE TABLE IF NOT EXISTS xxx.yyy (
    album_id      STRING,
    album_name    STRING,
    album_size    BIGINT,
    album_subtype STRING,
    album_type    STRING,
    alias         STRING,
    gender        BIGINT,
    id            STRING,
    mp3_source    STRING,
    name          STRING,
    nation        BIGINT,
    popularity    BIGINT,
    position      BIGINT,
    singer_id     STRING,
    singer_name   STRING,
    song_tags     STRING
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\u0001'
STORED AS TEXTFILE;

-- Remove the partition for the date supplied through the hiveconf variable
-- target_date, if such a partition exists.
-- NOTE(review): presumably this makes a rerun for the same date start from an
-- empty partition - confirm against the loader that fills the table.
ALTER TABLE xxx.yyy DROP IF EXISTS PARTITION (dt = '${hiveconf:target_date}');
****************************************************************************************
-- Reads documents from Elasticsearch, converts each one to columns with the
-- YYYFormatter UDF, tags every row with the target_date parameter as dt and
-- stores the rows into the HCatalog table xxx.yyy.

REGISTER elasticsearch-hadoop-pig-5.4.1.jar;
REGISTER yyy.jar;

-- UDF applied to every document loaded below.
DEFINE YYYFormatter com.jd.nlp.pig.udf.YYYFormatter();

-- Elasticsearch storage function together with its connection settings.
DEFINE EsStorage org.elasticsearch.hadoop.pig.EsStorage (
    'es.http.timeout= 5m',
    'es.index.auto.create = true',
    'es.mapping.pig.tuple.use.field.names = true',
    'es.nodes = [ip]',
    'es.mapping.id = id',
    'es.write.operation = upsert',
    'es.output.json=true'
);

-- Load the documents selected by the query given to the loader.
es_docs = LOAD 'index/type' USING EsStorage('es.query:?q=*');

-- Run every loaded record through the formatter UDF.
formatted_docs = FOREACH es_docs GENERATE YYYFormatter(*);

-- Unpack the tuple returned by the UDF into named, typed columns.
song_rows = FOREACH formatted_docs GENERATE flatten($0) AS (
    album_id:chararray,
    album_name:chararray,
    album_size:long,
    album_subtype:chararray,
    album_type:chararray,
    alias:chararray,
    gender:long,
    id:chararray,
    mp3_source:chararray,
    name:chararray,
    nation:long,
    popularity:long,
    position:long,
    singer_id:chararray,
    singer_name:chararray,
    song_tags:chararray
);

-- Append the run date as the dt column of every row.
song_rows_with_dt = FOREACH song_rows GENERATE *, '$target_date' AS dt;

STORE song_rows_with_dt INTO 'xxx.yyy'
    USING org.apache.hive.hcatalog.pig.HCatStorer()
    PARALLEL 1;
**********************************************************************************************
import com.alibaba.fastjson.JSONObject;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import com.alibaba.fastjson.JSON;
import java.io.IOException;
/**
 * Pig {@code EvalFunc} that parses a JSON string into a bean and returns the
 * bean's album id and album name as a two-field tuple.
 */
public class YYYFormatter extends EvalFunc<Tuple> {

    /** Factory used to build every result tuple; never reassigned. */
    private final TupleFactory tupleFactory = TupleFactory.getInstance();

    /**
     * Converts one input record into a tuple of bean fields.
     *
     * @param tuple input whose first field is the JSON document as a String
     * @return a new tuple holding the album id followed by the album name
     * @throws IOException if reading the first field of the input fails
     * @throws RuntimeException if the input is missing or empty, or if the
     *         JSON text does not produce a bean
     */
    @Override
    public Tuple exec(Tuple tuple) throws IOException {
        // Reject missing input with a readable message instead of failing
        // later with a bare NullPointerException or index error.
        if (tuple == null || tuple.size() == 0) {
            throw new RuntimeException("YYYFormatter: no input field to parse");
        }

        String jsonString = (String) tuple.get(0);
        if (jsonString == null || jsonString.isEmpty()) {
            throw new RuntimeException("YYYFormatter: JSON input is null or empty");
        }

        // NOTE(review): the variable is declared as ZZZBean while the parse
        // target is MiguSongBean.class; neither type is visible here, so
        // confirm that MiguSongBean is assignable to ZZZBean.
        ZZZBean yyyBean = JSON.parseObject(jsonString, MiguSongBean.class);
        // Guard against the parser handing back no object at all (for
        // example when the text is the JSON literal null).
        if (yyyBean == null) {
            throw new RuntimeException("YYYFormatter: JSON input did not produce a bean: " + jsonString);
        }

        // Build the result only once the input is known to be usable.
        Tuple result = tupleFactory.newTuple();
        result.append(yyyBean.getAlbumId());
        result.append(yyyBean.getAlbumName());
        return result;
    }
}