Hive的Read past end of RLE integer from compressed stream Stream for column
# 错误日志
19/05/22 09:32:59 WARN scheduler.TaskSetManager: Lost task 675.0 in stage 7.0 (TID 2242, hi-prod-10.hillinsight.com): java.io.IOException: java.io.IOException: java.lang.RuntimeException: java.io.EOFException: Read past end of RLE integer from compressed stream Stream for column 1 kind LENGTH position: 4332 length: 4332 range: 0 offset: 519193 limit: 519193 range 0 = 0 to 4332 uncompressed: 4 to 4
at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121)
at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.doNextWithExceptionHandler(HadoopShimsSecure.java:226)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.next(HadoopShimsSecure.java:136)
at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
at scala.collection.convert.Wrappers$IteratorWrapper.hasNext(Wrappers.scala:29)
at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList$ResultIterator.hasNext(HiveBaseFunctionResultList.java:93)
at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:41)
at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:163)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: java.lang.RuntimeException: java.io.EOFException: Read past end of RLE integer from compressed stream Stream for column 1 kind LENGTH position: 4332 length: 4332 range: 0 offset: 519193 limit: 519193 range 0 = 0 to 4332 uncompressed: 4 to 4
at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderNextException(HiveIOExceptionHandlerChain.java:121)
at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderNextException(HiveIOExceptionHandlerUtil.java:77)
at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:355)
at org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.doNext(CombineHiveRecordReader.java:105)
at org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.doNext(CombineHiveRecordReader.java:41)
at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.next(HiveContextAwareRecordReader.java:116)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.doNextWithExceptionHandler(HadoopShimsSecure.java:224)
... 16 more
Caused by: java.lang.RuntimeException: java.io.EOFException: Read past end of RLE integer from compressed stream Stream for column 1 kind LENGTH position: 4332 length: 4332 range: 0 offset: 519193 limit: 519193 range 0 = 0 to 4332 uncompressed: 4 to 4
at org.apache.hadoop.hive.ql.io.orc.VectorizedOrcInputFormat$VectorizedOrcRecordReader.next(VectorizedOrcInputFormat.java:95)
at org.apache.hadoop.hive.ql.io.orc.VectorizedOrcInputFormat$VectorizedOrcRecordReader.next(VectorizedOrcInputFormat.java:49)
at org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader.doNext(HiveContextAwareRecordReader.java:350)
... 20 more
Caused by: java.io.EOFException: Read past end of RLE integer from compressed stream Stream for column 1 kind LENGTH position: 4332 length: 4332 range: 0 offset: 519193 limit: 519193 range 0 = 0 to 4332 uncompressed: 4 to 4
at org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerReaderV2.readValues(RunLengthIntegerReaderV2.java:56)
at org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerReaderV2.next(RunLengthIntegerReaderV2.java:302)
at org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerReaderV2.nextVector(RunLengthIntegerReaderV2.java:346)
at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl$BytesColumnVectorUtil.commonReadByteArrays(RecordReaderImpl.java:1365)
at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl$BytesColumnVectorUtil.readOrcByteArrays(RecordReaderImpl.java:1399)
at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl$StringDirectTreeReader.nextVector(RecordReaderImpl.java:1508)
at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl$StringTreeReader.nextVector(RecordReaderImpl.java:1347)
at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl$StructTreeReader.nextVector(RecordReaderImpl.java:1902)
at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.nextBatch(RecordReaderImpl.java:3191)
at org.apache.hadoop.hive.ql.io.orc.VectorizedOrcInputFormat$VectorizedOrcRecordReader.next(VectorizedOrcInputFormat.java:93)
... 22 more
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# 问题原因
此乃ORC文件格式的Bug,升级Hive版本或者换成RCFile等其他格式即可
网上也有说是使用小文件合并的问题,算是hive的一个bug吧
Bug链接:HIVE-10916 (opens new window)
使用以下参数将会出现这些问题:
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=256000000;
1
2
3
2
3
# 解决方案:
# 1. 升级hive版本(推荐)
升级hive到hive 1.3.x以上版本
# 2. 尝试不做小文件合并
关闭小文件合并参数
set hive.merge.mapredfiles = false;
set hive.merge.mapfiles = false;
1
2
2
# 3. 改为其他格式rcfile
create table xxx as store as rcfile
1
最后更新时间: 2022/7/23 10:17:11