Mohit Garg
Jul 20, 2017 · 2 min read

Hi Brandon,

Thanks for this article; it’s really helpful. However, I am getting the following error when trying to index a Parquet file in my environment. Could you help me understand the root cause?

5635 [main] DEBUG org.apache.hadoop.util.Shell — Failed to detect a valid hadoop home directory
java.io.IOException: HADOOP_HOME or hadoop.home.dir are not set.
at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:351)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:376)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)
at org.apache.hadoop.security.Groups.parseStaticMapping(Groups.java:168)
at org.apache.hadoop.security.Groups.<init>(Groups.java:132)
at org.apache.hadoop.security.Groups.<init>(Groups.java:100)
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:435)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:337)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:304)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:891)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:857)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:724)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2978)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2970)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2833)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:387)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296)
at parquet.hadoop.ParquetReader.<init>(ParquetReader.java:113)
at parquet.hadoop.ParquetReader.<init>(ParquetReader.java:77)
at parquet.avro.AvroParquetReader.<init>(AvroParquetReader.java:62)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.doProcess(ReadAvroParquetFileBuilder.java:168)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.cloudera.bkvarda.MorphlineParquetIndexer.main(MorphlineParquetIndexer.java:36)
5644 [main] DEBUG org.apache.hadoop.util.Shell — setsid exited with exit code 0
5646 [main] DEBUG org.apache.hadoop.security.Groups — Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000; warningDeltaMs=5000
5649 [main] DEBUG org.apache.hadoop.security.UserGroupInformation — hadoop login
5650 [main] DEBUG org.apache.hadoop.security.UserGroupInformation — hadoop login commit
5652 [main] DEBUG org.apache.hadoop.security.UserGroupInformation — using local user:UnixPrincipal: user
5652 [main] DEBUG org.apache.hadoop.security.UserGroupInformation — Using user: “UnixPrincipal: user” with name user
5652 [main] DEBUG org.apache.hadoop.security.UserGroupInformation — User entry: “user”
5653 [main] DEBUG org.apache.hadoop.security.UserGroupInformation — UGI loginUser:user (auth:SIMPLE)
5685 [main] DEBUG org.apache.htrace.core.Tracer — sampler.classes = ; loaded no samplers
5686 [main] DEBUG org.apache.htrace.core.Tracer — span.receiver.classes = ; loaded no span receivers
5802 [main] DEBUG org.apache.hadoop.hdfs.BlockReaderLocal — dfs.client.use.legacy.blockreader.local = false
5802 [main] DEBUG org.apache.hadoop.hdfs.BlockReaderLocal — dfs.client.read.shortcircuit = false
5802 [main] DEBUG org.apache.hadoop.hdfs.BlockReaderLocal — dfs.client.domain.socket.data.traffic = false
5802 [main] DEBUG org.apache.hadoop.hdfs.BlockReaderLocal — dfs.domain.socket.path =
5862 [main] DEBUG org.kitesdk.morphline.stdlib.Pipe — beforeNotify()
5862 [main] DEBUG org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile — beforeNotify()
5862 [main] DEBUG org.kitesdk.morphline.stdlib.LogDebugBuilder$LogDebug — beforeNotify()
5862 [main] DEBUG org.kitesdk.morphline.avro.ExtractAvroPathsBuilder$ExtractAvroPaths — beforeNotify()
5862 [main] DEBUG org.kitesdk.morphline.solr.SanitizeUnknownSolrFieldsBuilder$SanitizeUnknownSolrFields — beforeNotify()
5862 [main] DEBUG org.kitesdk.morphline.solr.LoadSolrBuilder$LoadSolr — beforeNotify()
Exception in thread "main" org.kitesdk.morphline.api.MorphlineRuntimeException: java.lang.IllegalArgumentException: java.net.UnknownHostException: tst
at org.kitesdk.morphline.api.MorphlineContext$DefaultExceptionHandler.handleException(MorphlineContext.java:289)
at org.cloudera.bkvarda.MorphlineParquetIndexer.main(MorphlineParquetIndexer.java:45)
Caused by: java.lang.IllegalArgumentException: java.net.UnknownHostException: tst
at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:406)
at org.apache.hadoop.hdfs.NameNodeProxies.createNonHAProxy(NameNodeProxies.java:310)
at org.apache.hadoop.hdfs.NameNodeProxies.createProxy(NameNodeProxies.java:176)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:728)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:671)
at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:155)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2815)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:98)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2852)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2834)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:387)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296)
at parquet.hadoop.ParquetReader.<init>(ParquetReader.java:113)
at parquet.hadoop.ParquetReader.<init>(ParquetReader.java:77)
at parquet.avro.AvroParquetReader.<init>(AvroParquetReader.java:62)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.doProcess(ReadAvroParquetFileBuilder.java:168)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.cloudera.bkvarda.MorphlineParquetIndexer.main(MorphlineParquetIndexer.java:36)
Caused by: java.net.UnknownHostException: tst
… 20 more

)

    Mohit Garg

    Written by