TensorFlow HDFS support
Starting with version 2.6, TensorFlow needs tensorflow-io for HDFS (including viewfs) support; from 2.6.0 on you must import tensorflow_io alongside tensorflow:
Install it with:

```bash
pip install tensorflow-io
```

```python
import tensorflow as tf
import tensorflow_io as tfio
```
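
Once tensorflow_io is imported, hdfs:// and viewfs:// paths work through the ordinary tf.io.gfile APIs. A minimal sketch; the cluster name and path below are placeholders, not part of the original setup:

```python
import tensorflow as tf
import tensorflow_io as tfio  # noqa: F401 -- the import registers the hdfs:// and viewfs:// schemes

# Hypothetical path; substitute a file that exists on your cluster.
path = "hdfs://default/tmp/example.txt"

# Directory listings and file reads go through the plugin filesystem.
print(tf.io.gfile.listdir("hdfs://default/tmp/"))
with tf.io.gfile.GFile(path, "r") as f:
    print(f.read())
```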
HDFS
Environment variables
LD_LIBRARY_PATH must be able to locate both libjvm.so and libhdfs.so.
CLASSPATH must be the glob-expanded form (hadoop classpath --glob); a JVM embedded through JNI does not expand wildcard classpath entries itself.
```bash
export CLASSPATH=$(hadoop classpath --glob)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_HOME/jre/lib/amd64/server/:$HADOOP_HOME/native
```
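
Before starting TensorFlow, it is worth confirming that the dynamic loader can actually resolve both libraries. A small standard-library-only sketch:

```python
import ctypes

# With LD_LIBRARY_PATH set correctly, both libraries load without error.
for name in ("libjvm.so", "libhdfs.so"):
    try:
        ctypes.CDLL(name)
        print(name, "found")
    except OSError as err:
        print(name, "NOT found:", err)
```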
Building libhdfs
Using hadoop-2.6.0-src as an example.
First bump the Java version in the annotations pom from 1.7 to 1.8:
vim hadoop-common-project/hadoop-annotations/pom.xml
Then build the native distribution:
mvn -Pdist,native clean package -DskipTests -Dmaven.javadoc.skip=true
In hadoop-2.6.0-src/hadoop-common-project/hadoop-common/pom.xml, find the javahClassName entries and save them to a.txt:
```xml
<javahClassName>org.apache.hadoop.io.compress.zlib.ZlibCompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.zlib.ZlibDecompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.bzip2.Bzip2Compressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.bzip2.Bzip2Decompressor</javahClassName>
<javahClassName>org.apache.hadoop.security.JniBasedUnixGroupsMapping</javahClassName>
<javahClassName>org.apache.hadoop.io.nativeio.NativeIO</javahClassName>
<javahClassName>org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory</javahClassName>
<javahClassName>org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.snappy.SnappyCompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
<javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName>
<javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName>
<javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
<javahClassName>org.apache.hadoop.net.unix.DomainSocket</javahClassName>
<javahClassName>org.apache.hadoop.net.unix.DomainSocketWatcher</javahClassName>
```
Strip the XML tags to get the bare class names:

```bash
cat a.txt | grep -E -o "org.[^<]+" > class.txt
```
class.txt now looks like this:
```
org.apache.hadoop.io.compress.zlib.ZlibCompressor
org.apache.hadoop.io.compress.zlib.ZlibDecompressor
org.apache.hadoop.io.compress.bzip2.Bzip2Compressor
org.apache.hadoop.io.compress.bzip2.Bzip2Decompressor
org.apache.hadoop.security.JniBasedUnixGroupsMapping
org.apache.hadoop.io.nativeio.NativeIO
org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory
org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping
org.apache.hadoop.io.compress.snappy.SnappyCompressor
org.apache.hadoop.io.compress.snappy.SnappyDecompressor
org.apache.hadoop.io.compress.lz4.Lz4Compressor
org.apache.hadoop.io.compress.lz4.Lz4Decompressor
org.apache.hadoop.crypto.OpensslCipher
org.apache.hadoop.crypto.random.OpensslSecureRandom
org.apache.hadoop.util.NativeCrc32
org.apache.hadoop.net.unix.DomainSocket
org.apache.hadoop.net.unix.DomainSocketWatcher
```
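
If grep is not at hand, the same extraction can be done in a few lines of Python (this assumes a.txt holds the <javahClassName> lines copied from the pom, as above):

```python
import re

# Pull the fully qualified class names out of the <javahClassName> tags.
with open("a.txt") as src:
    names = re.findall(r"<javahClassName>([^<]+)</javahClassName>", src.read())

with open("class.txt", "w") as dst:
    dst.write("\n".join(names) + "\n")
```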
Generate the JNI headers into ./javah:

```bash
cat class.txt | xargs javah -d javah -classpath $(hadoop classpath)
```
```bash
cd hadoop-hdfs-project/hadoop-hdfs/src/
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=$HOME/hdfs-native -DGENERATED_JAVAH=../javah -DJVM_ARCH_DATA_MODEL=64 -DREQUIRE_LIBWEBHDFS=OFF -DREQUIRE_FUSE=OFF ..
make  # the original notes stop at cmake; make is the standard next step to actually build
```
The build produces libhdfs.so at target/usr/local/lib/libhdfs.so; the matching header is main/native/libhdfs/hdfs.h.
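A quick way to verify that the freshly built library works is to call it through ctypes. This is a hedged sketch: the .so path is hypothetical, and the CLASSPATH/LD_LIBRARY_PATH exports from the section above must already be in effect:

```python
import ctypes

# Hypothetical location -- point this at the built target/usr/local/lib/libhdfs.so.
lib = ctypes.CDLL("target/usr/local/lib/libhdfs.so")

# hdfsFS hdfsConnect(const char *nn, tPort port); tPort is a uint16 (see hdfs.h).
lib.hdfsConnect.restype = ctypes.c_void_p
lib.hdfsConnect.argtypes = [ctypes.c_char_p, ctypes.c_uint16]

fs = lib.hdfsConnect(b"default", 0)  # "default" = fs.defaultFS from the Hadoop config
print("connected" if fs else "connect failed")
```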
libhdfs.so API reference:
https://archive.cloudera.com/cdh4/cdh/4/hadoop-2.0.0-cdh4.6.0/hadoop-project-dist/hadoop-hdfs/LibHdfs.html
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>  /* O_WRONLY, O_CREAT */
#include "hdfs.h"

int main(int argc, char **argv) {
    /* Connect to the default filesystem (fs.defaultFS from the Hadoop config). */
    hdfsFS fs = hdfsConnect("default", 0);
    const char *writePath = "/tmp/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY | O_CREAT, 0, 0, 0);
    if (!writeFile) {
        fprintf(stderr, "Failed to open %s for writing!\n", writePath);
        exit(-1);
    }
    const char *buffer = "Hello, World!";
    tSize num_written_bytes =
        hdfsWrite(fs, writeFile, (void *)buffer, strlen(buffer) + 1);
    if (num_written_bytes < 0) {
        fprintf(stderr, "Failed to write to %s\n", writePath);
        exit(-1);
    }
    if (hdfsFlush(fs, writeFile)) {
        fprintf(stderr, "Failed to 'flush' %s\n", writePath);
        exit(-1);
    }
    hdfsCloseFile(fs, writeFile);
    hdfsDisconnect(fs);
    return 0;
}
```
Compile against the built library and header (the -Iinclude/-Llib flags assume hdfs.h and libhdfs.so have been copied into ./include and ./lib next to main.c), then run with a glob classpath:

```bash
gcc -o main -Wl,-rpath='$ORIGIN/lib' -Iinclude -Llib main.c -lhdfs
CLASSPATH=$(hadoop classpath --glob) ./main
```
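
With the library built and the environment variables exported, the tensorflow-io setup from the first section should work end to end. A smoke test, with a hypothetical path:

```python
import tensorflow as tf
import tensorflow_io as tfio  # noqa: F401 -- registers the hdfs:// scheme

path = "hdfs://default/tmp/tf_hdfs_smoke_test.txt"  # placeholder test path

# Round-trip a small file over HDFS through tf.io.gfile.
with tf.io.gfile.GFile(path, "w") as f:
    f.write("Hello from TensorFlow!\n")
with tf.io.gfile.GFile(path, "r") as f:
    print(f.read())
tf.io.gfile.remove(path)
```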