概述集群配置及节点信息如下:
8c16g 操作系统centos7.3,操作安装用户ucmed(注意:运维的安全基线加固会影响hadoop启动)
192.168.3.184 master
192.168.3.185 slave1
192.168.3.186 slave2
| | master | slave1 | slave2 |
|---|---|---|---|
| HDFS | NameNode,DataNode | DataNode | SecondaryNameNode,DataNode | 
| YARN | NodeManager | ResourceManager,NodeManager | NodeManager | 
1.修改主机名称,配置hosts及免密登录
  
   - 
    
     
    
    
     
      sudo hostnamectl 
      set-hostname master
     
    
- 
    
     
    
    
     
      sudo hostnamectl 
      set-hostname slave1
     
    
- 
    
     
    
    
     
      sudo hostnamectl 
      set-hostname slave2
     
    
配置hosts文件
  
   - 
    
     
    
    
     
      sudo vi /etc/hosts
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      192.168.3.184 master
     
    
- 
    
     
    
    
     
      192.168.3.185 slave1
     
    
- 
    
     
    
    
     
      192.168.3.186 slave2
     
    
配置免密
  
   - 
    
     
    
    
     
      ssh-keygen -t rsa -b 2048 -v
     
    
- 
    
     
    
    
     
      拷贝  ~/.ssh/id_rsa.pub 到目标服务器并重命名
     
    
- 
    
     
    
    
     
      cp ~/.ssh/id_rsa.pub /tmp/master.pub
     
    
- 
    
     
    
    
     
      cp ~/.ssh/id_rsa.pub /tmp/slave1.pub
     
    
- 
    
     
    
    
     
      cp ~/.ssh/id_rsa.pub /tmp/slave2.pub
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      cat /tmp/master.pub  >> ~/.ssh/authorized_keys
     
    
- 
    
     
    
    
     
      cat /tmp/slave1.pub  >> ~/.ssh/authorized_keys
     
    
- 
    
     
    
    
     
      cat /tmp/slave2.pub  >> ~/.ssh/authorized_keys
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      chmod 700 ~/.ssh/
     
    
- 
    
     
    
    
     
      chmod 600 ~/.ssh/authorized_keys
     
    
2.关闭防火墙及SELinux
查看防火墙状态:systemctl status firewalld
关防火墙
  
   - 
    
     
    
    
     
      sudo systemctl 
      disable firewalld
     
    
- 
    
     
    
    
     
      sudo systemctl stop firewalld
     
    
查看SELinux状态:sestatus
关 SELinux
  
   - 
    
     
    
    
     
      #永久关闭,需要重启机器:
     
    
- 
    
     
    
    
     
      sudo vi /etc/sysconfig/selinux
     
    
- 
    
     
    
    
     
      #下面为selinux文件中需要修改的元素
     
    
- 
    
     
    
    
     
      SELINUX=disabled
     
    
3.安装oracleJDK
复制 jdk-8u311-linux-x64.tar.gz到服务器
解压
  
   - 
    
     
    
    
     
      sudo 
      mkdir /usr/local/src/jdk
     
    
- 
    
     
    
    
     
      sudo 
      cp /tmp/jdk-8u311-linux-x64.tar.gz /usr/local/src/jdk
     
    
- 
    
     
    
    
     
      cd /usr/local/src/jdk
     
    
- 
    
     
    
    
     
      sudo tar -zxvf jdk-8u311-linux-x64.tar.gz
     
    
添加环境变量:
  
   - 
    
     
    
    
     
      sudo vim /etc/profile
     
    
- 
    
     
    
    
     
      export JAVA_HOME=/usr/local/src/jdk/jdk1.8.0_311 
     
    
- 
    
     
    
    
     
      export PATH=
      $PATH:
      $JAVA_HOME/bin
     
    
- 
    
     
    
    
     
      立即生效文件
     
    
- 
    
     
    
    
     
      source /etc/profile
     
    
java -version
4. hadoop 3.3.1安装
https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
  
   - 
    
     
    
    
     
      sudo 
      mkdir /usr/local/src/hadoop
     
    
- 
    
     
    
    
     
      sudo 
      cp /tmp/hadoop-3.3.1.tar.gz /usr/local/src/hadoop/
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      sudo 
      chown -R ucmed:ucmed /usr/local/src/hadoop
     
    
- 
    
     
    
    
     
      cd /usr/local/src/hadoop
     
    
- 
    
     
    
    
     
      sudo tar -zxvf hadoop-3.3.1.tar.gz
     
    
- 
    
     
    
    
     
      cd /usr/local/src/hadoop/hadoop-3.3.1
     
    
- 
    
     
    
    
     
      sudo 
      mkdir -p /opt/hdfs/data 
     
    
- 
    
     
    
    
     
      sudo 
      mkdir -p /opt/hdfs/name
     
    
- 
    
     
    
    
     
      sudo 
      chown -R ucmed:ucmed /opt/hdfs
     
    
配置环境变量
  
   - 
    
     
    
    
     
      sudo vim /etc/profile
     
    
- 
    
     
    
    
     
      export HADOOP_HOME=/usr/local/src/hadoop/hadoop-3.3.1
     
    
- 
    
     
    
    
     
      export PATH=
      $PATH:
      $JAVA_HOME/bin:
      $HADOOP_HOME/bin:
      $HADOOP_HOME/sbin
     
    
- 
    
     
    
    
     
      source /etc/profile
     
    
hadoop version
5. 配置文件修改
  
   - 
    
     
    
    
     
      cd $HADOOP_HOME/etc/hadoop
     
    
- 
    
     
    
    
     
      vim core-site.xml
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      <configuration>
     
    
- 
    
     
    
    
        
      <!-- 指定NameNode的地址 -->
     
    
- 
    
     
    
    
         
      <property>
     
    
- 
    
     
    
    
             
      <name>fs.defaultFS
      </name>
     
    
- 
    
     
    
    
             
      <value>hdfs://master:8020
      </value>
     
    
- 
    
     
    
    
         
      </property>
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
         
      <!-- 指定hadoop数据的存储目录 -->
     
    
- 
    
     
    
    
         
      <property>
     
    
- 
    
     
    
    
             
      <name>hadoop.tmp.dir
      </name>
     
    
- 
    
     
    
    
             
      <value>/opt/hdfs/data
      </value>
     
    
- 
    
     
    
    
         
      </property>
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
         
      <!-- 配置HDFS网页登录使用的静态用户为ucmed -->
     
    
- 
    
     
    
    
         
      <property>
     
    
- 
    
     
    
    
             
      <name>hadoop.http.staticuser.user
      </name>
     
    
- 
    
     
    
    
             
      <value>ucmed
      </value>
     
    
- 
    
     
    
    
         
      </property>
     
    
- 
    
     
    
    
     
      </configuration>
     
    
 
  
   - 
    
     
    
    
     
      vim hdfs-site.xml
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      <configuration>
     
    
- 
    
     
    
    
         
      <!-- nn web端访问地址-->
     
    
- 
    
     
    
    
         
      <property>
     
    
- 
    
     
    
    
             
      <name>dfs.namenode.http-address
      </name>
     
    
- 
    
     
    
    
             
      <value>master:9870
      </value>
     
    
- 
    
     
    
    
         
      </property>
     
    
- 
    
     
    
    
         
      <!-- 2nn web端访问地址-->
     
    
- 
    
     
    
    
         
      <property>
     
    
- 
    
     
    
    
             
      <name>dfs.namenode.secondary.http-address
      </name>
     
    
- 
    
     
    
    
             
      <value>slave2:9868
      </value>
     
    
- 
    
     
    
    
         
      </property>
     
    
- 
    
     
    
    
     
      </configuration>
     
    
  
   - 
    
     
    
    
     
      vim yarn-site.xml
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      <configuration>
     
    
- 
    
     
    
    
     
          
      <!-- Site specific YARN configuration properties -->
     
    
- 
    
     
    
    
     
          
      <!-- 指定MR走shuffle -->
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.aux-services
      </name>
     
    
- 
    
     
    
    
     
              
      <value>mapreduce_shuffle
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <!-- 指定ResourceManager的地址-->
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.resourcemanager.hostname
      </name>
     
    
- 
    
     
    
    
     
              
      <value>slave1
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <!-- 环境变量的继承 -->
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.env-whitelist
      </name>
     
    
- 
    
     
    
    
     
              
      <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.pmem-check-enabled
      </name>
     
    
- 
    
     
    
    
     
              
      <value>true
      </value>
     
    
- 
    
     
    
    
     
              
      <description>检测物理内存的使用是否超出分配值,若任务超出分配值,则将其杀掉,默认true。
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.vmem-check-enabled
      </name>
     
    
- 
    
     
    
    
     
              
      <value>true
      </value>
     
    
- 
    
     
    
    
     
              
      <description>检测虚拟内存的使用是否超出;若任务超出分配值,则将其杀掉,默认true。在确定内存不会泄漏的情况下可以设置此项为 False;
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.vmem-pmem-ratio
      </name>
     
    
- 
    
     
    
    
     
              
      <value>8
      </value>
     
    
- 
    
     
    
    
     
              
      <description>任务每使用1MB物理内存,最多可使用虚拟内存量比率,默认2.1;在上一项中设置为false不检测虚拟内存时,此项就无意义了
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.resource.cpu-vcores
      </name>
     
    
- 
    
     
    
    
     
              
      <value>8
      </value>
     
    
- 
    
     
    
    
     
              
      <description>该节点上YARN可使用的总核心数;一般设为cat /proc/cpuinfo| grep "processor"| wc -l 的值。默认是8个;
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.nodemanager.resource.memory-mb
      </name>
     
    
- 
    
     
    
    
     
              
      <value>12288
      </value>
     
    
- 
    
     
    
    
     
              
      <description>该节点上YARN可使用的物理内存总量,【向操作系统申请的总量】默认是8192(MB);本集群节点为16GB内存,设为12288,预留约4GB给操作系统及Hadoop守护进程
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.scheduler.minimum-allocation-mb
      </name>
     
    
- 
    
     
    
    
     
              
      <value>1024
      </value>
     
    
- 
    
     
    
    
     
              
      <description>单个容器/调度器可申请的最少物理内存量,默认是1024(MB);一般每个contain都分配这个值;即:capacity memory:3072, vCores:1,如果提示物理内存溢出,提高这个值即可;
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.scheduler.maximum-allocation-mb
      </name>
     
    
- 
    
     
    
    
     
              
      <value>8192
      </value>
     
    
- 
    
     
    
    
     
              
      <description>单个容器申请最大值
      </description>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <description>cpu申请最小量
      </description>
     
    
- 
    
     
    
    
     
              
      <name>yarn.scheduler.minimum-allocation-vcores
      </name>
     
    
- 
    
     
    
    
     
              
      <value>1
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.timeline-service.enabled
      </name>
     
    
- 
    
     
    
    
     
              
      <value>true
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.timeline-service.http-cross-origin.enabled
      </name>
     
    
- 
    
     
    
    
     
              
      <value>true
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.webapp.api-service.enable
      </name>
     
    
- 
    
     
    
    
     
              
      <value>true
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.webapp.ui2.enable
      </name>
     
    
- 
    
     
    
    
     
              
      <value>true
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
          
      <property>
     
    
- 
    
     
    
    
     
              
      <name>yarn.application.classpath
      </name>
     
    
- 
    
     
    
    
     
              
      <value>
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/etc/hadoop,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/common/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/common/lib/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/hdfs/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/hdfs/lib/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/mapreduce/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/mapreduce/lib/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/yarn/*,
     
    
- 
    
     
    
    
     
                  $HADOOP_HOME/share/hadoop/yarn/lib/*
     
    
- 
    
     
    
    
     
             
      </value>
     
    
- 
    
     
    
    
     
          
      </property>
     
    
- 
    
     
    
    
     
      </configuration>
     
    
 
  
   - 
    
     
    
    
     
      vim mapred-site.xml
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      <configuration>
     
    
- 
    
     
    
    
         
      <!-- 指定MapReduce程序运行在Yarn上 -->
     
    
- 
    
     
    
    
         
      <property>
     
    
- 
    
     
    
    
             
      <name>mapreduce.framework.name
      </name>
     
    
- 
    
     
    
    
             
      <value>yarn
      </value>
     
    
- 
    
     
    
    
         
      </property>
     
    
- 
    
     
    
    
     
      </configuration>
     
    
  
   - 
    
     
    
    
     
      vim workers
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      master
     
    
- 
    
     
    
    
     
      slave1
     
    
- 
    
     
    
    
     
      slave2
     
    
  
   - 
    
     
    
    
     
      vim hadoop-env.
      sh
     
    
- 
    
     
    
    
      
     
    
- 
    
     
    
    
     
      export 
      JAVA_HOME=
      /usr/local/src/jdk/jdk1
      .8
      .0_311
     
    
在 sbin/start-dfs.sh 和 sbin/stop-dfs.sh 文件顶部添加如下内容
  
   - 
    
     
    
    
     
      HDFS_DATANODE_USER=ucmed
     
    
- 
    
     
    
    
     
      HDFS_DATANODE_SECURE_USER=ucmed
     
    
- 
    
     
    
    
     
      HDFS_NAMENODE_USER=ucmed
     
    
- 
    
     
    
    
     
      HDFS_SECONDARYNAMENODE_USER=ucmed
     
    
在 sbin/start-yarn.sh 和 sbin/stop-yarn.sh 文件顶部添加如下内容
  
   - 
    
     
    
    
     
      YARN_RESOURCEMANAGER_USER=ucmed
     
    
- 
    
     
    
    
     
      HADOOP_SECURE_DN_USER=ucmed
     
    
- 
    
     
    
    
     
      YARN_NODEMANAGER_USER=ucmed
     
    
分发配置到slave1和slave2,并配置环境变量(java,hadoop)
  
   - 
    
     
    
    
     
      scp -r 
      /usr/local
      /src/hadoop
      /hadoop-3.3.1 slave1:/usr
      /local/src
      /hadoop/
     
    
- 
    
     
    
    
     
      scp -r 
      /usr/local
      /src/hadoop
      /hadoop-3.3.1 slave2:/usr
      /local/src
      /hadoop/
     
    
在master节点上初始化HDFS文件系统
hdfs namenode -format
在master节点上启动hdfs
  
   - 
    
     
    
    
     
      cd /usr/local/src/hadoop/hadoop-3.3.1/sbin
     
    
- 
    
     
    
    
     
      ./start-dfs.sh
     
    
在slave1节点上启动yarn(因为yarn的主节点配置的是slave1)
cd /usr/local/src/hadoop/hadoop-3.3.1/sbin
./start-yarn.sh
验证服务
Web端查看HDFS的NameNode
Web端查看YARN的ResourceManager
6.测试
cd $HADOOP_HOME/share/hadoop/mapreduce
向HDFS集群写10个10MB的文件:
  
   - 
    
     
    
    
     
      hadoop jar ./hadoop-mapreduce-client-jobclient-3.3.1-tests.jar \
     
    
- 
    
     
    
    
     
      TestDFSIO \
     
    
- 
    
     
    
    
     
      -write \
     
    
- 
    
     
    
    
     
      -nrFiles 10 \
     
    
- 
    
     
    
    
     
      -size 10MB
     
    
删除测试临时文件:
  
   - 
    
     
    
    
     
      hadoop jar hadoop-mapreduce-client-jobclient-3.3.1-tests.jar \
     
    
- 
    
     
    
    
     
      TestDFSIO -clean
     
    
测试重复执行小作业效率
测试使用3个mapper和3个reducer运行一个小作业20次,生成输入行数为5,降序排列:
  
   - 
    
     
    
    
     
      hadoop jar hadoop-mapreduce-client-jobclient-3.3.1-tests.jar \
     
    
- 
    
     
    
    
     
      mrbench \
     
    
- 
    
     
    
    
     
      -numRuns 20 \
     
    
- 
    
     
    
    
     
      -maps 3 \
     
    
- 
    
     
    
    
     
      -reduces 3 \
     
    
- 
    
     
    
    
     
      -inputLines 5 \
     
    
- 
    
     
    
    
     
      -inputType descending
     
    
测试NameNode负载
测试使用3个mapper和3个reducer来创建100个文件:
  
   - 
    
     
    
    
     
      hadoop jar hadoop-mapreduce-client-jobclient-3.3.1-tests.jar \
     
    
- 
    
     
    
    
     
      nnbench \
     
    
- 
    
     
    
    
     
      -operation create_write \
     
    
- 
    
     
    
    
     
      -maps 3 \
     
    
- 
    
     
    
    
     
      -reduces 3 \
     
    
- 
    
     
    
    
     
      -numberOfFiles 100 \
     
    
- 
    
     
    
    
     
      -replicationFactorPerFile 3 \
     
    
- 
    
     
    
    
     
      -readFileAfterOpen true
     
    
转载:https://blog.csdn.net/weixin_38751513/article/details/128674646
 
					