Advertisement

Saturday, March 31, 2018

Hadoop V2 - Pre-Req Setup

In this blog I discuss the pre-requisite setup for a Hadoop 2 installation. I have made several improvements to the pre-req setup compared to my Hadoop 1 blog.

Our Controller Node is namenode or nn or namenode.cluster.com
All the below steps are on namenode
My Cluster consists of 7 nodes -
Below is my /etc/hosts which I created on namenode.
All other nodes are just installed with same password on all the nodes. The roles of the nodes are pretty much self intuitive as per the names


127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
#::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

192.168.10.51 namenode.cluster.com      namenode nn
192.168.10.52 rmanager.cluster.com      rmanager rm
192.168.10.53 snamenode.cluster.com     snamenode snn
192.168.10.54 d1node.cluster.com        d1node    d1n
192.168.10.55 d2node.cluster.com        d2node    d2n
192.168.10.58 d3node.cluster.com        d3node    d3n
192.168.10.57 d4node.cluster.com        d4node    d4n

Version on which I am installing
[Linux 7 Red Hat Enterprise Linux Server release 7.3 (Maipo) 3.10.0-514.el7.x86_64]

Step 1
[As root]
Install pdsh and expect
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/p/pdsh-2.31-1.el7.x86_64.rpm
wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/p/pdsh-rcmd-rsh-2.31-1.el7.x86_64.rpm

yum install pdsh-2.31-1.el7.x86_64.rpm pdsh-rcmd-rsh-2.31-1.el7.x86_64.rpm -y
yum install expect -y



Step 2  - Create ssh key
[As root]
[root@namenode ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
88:1c:af:e6:23:72:de:b2:76:8d:c6:86:d5:5a:96:1c root@namenode.cluster.com
The key's randomart image is:
+--[ RSA 2048]----+
|                 |
|                 |
|    .            |
|   . +E.         |
|    ooooS        |
|    ..*          |
|   +o*           |
|. *+O .          |
| =oBo.           |
+-----------------+


Step 2.1
[As root - Automation File Creation]
Create Below File (This is to setup passwordless ssh and file-copy)
(Source - http://www.techpaste.com/2013/04/shell-script-automate-ssh-key-transfer-hosts-linux/)
File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Push the local SSH public key to a remote host so future logins are
# passwordless.
# Usage: keysetup.exp <host> <remote-user> <remote-password> <local-user>
# lindex (not lrange) so each argument is a plain string rather than a
# one-element Tcl list, which would mangle values containing braces/spaces.
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10

# Remove any stale host key first, and wait for the command to finish
# before spawning the next one (the original never reaped this process).
spawn /usr/bin/ssh-keygen -R $host
expect eof

# root's home is /root (not /home/root), hence the /$lusername path.
spawn /usr/bin/ssh-copy-id -i /$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
# Handle both prompts in a single loop: the yes/no host-key question only
# appears on the very first connection, so it must be optional.
expect {
    "*(yes/no*"   { send -- "yes\r"; exp_continue }
    "*?assword:*" { send -- "$password\r"; exp_continue }
    "Now try*"    { exit 0 }
    eof           { exit 0 }
    timeout       { exit 1 }
}
# Note: eof now exits 0 -- modern ssh-copy-id exits silently on success
# without printing "Now try...", so treating eof as failure broke the loop
# callers use to detect errors.
exit 0

Provide Execute Permissions
chmod u+x /tmp/keysetup.exp

Create all_hosts file with a hostname present in newline for each host
[root@namenode tmp]# cat /tmp/all_hosts
nn
snn
rm
d1n
d2n
d3n
d4n
chmod 644 /tmp/all_hosts

Step 3
[As root - Passwordless SSH Setup]

Run below to setup passwordless as user root.
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} root remote_password root ; done
Now namenode can login to all hosts without password.

Step 4
[As root - sysctl file creation and security limits file creation]

Create Sysctl file for hadoop
[root@namenode tmp]# cat /tmp/98-sysctl-hadoop.conf
fs.file-max=6815744
fs.aio-max-nr=1048576
net.core.rmem_default=262144
net.core.wmem_default=262144
net.core.rmem_max=16777216
net.core.wmem_max=16777216
net.ipv4.tcp_rmem=4096 262144 16777216
net.ipv4.tcp_wmem=4096 262144 16777216
vm.swappiness=10

Create secur_conf.conf file for security limits
[root@namenode tmp]# cat secur_conf.conf
@hadoop soft nofile 32768
@hadoop hard nofile 32768
@hadoop soft nproc 32768
@hadoop hard nproc 32768

Step 5
[As root - Distribute sysctl configuration file and Apply ]
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/98-sysctl-hadoop.conf ${i}:/etc/sysctl.d/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} mv /etc/sysctl.d/99-g03r02c00.conf /etc/sysctl.d/95-g03r02c00.conf ; done
(Files in /etc/sysctl.d/ are applied in lexical order, so this renames the vendor-supplied 99-g03r02c00.conf to sort below our 98-sysctl-hadoop.conf and keep our settings from being overridden. Skip this step if that vendor file does not exist on your systems.)
[As root - Apply Settings ]
# export WCOLL=/tmp/all_hosts
# pdsh -R exec  /usr/sbin/sysctl --system


Step 6
[As root - Distribute limits file and Deploy]
# for i in $(cat /tmp/all_hosts) ; do scp /tmp/secur_conf.conf ${i}:/tmp/ ; done
#  for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'cat /tmp/secur_conf.conf >> /etc/security/limits.conf'  ; done

Step 7
[As root - Disable Transparent Huge Pages Compaction]
# Create File
cat /tmp/thp.disable
echo 'never' > /sys/kernel/mm/transparent_hugepage/defrag
(The original line left the placeholder "defrag_file_pathname" unexpanded; on RHEL 7 the transparent-huge-page defrag control file is /sys/kernel/mm/transparent_hugepage/defrag — verify the path on your kernel.)

# for i in $(cat /tmp/all_hosts) ; do scp /tmp/thp.disable ${i}:/tmp/ ; done
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} 'cat /tmp/thp.disable >> /etc/rc.local'  ; done

Step 8
[As root - Disable SE Linux ]
# export WCOLL=/tmp/all_hosts
# pdsh -R exec setenforce 0

Step 9
[As root -  Reboot all machines (and wait)]
#for i in  $(cat /tmp/all_hosts) ; do ssh ${i} reboot ; done

Step 10
[As root - Hosts File Updation]
#for i in $(cat /tmp/all_hosts) ; do scp /etc/hosts ${i}:/etc/hosts; done

Step 11
[As root - Group Creation]
## for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} groupadd -g 1000 hadoop" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd

[As root - User Creation]

#for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1003  -g hadoop mapred" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd


#for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1004  -g hadoop yarn" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd


## for i in $(cat /tmp/all_hosts) ; do echo "ssh ${i} useradd -u 1005  -g hadoop hdfs" >> /tmp/useradd; done ; bash /tmp/useradd ; rm -f /tmp/useradd

Step 12
[As root - hdfs, mapred and yarn user password change ]
Change hduser to mapred and then yarn for executions.

Create Script as below and run it
#!/bin/bash
# Set the password of every Hadoop service account to "hadoop" on all
# cluster nodes listed in /tmp/all_hosts.
# Fix: the original targeted "hduser", a user that is never created in
# Step 11 -- the accounts that actually exist are hdfs, mapred and yarn,
# so it is looped over all three instead of requiring manual edits.
for server in $(cat /tmp/all_hosts); do
  echo "${server}"
  for user in hdfs mapred yarn; do
    # The here-document feeds the new password twice to the remote passwd.
    ssh "${server}" "passwd ${user} <<EOF
hadoop
hadoop
EOF"
  done
done

Step 13
[As hdfs, mapred, yarn - Setup ssh equivalency on namenode, rmanager, snamenode]

File Name - /tmp/keysetup.exp
#!/usr/bin/env expect
# Push the local SSH public key to a remote host so future logins are
# passwordless (variant for non-root users, whose homes live under /home).
# Usage: keysetup.exp <host> <remote-user> <remote-password> <local-user>
# lindex (not lrange) so each argument is a plain string rather than a
# one-element Tcl list, which would mangle values containing braces/spaces.
set host      [lindex $argv 0]
set username  [lindex $argv 1]
set password  [lindex $argv 2]
set lusername [lindex $argv 3]
set timeout 10

# Remove any stale host key first, and wait for the command to finish
# before spawning the next one (the original never reaped this process).
spawn /usr/bin/ssh-keygen -R $host
expect eof

spawn /usr/bin/ssh-copy-id -i /home/$lusername/.ssh/id_rsa.pub $username@$host
match_max 100000
# Handle both prompts in a single loop: the yes/no host-key question only
# appears on the very first connection, so it must be optional.
expect {
    "*(yes/no*"   { send -- "yes\r"; exp_continue }
    "*?assword:*" { send -- "$password\r"; exp_continue }
    "Now try*"    { exit 0 }
    eof           { exit 0 }
    timeout       { exit 1 }
}
# Note: eof now exits 0 -- modern ssh-copy-id exits silently on success
# without printing "Now try...".
exit 0

Provide Execute Permissions (755, not 777: the hdfs, mapred and yarn users only need read+execute; nobody else should be able to modify the script)
chmod 755 /tmp/keysetup.exp

[As hdfs]
# ssh-keygen
#for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} hdfs remote_password hdfs ; done
[As mapred]
# ssh-keygen
# for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} mapred remote_password mapred ; done
[As yarn]
# ssh-keygen
#for i in $(cat /tmp/all_hosts) ; do /tmp/keysetup.exp ${i} yarn remote_password yarn ; done


Step 14
[As root - Java Installation]
#for i in $(cat /tmp/all_hosts) ; do echo "scp jdk-8u152-linux-x64.rpm ${i}:/tmp &" >> /tmp/sendjdk.bash ; done
Paste and run contents of the file
# for i in $(cat /tmp/all_hosts) ; do  ssh ${i}  rpm -Uvh /tmp/jdk-8u152-linux-x64.rpm  ; done;

Step 15
[As root - Set Env Variables]
Create file profile.sh
cat /tmp/profile.sh
export JAVA_HOME=/usr/java/latest
export HADOOP_PREFIX=/usr/local/hadoop
export LOG=/opt/HDPV2/logs
export CONF=/etc/hadoop/conf
export PATH=$JAVA_HOME/bin:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin:$PATH

#for i in $(cat /tmp/all_hosts) ; do scp /tmp/profile.sh ${i}:/etc/profile.d/ ; done
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} chmod 755 /etc/profile.d/profile.sh ; done
#for i in $(cat /tmp/all_hosts) ; do ssh ${i} source /etc/profile.d/profile.sh ; done
(Note: sourcing over ssh only affects that throwaway remote shell; once the file is in /etc/profile.d/ the variables are picked up automatically by every new login session, so this line is just a syntax sanity check.)

Step 16
[As root - Permissions Set]
My Mount is going to /opt/HDPV2 for my hadoop based data.
# for i in $(cat /tmp/all_hosts) ; do ssh ${i} chown root:hadoop /opt /opt/HDPV2 ; done

No comments:
Write comments