📋 Step 1: System Check
# Check the operating system version
cat /etc/os-release
# Check the kernel version
uname -r
# Check whether Java is installed
java -version
Example output:
user@server:~$ cat /etc/os-release
PRETTY_NAME="Ubuntu 24.04.2 LTS"
NAME="Ubuntu"
VERSION_ID="24.04"
user@server:~$ uname -r
6.8.0-64-generic
user@server:~$ java -version
Command 'java' not found, but can be installed with:
sudo apt install default-jre
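Hadoop in pseudo-distributed mode is fairly resource-hungry, so a quick look at available memory and disk space before installing can save debugging later (an optional sanity check):
# Check available memory and free disk space
free -h
df -h /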
⚡ Step 2: Install Java
# Update the package list
sudo apt-get update
# Install OpenJDK 8
sudo apt-get install openjdk-8-jdk -y
# Verify the Java installation
java -version
javac -version
# Locate the Java installation path
sudo update-alternatives --config java
# Set the JAVA_HOME environment variable
echo 'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64' >> ~/.bashrc
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> ~/.bashrc
# Reload the shell configuration so the variables take effect
source ~/.bashrc
# Verify that JAVA_HOME is set
echo $JAVA_HOME
Example output:
user@server:~$ java -version
openjdk version "1.8.0_462"
OpenJDK Runtime Environment (build 1.8.0_462-8u462-ga~us1-0ubuntu2~24.04.2-b08)
user@server:~$ javac -version
javac 1.8.0_462
user@server:~$ echo $JAVA_HOME
/usr/lib/jvm/java-8-openjdk-amd64
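If the JDK lands in a different directory on your machine, a quick way to find the right JAVA_HOME value instead of hard-coding the path above is to resolve the active javac binary (javac, not java, so the result points at the JDK root rather than the bundled JRE):
# Resolve the real path of the active javac binary and strip the /bin/javac suffix
readlink -f "$(which javac)" | sed 's:/bin/javac::'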
🗄️ Step 3: Install Hadoop
# Go to the user's home directory
cd ~
# Create a software directory
mkdir -p software
cd software
# Download Hadoop 3.3.4
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz
# Extract Hadoop
tar -xzf hadoop-3.3.4.tar.gz
# Move it to a standard installation directory
sudo mv hadoop-3.3.4 /opt/hadoop
# Change ownership of the files
sudo chown -R $(whoami):$(whoami) /opt/hadoop
# Configure the Hadoop environment variables
echo 'export HADOOP_HOME=/opt/hadoop' >> ~/.bashrc
echo 'export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin' >> ~/.bashrc
echo 'export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> ~/.bashrc
# Reload the shell configuration
source ~/.bashrc
# Verify the Hadoop installation
hadoop version
Example output:
user@server:~/software$ hadoop version
Hadoop 3.3.4
Source code repository https://github.com/apache/hadoop.git -r a585a73c3e02ac62350c136643a5e7f6095a3dbb
Compiled by stevel on 2022-07-29T12:32Z
This command was run using /opt/hadoop/share/hadoop/common/hadoop-common-3.3.4.jar
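Since the tarball is large, it is worth verifying the download before extracting. One way, assuming the .sha512 checksum file published alongside the release on the Apache archive, is to compare the hashes by hand (the checksum file's exact format varies across releases, so comparing by eye is the safest):
# Download the published checksum and compare it against the local file
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.4/hadoop-3.3.4.tar.gz.sha512
cat hadoop-3.3.4.tar.gz.sha512
sha512sum hadoop-3.3.4.tar.gz
# The two hash values should match exactly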
⚙️ Step 4: Configure Hadoop
# Go to the Hadoop configuration directory
cd $HADOOP_HOME/etc/hadoop
# Configure core-site.xml - set the default HDFS address
cat > core-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/tmp/hadoop-tmp</value>
</property>
</configuration>
EOF
# Configure hdfs-site.xml - set the HDFS replication factor
cat > hdfs-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/tmp/hadoop/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/tmp/hadoop/datanode</value>
</property>
</configuration>
EOF
# Configure mapred-site.xml - set the MapReduce framework
cat > mapred-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
EOF
# Configure yarn-site.xml - set up YARN resource management
cat > yarn-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>localhost</value>
</property>
</configuration>
EOF
# Configure hadoop-env.sh - set the Java path
echo "export JAVA_HOME=$JAVA_HOME" >> hadoop-env.sh
echo "export HADOOP_OPTS=\"-Djava.net.preferIPv4Stack=true\"" >> hadoop-env.sh
Example output:
user@server:/opt/hadoop/etc/hadoop$ ls -la *.xml
-rw-r--r-- 1 user user 450 Oct 28 10:45 core-site.xml
-rw-r--r-- 1 user user 520 Oct 28 10:45 hdfs-site.xml
-rw-r--r-- 1 user user 300 Oct 28 10:45 mapred-site.xml
-rw-r--r-- 1 user user 550 Oct 28 10:45 yarn-site.xml
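To confirm Hadoop is actually reading these files, hdfs getconf can echo individual keys back; shown here as a quick sanity check using the properties set above:
# Query the effective configuration to confirm the files were picked up
hdfs getconf -confKey fs.defaultFS      # expect hdfs://localhost:9000
hdfs getconf -confKey dfs.replication   # expect 1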
🔐 Step 5: SSH Configuration
# Generate an SSH key pair (no passphrase)
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
# Append the public key to the authorized keys file
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# Set the correct file permissions
chmod 600 ~/.ssh/authorized_keys
# Test an SSH connection to the local machine
ssh localhost
# Exit the SSH session once the connection succeeds
exit
Example output:
user@server:~$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
Generating public/private rsa key pair.
Your identification has been saved in /home/user/.ssh/id_rsa
user@server:~$ ssh localhost
Welcome to Ubuntu 24.04.2 LTS
user@server:~$ exit
logout
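If ssh localhost is refused, the SSH server itself may be missing; on Ubuntu it can be installed and checked like this (your image may already ship with it, in which case this is a no-op):
# Install and verify the SSH server (skip if already present)
sudo apt-get install -y openssh-server
sudo systemctl status ssh --no-pager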
🚀 Step 6: Start the Hadoop Services
# Create the temporary directories Hadoop needs
# (note: /tmp is cleared on reboot, so this layout only suits a disposable lab setup)
sudo mkdir -p /tmp/hadoop-tmp /tmp/hadoop/namenode /tmp/hadoop/datanode
sudo chown -R $(whoami):$(whoami) /tmp/hadoop*
# Format the HDFS NameNode (run only on first installation)
hdfs namenode -format
# Start the HDFS services
start-dfs.sh
# Start the YARN services
start-yarn.sh
# List the Java processes to confirm the services are running
jps
# Check the HDFS status
hdfs dfsadmin -report
Example output:
user@server:~$ jps
614456 ResourceManager
613900 DataNode
614831 NodeManager
615043 Jps
613682 NameNode
614151 SecondaryNameNode
user@server:~$ hdfs dfsadmin -report
Configured Capacity: 41882943488 (39.01 GB)
Present Capacity: 19895091200 (18.53 GB)
DFS Remaining: 19895066624 (18.53 GB)
DFS Used: 24576 (24 KB)
DFS Used%: 0.00%
Live datanodes (1):
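Both daemons also expose web UIs, which are often the quickest health check. On Hadoop 3.x the NameNode listens on port 9870 and the ResourceManager on 8088 by default (assuming no port overrides in your config):
# Probe the NameNode and ResourceManager web UIs (expect HTTP 200)
curl -s -o /dev/null -w "NameNode UI: %{http_code}\n" http://localhost:9870
curl -s -o /dev/null -w "ResourceManager UI: %{http_code}\n" http://localhost:8088
# Or open http://localhost:9870 and http://localhost:8088 in a browser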
📁 Step 7: Create the Project Structure
# Return to the home directory
cd ~
# Create a standard MapReduce project directory layout
mkdir -p mapreduce_lab/src/main/java
mkdir -p mapreduce_lab/target/classes
mkdir -p mapreduce_lab/lib
mkdir -p mapreduce_lab/input
# Verify the directory structure
cd mapreduce_lab
find . -type d
Example output:
user@server:~/mapreduce_lab$ find . -type d
.
./lib
./target
./target/classes
./src
./src/main
./src/main/java
./input
🔧 Step 8: Set Up the Development Environment
# Make sure you are in the project root
cd ~/mapreduce_lab
# Copy the core Hadoop JAR files into the lib directory
cp $HADOOP_HOME/share/hadoop/common/hadoop-common-3.3.4.jar lib/
cp $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-core-3.3.4.jar lib/
cp $HADOOP_HOME/share/hadoop/common/lib/*.jar lib/
# Inspect the copied JAR files
ls -la lib/ | head -5
# Create the compile script
# (the classpath is quoted so the shell passes the lib/* wildcard to javac
# literally instead of glob-expanding it into separate arguments)
cat > compile.sh << 'EOF'
#!/bin/bash
export HADOOP_CLASSPATH=$(hadoop classpath)
javac -cp "$HADOOP_CLASSPATH:lib/*" -d target/classes src/main/java/*.java
echo "Compilation finished!"
EOF
# Create the packaging script
cat > build.sh << 'EOF'
#!/bin/bash
./compile.sh
jar cf mapreduce-lab.jar -C target/classes .
echo "JAR packaging finished: mapreduce-lab.jar"
EOF
# Make the scripts executable
chmod +x compile.sh build.sh
# Verify the scripts were created
ls -la *.sh
Example output:
user@server:~/mapreduce_lab$ ls -la lib/ | head -5
total 41500
drwxrwxr-x 2 user user 4096 Oct 28 10:48 .
drwxrwxr-x 6 user user 4096 Oct 28 10:43 ..
-rw-r--r-- 1 user user 29489 Oct 28 10:48 accessors-smart-2.4.7.jar
user@server:~/mapreduce_lab$ ls -la *.sh
-rwxrwxr-x 1 user user 116 Oct 28 10:49 build.sh
-rwxrwxr-x 1 user user 152 Oct 28 10:49 compile.sh
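Once a job class exists under src/main/java, the typical invocation after ./build.sh would look like the following (MainClass is a placeholder for your driver class, not something created in this guide):
# Run the packaged job on YARN (substitute your own driver class for MainClass)
hadoop jar mapreduce-lab.jar MainClass /user/$(whoami)/input /user/$(whoami)/output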
✅ Final Verification
# Verify the Hadoop service status
jps
# Verify the HDFS status
hdfs dfsadmin -report
# Verify the Hadoop classpath configuration
hadoop classpath
# Test the compile environment (with no .java sources yet, javac will complain
# about missing files, but the script itself should still run to completion)
./compile.sh
Example output:
user@server:~/mapreduce_lab$ jps
614456 ResourceManager
613900 DataNode
614831 NodeManager
615043 Jps
613682 NameNode
614151 SecondaryNameNode
user@server:~/mapreduce_lab$ hadoop classpath
/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*...
user@server:~/mapreduce_lab$ ./compile.sh
Compilation finished!
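As a final end-to-end smoke test, the examples JAR bundled with the Hadoop distribution can run a small WordCount job against HDFS. This is a sketch: the jar path below matches the 3.3.4 layout under $HADOOP_HOME, and the output directory must not already exist.
# Create a tiny input file and upload it to HDFS
echo "hello hadoop hello world" > input/sample.txt
hdfs dfs -mkdir -p /user/$(whoami)/input
hdfs dfs -put input/sample.txt /user/$(whoami)/input/
# Run the bundled WordCount example
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.4.jar \
  wordcount /user/$(whoami)/input /user/$(whoami)/output
# Inspect the result
hdfs dfs -cat /user/$(whoami)/output/part-r-00000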