A snapshot is a point-in-time image of the entire filesystem or a subtree of a filesystem. Some of the scenarios where snapshots are very useful:
有時候我們會為當時的資料夾做一個快照，然後使用 Oozie 的 job 定時以 distcp 備份到其他的 cluster 上。
要還原的時候，只要把 snapshot 裡的檔案跟資料夾複製回該目錄就好了（.snapshot 底下的內容是唯讀的，只能複製、不能搬移）。
from :
Snapshots for HDFS | Hortonworks
HDFS Snapshots
http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html
http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html
Step by Step Example
你可以試著操作 snapshot 的流程
- 建立放置檔案的hdfs 資料夾
root@w0[~]{11:49}
# hadoop fs -mkdir /user/root
mkdir: Permission denied: user=root, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x
# hadoop fs -mkdir /user/root
mkdir: Permission denied: user=root, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x
root@w0[~]{11:50}
# su hdfs
hdfs@w0[/root]{11:50}
$ hadoop fs -mkdir /user/root
hdfs@w0[/root]{11:50}
$ hadoop fs -chown root /user/root
root@w0[~]{11:51}
# hadoop fs -put epel-release-6-8.noarch.rpm /user/root/data
- 設定 /user/root/data 資料夾允許建立 Snapshot
root@w0[~]{11:51}
# su -l hdfs -c "hdfs dfsadmin -allowSnapshot /user/root/data"
Allowing snaphot on /user/root/data succeeded
root@w0[~]{11:52}
# hdfs dfs -createSnapshot /user/root/data
Created snapshot /user/root/data/.snapshot/s20140429-115245.568
root@w0[~]{11:52}
# hdfs dfs -createSnapshot /user/root/data ss01
Created snapshot /user/root/data/.snapshot/ss01
- 試著刪除該資料夾
root@w0[~]{11:52}
# hadoop fs -rm -R data
14/04/29 11:53:08 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 21600000 minutes, Emptier interval = 0 minutes.
rm: Failed to move to trash: hdfs://w0:8020/user/root/data. Consider using -skipTrash option
root@w0[~]{11:53}
# hadoop fs -rm -R -skipTrash data
rm: The directory /user/root/data cannot be deleted since /user/root/data is snapshottable and already has snapshots
root@w0[~]{11:53}
# hadoop fs -ls data
Found 2 items
-rw-r--r-- 3 root hdfs 14 2014-04-29 11:51 data/aaa.txt
-rw-r--r-- 3 root hdfs 14540 2014-04-29 11:51 data/epel-release-6-8.noarch.rpm
在這裡可以注意一下：刪除已設定為 snapshottable 且已經有 snapshot 的資料夾時，系統會給出上面的提示而拒絕刪除。
root@w0[~]{11:54}
# hadoop fs -ls -R -skipTrash data
-ls: Illegal option -skipTrash
Usage: hadoop fs [generic options] -ls [-d] [-h] [-R] [<path> ...]
root@w0[~]{11:54}
# hadoop fs -ls -R data
-rw-r--r-- 3 root hdfs 14 2014-04-29 11:51 data/aaa.txt
-rw-r--r-- 3 root hdfs 14540 2014-04-29 11:51 data/epel-release-6-8.noarch.rpm
root@w0[~]{11:54}
# hadoop fs -rm data/aaa.txt
14/04/29 11:54:47 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 21600000 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://w0:8020/user/root/data/aaa.txt' to trash at: hdfs://w0:8020/user/root/.Trash/Current
root@w0[~]{11:54}
# hadoop fs -rm data/epel-release-6-8.noarch.rpm
14/04/29 11:55:01 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 21600000 minutes, Emptier interval = 0 minutes.
Moved: 'hdfs://w0:8020/user/root/data/epel-release-6-8.noarch.rpm' to trash at: hdfs://w0:8020/user/root/.Trash/Current
root@w0[~]{11:55}
# hadoop fs -ls -R data
root@w0[~]{11:55}
# hadoop fs -ls -R data/.snapshot
drwxr-xr-x - root hdfs 0 2014-04-29 11:52 data/.snapshot/s20140429-115245.568
-rw-r--r-- 3 root hdfs 14 2014-04-29 11:51 data/.snapshot/s20140429-115245.568/aaa.txt
-rw-r--r-- 3 root hdfs 14540 2014-04-29 11:51 data/.snapshot/s20140429-115245.568/epel-release-6-8.noarch.rpm
drwxr-xr-x - root hdfs 0 2014-04-29 11:52 data/.snapshot/ss01
-rw-r--r-- 3 root hdfs 14 2014-04-29 11:51 data/.snapshot/ss01/aaa.txt
-rw-r--r-- 3 root hdfs 14540 2014-04-29 11:51 data/.snapshot/ss01/epel-release-6-8.noarch.rpm