Skip to content

Node.js实现分片上传、断点续传

slice upload

大文件上传会消耗大量的时间,而且中途有可能上传失败。这时我们需要前端和后端配合来解决这个问题。

解决步骤:

  1. 文件分片,减少每次请求消耗的时间
  2. 通知服务端合并文件分片
  3. 控制并发的请求数量,避免浏览器内存溢出
  4. 当因为网络或者其他原因导致某次的请求失败,我们重新发送请求
  5. 断点续传

文件的分片与合并

在JavaScript中,FIle对象是' Blob '对象的子类,该对象包含一个重要的方法slice,通过该方法我们可以这样分割二进制文件:

html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
    <script src="https://cdn.bootcdn.net/ajax/libs/axios/0.24.0/axios.min.js"></script>
</head>
<body>
    <input type="file" multiple="multiple" id="fileInput" />
    <button onclick="SliceUpload()">上传</button>  
    <script>
        function SliceUpload() {
            const file = document.getElementById('fileInput').files[0]
            if (!file) return

            // 文件分片
            let size = 1024 * 50; //50KB 50KB Section size
            let fileChunks = [];
            let index = 0;        //Section num
            for (let cur = 0; cur < file.size; cur += size) {
                fileChunks.push({
                    hash: index++,
                    chunk: file.slice(cur, cur + size),
                });
            }

            // 上传分片
            const uploadList = fileChunks.map((item, index) => {
                let formData = new FormData();
                formData.append("filename", file.name);
                formData.append("hash", item.hash);
                formData.append("chunk", item.chunk);
                return axios({
                    method: "post",
                    url: "/upload",
                    data: formData,
                });
            });
            await Promise.all(uploadList);

            // 所有分片上传完成,通知服务器合并分片
            await axios({
                method: "get",
                url: "/merge",
                params: {
                    filename: file.name,
                },
            });
            console.log("Upload to complete");
        }
    </script>
</body>
</html>

并发控制

如果文件很大,这样切分的分片会很多,浏览器短时间内就会发起大量的请求,可能会导致内存耗尽,所以要进行并发控制。

这里我们结合Promise.race()方法 控制并发请求的数量,避免浏览器内存溢出。

js
// 加入并发控制
async function SliceUpload() {
    const file = document.getElementById('fileInput').files[0]
    if (!file) return

    // 文件分片
    let size = 1024 * 50; //50KB 50KB Section size
    let fileChunks = [];
    let index = 0;        //Section num
    for (let cur = 0; cur < file.size; cur += size) {
        fileChunks.push({
            hash: index++,
            chunk: file.slice(cur, cur + size),
        });
    }

    let pool = []; //Concurrent pool
    let max = 3; //Maximum concurrency
    for (let i = 0; i < fileChunks.length; i++) {
        let item = fileChunks[i];
        let formData = new FormData();
        formData.append("filename", file.name);
        formData.append("hash", item.hash);
        formData.append("chunk", item.chunk);

        // 上传分片
        let task = axios({
            method: "post",
            url: "/upload",
            data: formData,
        });
        task.then(() => {
        // 从并发池中移除已经完成的请求
        let index = pool.findIndex((t) => t === task);
            pool.splice(index);
        });

        // 把请求放入并发池中,如果已经达到最大并发量
        pool.push(task);
        if (pool.length === max) {
            //All requests are requested complete
            await Promise.race(pool);
        }
    }

    // 所有分片上传完成,通知服务器合并分片
    await axios({
        method: "get",
        url: "/merge",
        params: {
            filename: file.name,
        },
    });
    console.log("Upload to complete");
}

使代码可复用

js
function SliceUpload() {
    const file = document.getElementById('fileInput').files[0]
    if (!file) return

    // 文件分片
    let size = 1024 * 50; // 分片大小设置
    let fileChunks = [];
    let index = 0;        // 分片序号
    for (let cur = 0; cur < file.size; cur += size) {
        fileChunks.push({
            hash: index++,
            chunk: file.slice(cur, cur + size),
        });
    }

    const uploadFileChunks = async function(list){
        if(list.length === 0){
            // 所有分片上传完成,通知如无
            await axios({
                method: 'get',
                url: '/merge',
                params: {
                    filename: file.name
                }
            });
            console.log('Upload to complete')
            return
        }

        let pool = []       // 并发池
        let max = 3         // 最大并发数
        let finish = 0      // 完成数量
        let failList = []   // 失败列表
        for(let i=0;i<list.length;i++){
            let item = list[i]
            let formData = new FormData()
            formData.append('filename', file.name)
            formData.append('hash', item.hash)
            formData.append('chunk', item.chunk)
            
            let task = axios({
                method: 'post',
                url: '/upload',
                data: formData
            })

            task.then((data)=>{
                // 从并发池中移除已经完成的请求
                let index = pool.findIndex(t=> t===task)
                pool.splice(index)
            }).catch(()=>{
                failList.push(item)
            }).finally(()=>{
                finish++
                // 如果有失败的重新上传
                if(finish===list.length){
                    uploadFileChunks(failList)
                }
            })
            pool.push(task)
            if(pool.length === max){
                await Promise.race(pool)
            }
        }
    }

    uploadFileChunks(fileChunks)
}

服务端接口实现

js
const express = require('express')
const multiparty = require('multiparty')
const fs = require('fs')
const path = require('path')
const { Buffer } = require('buffer')
// file path
const STATIC_FILES = path.join(__dirname, './static/files')
// Temporary path to upload files
const STATIC_TEMPORARY = path.join(__dirname, './static/temporary')
const server = express()
// Static file hosting
server.use(express.static(path.join(__dirname, './dist')))
// Interface for uploading slices
server.post('/upload', (req, res) => {
    const form = new multiparty.Form();
    form.parse(req, function(err, fields, files) {
        let filename = fields.filename[0]
        let hash = fields.hash[0]
        let chunk = files.chunk[0]
        let dir = `${STATIC_TEMPORARY}/${filename}`
        // console.log(filename, hash, chunk)
        try {
            if (!fs.existsSync(dir)) fs.mkdirSync(dir)
            const buffer = fs.readFileSync(chunk.path)
            const ws = fs.createWriteStream(`${dir}/${hash}`)
            ws.write(buffer)
            ws.close()
            res.send(`${filename}-${hash} Section uploaded successfully`)
        } catch (error) {
            console.error(error)
            res.status(500).send(`${filename}-${hash} Section uploading failed`)
        }
    })
})

//Merged slice interface
server.get('/merge', async (req, res) => {
    const { filename } = req.query
    try {
        let len = 0
        const bufferList = fs.readdirSync(`${STATIC_TEMPORARY}/${filename}`).map((hash,index) => {
            const buffer = fs.readFileSync(`${STATIC_TEMPORARY}/${filename}/${index}`)
            len += buffer.length
            return buffer
        });
        //Merge files
        const buffer = Buffer.concat(bufferList, len);
        const ws = fs.createWriteStream(`${STATIC_FILES}/${filename}`)
        ws.write(buffer);
        ws.close();
        res.send(`Section merge completed`);
    } catch (error) {
        console.error(error);
    }
})

server.listen(3000, _ => {
    console.log('http://localhost:3000/')
})

上边实现了文件分片上传,但是如果当文件上传了一半由于一些意外导致没有完整上传,那么下一次上传同样的文件,上面的程序会断点续传吗?答案是:不会。它还是会从头上传所有文件的切片。

如果要实现断点续传,我们需要知道已经上传切片的大小总和,然后把它当作切片的起点,继续对文件切片上传即可。

断点续传

按照上面的思路,就需要增加一个断点检测的接口,在文件上传前先调用:

js
// 检测断点
server.get('/checkpoint', (req, res) => {
  const { filename } = req.query;
  const dir = `${STATIC_TEMPORARY}/${filename}`;
  try {
    if (!fs.existsSync(dir)) {
      res.send({ success: true, point: 0, hash: 0 });
    } else {
      let len = 0;
      let hash = 0;
      fs.readdirSync(`${STATIC_TEMPORARY}/${filename}`).forEach((item, index) => {
        const buffer = fs.readFileSync(`${STATIC_TEMPORARY}/${filename}/${index}`);
        len += buffer.length;
        hash += 1;
      });
      return res.send({ success: true, point: len, hash: hash - 1 });
    }
  } catch (error) {
    console.error(error);
    res.status(500).send({ success: false, msg: error });
  }
});

这样上传之前先检测断点,如果之前已经上传过的文件,那么调用完上边的接口,前端直接处理为文件上传完成,否则就根据point计算已上传进度,根据hash指定续传分片的索引。

到此大文件分片上传,断点续传的功能就完成了。

Released under the MIT License.