PeerTube remote runner jobs getting stuck

I want to start this post by saying thanks to everyone working on this great project. I also want to start with a couple of disclaimers:

  1. I didn’t follow the official tutorial exactly, as I already have an nginx reverse proxy and a certbot Docker instance.
  2. I am doing this completely on a hobby basis for the fun of it, and do not have a public instance.

That said, my issue is the following: I set up a couple local machines as remote runners, and a lot of times the job gets stuck because the runner downloads it, transcodes successfully but then fails to upload it fully, and then the job gets stuck just saying « PROCESSING ».

The log of prunner.service then says

Sep 15 14:45:40 oldie prunner[15981]:       "message": "connect ENETUNREACH [IPv6 address here]",
Sep 15 14:45:40 oldie prunner[15981]:       "stack":
Sep 15 14:45:40 oldie prunner[15981]:           Error: connect ENETUNREACH [IPv6 address here]
Sep 15 14:45:40 oldie prunner[15981]:               at TCPConnectWrap.afterConnect [as oncomplete] (node:net:1549:16)
Sep 15 14:45:40 oldie prunner[15981]:       "errno": -101,
Sep 15 14:45:40 oldie prunner[15981]:       "code": "ENETUNREACH",
Sep 15 14:45:40 oldie prunner[15981]:       "syscall": "connect",
Sep 15 14:45:40 oldie prunner[15981]:       "address": "[IPv6 address here]",
Sep 15 14:45:40 oldie prunner[15981]:       "port": 443
Sep 15 14:45:40 oldie prunner[15981]:     }

The strange thing is that sometimes it does go through, so if I restart prunner.service enough times I can get it to finish. It takes a lot of time, though, because the runner re-downloads and re-transcodes the file on every retry — it doesn’t keep the finished job until the upload is confirmed successful. The actual issue seems to be the following:
image

That is a screenshot of both of my local runners, and they’re both on the same network. However, randomly PeerTube thinks the runners are on the Docker gateway network and randomly not, so whenever I refresh that page I never know whether it’s going to list the real outbound IP for the runner or the internal Docker gateway IP. I suspect that’s why the uploads fail, because somehow the nginx reverse proxy doesn’t proxy correctly at all times.

This is what I think are the relevant portions of my nginx config, just switched out the domain name to example.com to anonymize it:

# example.com/
upstream example.com {
    # Container: docker-peertube-1
    #     networks:
    #         docker_default (reachable)
    #     IP address: 172.18.0.42
    #     exposed ports (first ten): 1935/tcp 9000/tcp
    #     default port: 80
    #     using port: 9000
    #         /!\ WARNING: Virtual port published on host.  Clients
    #                      might be able to bypass nginx-proxy and
    #                      access the container's server directly.
    server 172.18.0.42:9000;
}
server {
    server_name example.com;
    access_log /var/log/nginx/access.log vhost;
    listen 80 ;
    # Do not HTTPS redirect Let's Encrypt ACME challenge
    location ^~ /.well-known/acme-challenge/ {
        auth_basic off;
        auth_request off;
        allow all;
        root /usr/share/nginx/html;
        try_files $uri =404;
        break;
    }
    location / {
        return 301 https://$host$request_uri;
    }
}
server {
    server_name example.com;
    access_log /var/log/nginx/access.log vhost;
    http2 on;
    listen 443 ssl ;
    ssl_session_timeout 5m;
    ssl_session_cache shared:SSL:50m;
    ssl_session_tickets off;
    ssl_certificate /etc/nginx/certs/example.com.crt;
    ssl_certificate_key /etc/nginx/certs/example.com.key;
    ssl_dhparam /etc/nginx/certs/example.com.dhparam.pem;
    ssl_stapling on;
    ssl_stapling_verify on;
    ssl_trusted_certificate /etc/nginx/certs/example.com.chain.pem;
    set $sts_header "";
    if ($https) {
        set $sts_header "max-age=31536000";
    }
    add_header Strict-Transport-Security $sts_header always;
    include /etc/nginx/vhost.d/example.com;
    location / {
        proxy_pass http://example.com;
        set $upstream_keepalive false;
    }
}

# configuration file /etc/nginx/vhost.d/example.com:
# Minimum Nginx version required:  1.13.0 (released Apr 25, 2017)
# Please check your Nginx installation features the following modules via 'nginx -V':
# STANDARD HTTP MODULES: Core, Proxy, Rewrite, Access, Gzip, Headers, HTTP/2, Log, Real IP, SSL, Thread Pool, Upstream, AIO Multithreading.
# THIRD PARTY MODULES:   None.

  ##
  # Performance optimizations
  # For extra performance please refer to https://github.com/denji/nginx-tuning
  ##

  # root /var/www/peertube/storage;
# 
  # Enable compression for JS/CSS/HTML, for improved client load times.
  # It might be nice to compress JSON/XML as returned by the API, but
  # leaving that out to protect against potential BREACH attack.
  gzip              on;
  gzip_vary         on;
  gzip_types        # text/html is always compressed by HttpGzipModule
                    text/css
                    application/javascript
                    font/truetype
                    font/opentype
                    application/vnd.ms-fontobject
                    image/svg+xml;
  gzip_min_length   1000; # default is 20 bytes
  gzip_buffers      16 8k;
  gzip_comp_level   2; # default is 1

  client_body_timeout       30s; # default is 60
  client_header_timeout     10s; # default is 60
  send_timeout              10s; # default is 60
  keepalive_timeout         10s; # default is 75
  resolver_timeout          10s; # default is 30
  reset_timedout_connection on;
  proxy_ignore_client_abort on;

  tcp_nopush                on; # send headers in one piece
  tcp_nodelay               on; # don't buffer data sent, good for small data bursts in real time

  ##
  # Application
  ##

  location @api {
  
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header Host            $host;
    proxy_set_header X-Real-IP       $remote_addr;

    client_max_body_size  100k; # default is 1M

    proxy_connect_timeout 10m;
    proxy_send_timeout    10m;
    proxy_read_timeout    10m;
    send_timeout          10m;

    proxy_pass http://example.com;
  }

  location ~ ^/api/v1/videos/(upload-resumable|([^/]+/source/replace-resumable))$ {
    client_max_body_size    0;
    proxy_request_buffering off;

    try_files /dev/null @api;
  }

  location ~ ^/api/v1/users/[^/]+/imports/import-resumable$ {
    client_max_body_size    0;
    proxy_request_buffering off;

    try_files /dev/null @api;
  }

  location ~ ^/api/v1/videos/(upload|([^/]+/studio/edit))$ {
    limit_except POST HEAD { deny all; }

    # This is the maximum upload size, which roughly matches the maximum size of a video file.
    # Note that temporary space is needed equal to the total size of all concurrent uploads.
    # This data gets stored in /var/lib/nginx by default, so you may want to put this directory
    # on a dedicated filesystem.
    client_max_body_size                      12G; # default is 1M
    add_header            X-File-Maximum-Size 8G always; # inform backend of the set value in bytes before mime-encoding (x * 1.4 >= client_max_body_size)

    try_files /dev/null @api;
  }

  location ~ ^/api/v1/runners/jobs/[^/]+/(update|success)$ {
    client_max_body_size                      12G; # default is 1M
    add_header            X-File-Maximum-Size 8G always; # inform backend of the set value in bytes before mime-encoding (x * 1.4 >= client_max_body_size)

    try_files /dev/null @api;
  }

  location ~ ^/api/v1/(videos|video-playlists|video-channels|users/me) {
    client_max_body_size                      6M; # default is 1M
    add_header            X-File-Maximum-Size 4M always; # inform backend of the set value in bytes before mime-encoding (x * 1.4 >= client_max_body_size)

    try_files /dev/null @api;
  }

  ##
  # Websocket
  ##

  location @api_websocket {
    proxy_http_version 1.1;
    proxy_set_header   X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header   Host            $host;
    proxy_set_header   X-Real-IP       $remote_addr;
    proxy_set_header   Upgrade         $http_upgrade;
    proxy_set_header   Connection      "upgrade";

    proxy_pass http://example.com;
  }

  location /socket.io {
    try_files /dev/null @api_websocket;
  }

  location /tracker/socket {
    # Peers send a message to the tracker every 15 minutes
    # Don't close the websocket before then
    proxy_read_timeout 15m; # default is 60s

    try_files /dev/null @api_websocket;
  }

  # Plugin websocket routes
  location ~ ^/plugins/[^/]+(/[^/]+)?/ws/ {
    try_files /dev/null @api_websocket;
  }
  

# configuration file /etc/nginx/conf.d/my_custom_proxy_settings.conf:
client_max_body_size 4096m;

I have the same issue. Sometimes runners just stop transcribing/transcoding for a while: for example, there is a list of 257 pending jobs marked « waiting for parent », but no processing is occurring. It would be nice to add an action like « force to process » to the batch select box on the runner jobs list page.

peertube 6.2.1

1 « J'aime »

my runner is registered but stuck too

Okt 02 21:51:48 16C32T prunner[66667]: [21:51:48.401] WARN (66667): Cannot connect to https://dom.tld/runners socket
Okt 02 21:51:48 16C32T prunner[66667]: err: {
Okt 02 21:51:48 16C32T prunner[66667]: « description »: {
Okt 02 21:51:48 16C32T prunner[66667]: « error »: {
Okt 02 21:51:48 16C32T prunner[66667]: « message »: « Unexpected server response: 400 »,
Okt 02 21:51:48 16C32T prunner[66667]: « stack »:
Okt 02 21:51:48 16C32T prunner[66667]: Error: Unexpected server response: 400
Okt 02 21:51:48 16C32T prunner[66667]: at ClientRequest. (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:31347:11)
Okt 02 21:51:48 16C32T prunner[66667]: at ClientRequest.emit (node:events:519:28)
Okt 02 21:51:48 16C32T prunner[66667]: at HTTPParser.parserOnIncomingClient (node:_http_client:709:27)
Okt 02 21:51:48 16C32T prunner[66667]: at HTTPParser.parserOnHeadersComplete (node:_http_common:119:17)
Okt 02 21:51:48 16C32T prunner[66667]: at TLSSocket.socketOnData (node:_http_client:551:22)
Okt 02 21:51:48 16C32T prunner[66667]: at TLSSocket.emit (node:events:519:28)
Okt 02 21:51:48 16C32T prunner[66667]: at addChunk (node:internal/streams/readable:559:12)
Okt 02 21:51:48 16C32T prunner[66667]: at readableAddChunkPushByteMode (node:internal/streams/readable:510:3)
Okt 02 21:51:48 16C32T prunner[66667]: at Readable.push (node:internal/streams/readable:390:5)
Okt 02 21:51:48 16C32T prunner[66667]: at TLSWrap.onStreamRead (node:internal/stream_base_commons:191:23)
Okt 02 21:51:48 16C32T prunner[66667]: « type »: « Error »
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « message »: « Unexpected server response: 400 »,
Okt 02 21:51:48 16C32T prunner[66667]: « stack »:
Okt 02 21:51:48 16C32T prunner[66667]:
Okt 02 21:51:48 16C32T prunner[66667]: « target »: {
Okt 02 21:51:48 16C32T prunner[66667]: « _autoPong »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _binaryType »: « nodebuffer »,
Okt 02 21:51:48 16C32T prunner[66667]: « _bufferedAmount »: 0,
Okt 02 21:51:48 16C32T prunner[66667]: « _closeCode »: 1006,
Okt 02 21:51:48 16C32T prunner[66667]: « _closeFrameReceived »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _closeFrameSent »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _closeMessage »: {
Okt 02 21:51:48 16C32T prunner[66667]: « data »: ,
Okt 02 21:51:48 16C32T prunner[66667]: « type »: « Buffer »
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « _closeTimer »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _events »: {},
Okt 02 21:51:48 16C32T prunner[66667]: « _eventsCount »: 4,
Okt 02 21:51:48 16C32T prunner[66667]: « _extensions »: {},
Okt 02 21:51:48 16C32T prunner[66667]: « _isServer »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _paused »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _protocol »: «  »,
Okt 02 21:51:48 16C32T prunner[66667]: « _readyState »: 2,
Okt 02 21:51:48 16C32T prunner[66667]: « _receiver »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _redirects »: 0,
Okt 02 21:51:48 16C32T prunner[66667]: « _req »: {
Okt 02 21:51:48 16C32T prunner[66667]: « _closed »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _contentLength »: 0,
Okt 02 21:51:48 16C32T prunner[66667]: « _defaultKeepAlive »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _ended »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _events »: {},
Okt 02 21:51:48 16C32T prunner[66667]: « _eventsCount »: 4,
Okt 02 21:51:48 16C32T prunner[66667]: « _hasBody »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _header »: « GET /socket.io/?EIO=4&transport=websocket HTTP/1.1\r\nSec-WebSocket-Version: 13\r\nSec-WebSocket-Key: SKbffiZhJ0yjhgvofJGwdQ==\r\nConnection: Upgrade\r\nUpgrade: websocket\r\nSec-WebSocket-Extensions: permessage-deflate; client_max_window_bits\r\nHost: dom.tld\r\n\r\n »,
Okt 02 21:51:48 16C32T prunner[66667]: « _headerSent »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _keepAliveTimeout »: 0,
Okt 02 21:51:48 16C32T prunner[66667]: « _last »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _removedConnection »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _removedContLen »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _removedTE »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _trailer »: «  »,
Okt 02 21:51:48 16C32T prunner[66667]: « aborted »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « agent »: {
Okt 02 21:51:48 16C32T prunner[66667]: « _events »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « _eventsCount »: 2,
Okt 02 21:51:48 16C32T prunner[66667]: « _sessionCache »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « defaultPort »: 443,
Okt 02 21:51:48 16C32T prunner[66667]: « freeSockets »: {},
Okt 02 21:51:48 16C32T prunner[66667]: « keepAlive »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « keepAliveMsecs »: 1000,
Okt 02 21:51:48 16C32T prunner[66667]: « maxCachedSessions »: 100,
Okt 02 21:51:48 16C32T prunner[66667]: « maxFreeSockets »: 256,
Okt 02 21:51:48 16C32T prunner[66667]: « maxSockets »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « maxTotalSockets »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « options »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « protocol »: « https: »,
Okt 02 21:51:48 16C32T prunner[66667]: « requests »: {},
Okt 02 21:51:48 16C32T prunner[66667]: « scheduling »: « lifo »,
Okt 02 21:51:48 16C32T prunner[66667]: « sockets »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « totalSocketCount »: 1
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « chunkedEncoding »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « destroyed »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « finished »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « host »: « dom.tld »,
Okt 02 21:51:48 16C32T prunner[66667]: « maxHeadersCount »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « maxRequestsOnConnectionReached »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « method »: « GET »,
Okt 02 21:51:48 16C32T prunner[66667]: « outputData »: ,
Okt 02 21:51:48 16C32T prunner[66667]: « outputSize »: 0,
Okt 02 21:51:48 16C32T prunner[66667]: « parser »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « path »: « /socket.io/?EIO=4&transport=websocket »,
Okt 02 21:51:48 16C32T prunner[66667]: « protocol »: « https: »,
Okt 02 21:51:48 16C32T prunner[66667]: « res »: {
Okt 02 21:51:48 16C32T prunner[66667]: « _consuming »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _dumped »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _events »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « _eventsCount »: 1,
Okt 02 21:51:48 16C32T prunner[66667]: « _readableState »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « aborted »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « client »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « complete »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « httpVersion »: « 1.1 »,
Okt 02 21:51:48 16C32T prunner[66667]: « httpVersionMajor »: 1,
Okt 02 21:51:48 16C32T prunner[66667]: « httpVersionMinor »: 1,
Okt 02 21:51:48 16C32T prunner[66667]: « method »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « rawHeaders »: « [Array] »,
Okt 02 21:51:48 16C32T prunner[66667]: « rawTrailers »: ,
Okt 02 21:51:48 16C32T prunner[66667]: « req »: « [Circular] »,
Okt 02 21:51:48 16C32T prunner[66667]: « socket »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « statusCode »: 400,
Okt 02 21:51:48 16C32T prunner[66667]: « statusMessage »: « Bad Request »,
Okt 02 21:51:48 16C32T prunner[66667]: « upgrade »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « url »: «  »
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « reusedSocket »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « sendDate »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « shouldKeepAlive »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « socket »: {
Okt 02 21:51:48 16C32T prunner[66667]: « _SNICallback »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _closeAfterHandlingError »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _controlReleased »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _events »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « _eventsCount »: 9,
Okt 02 21:51:48 16C32T prunner[66667]: « _hadError »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _host »: « dom.tld »,
Okt 02 21:51:48 16C32T prunner[66667]: « _httpMessage »: « [Circular] »,
Okt 02 21:51:48 16C32T prunner[66667]: « _newSessionPending »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _parent »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _pendingData »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _pendingEncoding »: «  »,
Okt 02 21:51:48 16C32T prunner[66667]: « _readableState »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « _rejectUnauthorized »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _requestCert »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _secureEstablished »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « _securePending »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « _server »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _sockname »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _tlsOptions »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « _writableState »: « [Object] »,
Okt 02 21:51:48 16C32T prunner[66667]: « allowHalfOpen »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « alpnProtocol »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « authorizationError »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « authorized »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « connecting »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « encrypted »: true,
Okt 02 21:51:48 16C32T prunner[66667]: « parser »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « secureConnecting »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « servername »: « dom.tld »,
Okt 02 21:51:48 16C32T prunner[66667]: « ssl »: null
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « strictContentLength »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « timeoutCb »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « upgradeOrConnect »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « useChunkedEncodingByDefault »: false,
Okt 02 21:51:48 16C32T prunner[66667]: « writable »: true
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « _sender »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _socket »: null,
Okt 02 21:51:48 16C32T prunner[66667]: « _url »: « wss://dioxitube.com/socket.io/?EIO=4&transport=websocket »
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « type »: « ErrorEvent »
Okt 02 21:51:48 16C32T prunner[66667]: },
Okt 02 21:51:48 16C32T prunner[66667]: « message »: « websocket error »,
Okt 02 21:51:48 16C32T prunner[66667]: « stack »:
Okt 02 21:51:48 16C32T prunner[66667]: Error: websocket error
Okt 02 21:51:48 16C32T prunner[66667]: at WS.onError (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:69128:33)
Okt 02 21:51:48 16C32T prunner[66667]: at ws.onerror (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:69764:35)
Okt 02 21:51:48 16C32T prunner[66667]: at callListener (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:30539:18)
Okt 02 21:51:48 16C32T prunner[66667]: at _WebSocket.onError (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:30493:13)
Okt 02 21:51:48 16C32T prunner[66667]: at _WebSocket.emit (node:events:519:28)
Okt 02 21:51:48 16C32T prunner[66667]: at emitErrorAndClose (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:31429:17)
Okt 02 21:51:48 16C32T prunner[66667]: at process.processTicksAndRejections (node:internal/process/task_queues:82:21)
Okt 02 21:51:48 16C32T prunner[66667]: « type »: « TransportError »
Okt 02 21:51:48 16C32T prunner[66667]: }

Hi,

Please paste server logs to have more information on the 400 error

HomeServerIP - - [03/Oct/2024:12:36:04 +0200] "GET /socket.io/?EIO=4&transport=websocket HTTP/1.0" 400 34 "-" "-"

Do you use nginx? If yes please the config

Enabling debug logs can also help on peertube side

I can register the runner, but there is no reconnect afterwards.
It runs successfully again when I restart the service:

systemctl restart prunner.service

After all jobs are done, the connection is lost again.
« Last contact » shows the end of the last job,
and jobs keep filling the queue, waiting for my manual prunner.service restart.
I don’t think nginx is the point of failure.

solution for now

/etc/systemd/system/prunner-restart.service

[Unit]
Description=Restart PeerTube runner service
[Service]
Type=oneshot
ExecStartPre=/bin/bash -c 'pgrep -ns 1 -x "ffmpeg" && exit 1 || true; pgrep -ns 1 -x "whisper" && exit 1 || true'
ExecStart=/bin/systemctl restart prunner.service

/etc/systemd/system/prunner-restart.timer

[Unit]
Description=Timer to restart prunner service every hour
[Timer]
OnCalendar=hourly
Persistent=true
[Install]
WantedBy=timers.target
  1. sc-daemon-reload
  2. sc-enable prunner-restart.timer
  3. sc-start prunner-restart.timer
  4. sc-status prunner-restart.timer prunner-restart.service
1 « J'aime »

Thank you for the workaround. However, I believe this will interrupt any currently running transcoding jobs, so especially for long-running jobs this won’t really help. But for short videos this would probably be a way to get all jobs completed.

The ExecStartPre line checks whether a transcoding job (ffmpeg or whisper) is currently running: if one is, the unit exits without restarting; the restart only happens when no job is active.

2 « J'aime »

Oh, I’m sorry, I did not know that. This sounds very useful to me now, thank you very much!

I hope remote runners can get fixed upstream so this workaround will no longer be needed. Alternatively, that I could get help fixing my setup so it does not regularly error out with ENETUNREACH anymore. I do not know how to debug it further myself.

You need to fix the socket connection (error in PeerTube remote runner jobs getting stuck - #3 par orangeicebear)

Ensure your nginx configuration is up to date (PeerTube/support/nginx/peertube at develop · Chocobozzz/PeerTube · GitHub) and if yes paste peertube logs to see why it answers a 400 HTTP error

PeerTube runs in an LXC container.
The LXC host has certbot and the reverse proxy.
Remote runners — one on the host and one on a remote machine — show the same issues.
All of them run the latest PeerTube and runner versions.

only ipv4 without ssl cert

diff /etc/nginx/sites-enabled/peertube /var/www/peertube/peertube-latest/support/nginx/peertube

5a6,16
> server {
>   listen 80;
>   listen [::]:80;
>   server_name ${WEBSERVER_HOST};
> 
>   location /.well-known/acme-challenge/ {
>     default_type "text/plain";
>     root /var/www/certbot;
>   }
>   location / { return 301 https://$host$request_uri; }
> }
8c19
<   server 127.0.0.1:9000;
---
>   server ${PEERTUBE_HOST};
11a23,25
>   listen 443 ssl http2;
>   listen [::]:443 ssl http2;
>   server_name ${WEBSERVER_HOST};
13,14d26
<   listen 80;
<   server_name DOM.TLD;
18,19c30,40
<   real_ip_header X-Forwarded-For;
<   set_real_ip_from 0.0.0.0/0;
---
>   ##
>   # Certificates
>   # you need a certificate to run in production. see https://letsencrypt.org/
>   ##
>   ssl_certificate     /etc/letsencrypt/live/${WEBSERVER_HOST}/fullchain.pem;
>   ssl_certificate_key /etc/letsencrypt/live/${WEBSERVER_HOST}/privkey.pem;
> 
>   location ^~ '/.well-known/acme-challenge' {
>     default_type "text/plain";
>     root /var/www/certbot;
>   }
225,227d245
< 
<       # Don't spam access log file with byte range requests
<       access_log off;

log from the lxc container running the peertube node

2 second interval

CLIENTIPADDRESS - - [21/Oct/2024:17:29:43 +0200] « GET /socket.io/?EIO=4&transport=websocket HTTP/1.0 » 400 34 « - » « - »

Is it a peertube log or a nginx log? Can you paste peertube logs to see if the request passes through nginx?

it was a nginx log

nothing with
journalctl -u peertube.service G « socket.io »
and
journalctl -u peertube.service G « websocket »

journalctl -u peertube.service
is filled up with these records
maybe because the peertube node in the lxc container runs only on port 80

Oct 23 16:35:07 www peertube[97906]: [DOM.TLD:443] 2024-10-23 16:35:07.037 error: Client log: {« headers »:{« normalizedNames »:{},« lazyUpdate »:null},« status »:400,« statusText »:« OK »,« url »:« https://DOM.TLD/api/v1/users/token",« ok »:false,« name »:« HttpErrorResponse »,« message »:"Http failure response for https://DOM.TLD/api/v1/users/token: 400 OK »,« error »:{« type »:« PeerTube grant: refresh token is invalid »,« status »:400,« docs »:« PeerTube grant: refresh token is invalid »}} {

Oct 23 16:36:42 www peertube[97906]: « stack »: « HTTPError: Response code 404 (Not Found)\n at Request. (file:///var/www/peertube/versions/peertube-v6.3.2/node_modules/got/dist/source/as-promise/index.js:86:42)\n at Object.onceWrapper (node:events:634:26)\n at Request.emit (node:events:531:35)\n at Request._onResponseBase (file:///var/www/peertube/versions/peertube-v6.3.2/node_modules/got/dist/source/core/index.js:726:22)\n at process.processTicksAndRejections (node:internal/process/task_queues:95:5)\n at async Request._onResponse (file:///var/www/peertube/versions/peertube-v6.3.2/node_modules/got/dist/source/core/index.js:768:13) »,

Still getting socket hangups all the time.

Nov 06 20:32:12 arthur-server prunner[151890]: [20:32:12.156] INFO (151890): Checking available jobs on https://example.org
Nov 06 20:46:29 arthur-server prunner[151890]: [20:46:29.637] INFO (151890): Downloaded input file https://example.org/api/v1/runners/jobs/a628bb2a-92c2-4f0e-a26b-a0e4fdaba377/files/videos/6643d0cb-deec-4783-9723-4b482a8a5872>
Nov 07 00:46:38 arthur-server prunner[151890]: [00:46:38.499] ERROR (151890): Cannot process job
Nov 07 00:46:38 arthur-server prunner[151890]:     err: {
Nov 07 00:46:38 arthur-server prunner[151890]:       "type": "Error",
Nov 07 00:46:38 arthur-server prunner[151890]:       "message": "socket hang up",
Nov 07 00:46:38 arthur-server prunner[151890]:       "stack":
Nov 07 00:46:38 arthur-server prunner[151890]:           Error: socket hang up
Nov 07 00:46:38 arthur-server prunner[151890]:               at connResetException (node:internal/errors:720:14)
Nov 07 00:46:38 arthur-server prunner[151890]:               at TLSSocket.socketOnEnd (node:_http_client:525:23)
Nov 07 00:46:38 arthur-server prunner[151890]:               at TLSSocket.emit (node:events:529:35)
Nov 07 00:46:38 arthur-server prunner[151890]:               at endReadableNT (node:internal/streams/readable:1400:12)
Nov 07 00:46:38 arthur-server prunner[151890]:               at process.processTicksAndRejections (node:internal/process/task_queues:82:21)
Nov 07 00:46:38 arthur-server prunner[151890]:       "code": "ECONNRESET"
Nov 07 00:46:38 arthur-server prunner[151890]:     }
Nov 07 00:46:38 arthur-server prunner[151890]: [00:46:38.513] INFO (151890): Checking available jobs on https://example.org
...skipping...
Nov 06 20:32:10 arthur-server prunner[151890]:       "message": "Expected status 204, got 408. \nThe server responded: \"\".\nYou may take a closer look at the logs. To see how to do so, check out this page: https://github.com/Chocoboz>
Nov 06 20:32:10 arthur-server prunner[151890]:       "stack":
Nov 06 20:32:10 arthur-server prunner[151890]:           Error: Expected status 204, got 408.
Nov 06 20:32:10 arthur-server prunner[151890]:           The server responded: "".
Nov 06 20:32:10 arthur-server prunner[151890]:           You may take a closer look at the logs. To see how to do so, check out this page: https://github.com/Chocobozzz/PeerTube/blob/develop/support/doc/development/tests.md#debug-serve>
Nov 06 20:32:10 arthur-server prunner[151890]:               at buildRequest (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:81177:14)
Nov 06 20:32:10 arthur-server prunner[151890]:               at makeUploadRequest (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:81103:9)
Nov 06 20:32:10 arthur-server prunner[151890]:               at RunnerJobsCommand.postUploadRequest (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:81256:12)
Nov 06 20:32:10 arthur-server prunner[151890]:               at RunnerJobsCommand.success (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:81812:17)
Nov 06 20:32:10 arthur-server prunner[151890]:               at processHLSTranscoding (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:86334:29)
Nov 06 20:32:10 arthur-server prunner[151890]:               at async processJob (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:93889:7)
Nov 06 20:32:10 arthur-server prunner[151890]:           ----
Nov 06 20:32:10 arthur-server prunner[151890]:               at file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:81180:19
Nov 06 20:32:10 arthur-server prunner[151890]:               at file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:51102:17
Nov 06 20:32:10 arthur-server prunner[151890]:               at Test._assertFunction (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:51089:17)
Nov 06 20:32:10 arthur-server prunner[151890]:               at Test.assert (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:50980:27)
Nov 06 20:32:10 arthur-server prunner[151890]:               at localAssert (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:50949:18)
Nov 06 20:32:10 arthur-server prunner[151890]:               at file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:50952:11
Nov 06 20:32:10 arthur-server prunner[151890]:               at Request3.callback (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:50520:7)
Nov 06 20:32:10 arthur-server prunner[151890]:               at IncomingMessage.<anonymous> (file:///usr/local/lib/node_modules/@peertube/peertube-runner/dist/peertube-runner.js:50694:22)
Nov 06 20:32:10 arthur-server prunner[151890]:               at IncomingMessage.emit (node:events:529:35)
Nov 06 20:32:10 arthur-server prunner[151890]:               at endReadableNT (node:internal/streams/readable:1400:12)
Nov 06 20:32:10 arthur-server prunner[151890]:       "res": {
Nov 06 20:32:10 arthur-server prunner[151890]:         "req": {
Nov 06 20:32:10 arthur-server prunner[151890]:           "method": "POST",
Nov 06 20:32:10 arthur-server prunner[151890]:           "url": "https://example.org/api/v1/runners/jobs/a628bb2a-92c2-4f0e-a26b-a0e4fdaba377/success",
Nov 06 20:32:10 arthur-server prunner[151890]:           "headers": {}
Nov 06 20:32:10 arthur-server prunner[151890]:         },
Nov 06 20:32:10 arthur-server prunner[151890]:         "header": {
Nov 06 20:32:10 arthur-server prunner[151890]:           "server": "nginx/1.27.2",
Nov 06 20:32:10 arthur-server prunner[151890]:           "date": "Wed, 06 Nov 2024 20:32:08 GMT",
Nov 06 20:32:10 arthur-server prunner[151890]:           "transfer-encoding": "chunked",
Nov 06 20:32:10 arthur-server prunner[151890]:           "connection": "close",
Nov 06 20:32:10 arthur-server prunner[151890]:           "strict-transport-security": "max-age=31536000"
Nov 06 20:32:10 arthur-server prunner[151890]:         },
Nov 06 20:32:10 arthur-server prunner[151890]:         "status": 408,
Nov 06 20:32:10 arthur-server prunner[151890]:         "text": ""
Nov 06 20:32:10 arthur-server prunner[151890]:       }
Nov 06 20:32:10 arthur-server prunner[151890]:     }
Nov 06 20:32:10 arthur-server prunner[151890]: [20:32:10.984] INFO (151890): Checking available jobs on https://example.org
Nov 06 20:32:11 arthur-server prunner[151890]: [20:32:11.773] INFO (151890): [https://example.org] Processing job of type vod-hls-transcoding: a628bb2a-92c2-4f0e-a26b-a0e4fdaba377
Nov 06 20:32:11 arthur-server prunner[151890]: [20:32:11.775] INFO (151890): Downloading input file https://example.org/api/v1/runners/jobs/a628bb2a-92c2-4f0e-a26b-a0e4fdaba377/files/videos/6643d0cb-deec-4783-9723-4b482a8a587>
Nov 06 20:32:12 arthur-server prunner[151890]: [20:32:12.156] INFO (151890): Checking available jobs on https://example.org
Nov 06 20:46:29 arthur-server prunner[151890]: [20:46:29.637] INFO (151890): Downloaded input file https://example.org/api/v1/runners/jobs/a628bb2a-92c2-4f0e-a26b-a0e4fdaba377/files/videos/6643d0cb-deec-4783-9723-4b482a8a5872>
Nov 07 00:46:38 arthur-server prunner[151890]: [00:46:38.499] ERROR (151890): Cannot process job
Nov 07 00:46:38 arthur-server prunner[151890]:     err: {
Nov 07 00:46:38 arthur-server prunner[151890]:       "type": "Error",
Nov 07 00:46:38 arthur-server prunner[151890]:       "message": "socket hang up",
Nov 07 00:46:38 arthur-server prunner[151890]:       "stack":
Nov 07 00:46:38 arthur-server prunner[151890]:           Error: socket hang up
Nov 07 00:46:38 arthur-server prunner[151890]:               at connResetException (node:internal/errors:720:14)
Nov 07 00:46:38 arthur-server prunner[151890]:               at TLSSocket.socketOnEnd (node:_http_client:525:23)
Nov 07 00:46:38 arthur-server prunner[151890]:               at TLSSocket.emit (node:events:529:35)
Nov 07 00:46:38 arthur-server prunner[151890]:               at endReadableNT (node:internal/streams/readable:1400:12)
Nov 07 00:46:38 arthur-server prunner[151890]:               at process.processTicksAndRejections (node:internal/process/task_queues:82:21)
Nov 07 00:46:38 arthur-server prunner[151890]:       "code": "ECONNRESET"
Nov 07 00:46:38 arthur-server prunner[151890]:     }
Nov 07 00:46:38 arthur-server prunner[151890]: [00:46:38.513] INFO (151890): Checking available jobs on https://example.org

My (partial) docker compose config:

  peertube:
    # If you don't want to use the official image and build one from sources:
    # build:
    #   context: .
    #   dockerfile: ./support/docker/production/Dockerfile.bookworm
    image: chocobozzz/peertube:production-bookworm
    # NOTE(review): this container is attached to TWO networks ("default" and
    # "peertube-network"). nginx-proxy picks one of the container's addresses
    # each time it regenerates its config, so the upstream can flap between the
    # static 172.18.0.42 and the dynamically assigned "default" address — the
    # likely cause of the "runner IP randomly shows the Docker gateway" and
    # intermittent upload-failure symptoms. Either attach the proxy container
    # to "peertube-network" too, or drop the "default" attachment here so only
    # one reachable address exists — confirm which networks the proxy joins.
    # Use a static IP for this container because nginx does not handle proxy host change without reload
    networks:
      default:
      peertube-network:
        ipv4_address: 172.18.0.42
        ipv6_address: 2001:db8:2::42
    env_file:
      - ./peertube/.env
    environment:
      - VIRTUAL_PORT=9000
      - VIRTUAL_HOST=example.org
      - LETSENCRYPT_HOST=example.org
      # Must be a valid e-mail ADDRESS, not a bare domain: acme-companion
      # passes this value to the ACME server for account registration.
      - LETSENCRYPT_EMAIL=admin@example.org
    ports:
     - "1935:1935" # Comment if you don't want to use the live feature
     # - "9000:9000" # Uncomment if you use another webserver/proxy or test PeerTube in local, otherwise not suitable for production
     # - "52800:52800"
    volumes:
      # Remove the following line if you want to use another webserver/proxy or test PeerTube in local
    # - assets:/app/client/dist
      - ./peertube/docker-volume/data:/data
      - ./peertube/docker-volume/config:/config
      - certs:/data/plugins/data/peertube-plugin-livechat/prosodyAppImage/squashfs-root/etc/prosody/certs
    # This container could be restarted on crash or until the postgresql database is ready for connection
    depends_on:
      - proxy
      # - letsencrypt
      - postgres
      - redis
      - postfix
    restart: "unless-stopped"


networks:
  # User-defined bridge network on which the peertube service pins a static address.
  peertube-network:
    enable_ipv6: true
    ipam:
      driver: default
      config:
      # NOTE(review): 172.18.0.0/16 falls inside Docker's default address pool;
      # the proxy's generated config shows the container at 172.20.0.8 on another
      # network — verify the subnets of "default" and this network don't overlap.
      - subnet: 172.18.0.0/16
      # NOTE(review): 2001:db8::/32 is the RFC 3849 *documentation* prefix —
      # fine for container-to-container traffic but never routable to the
      # internet, which matches the runner's "connect ENETUNREACH <IPv6>"
      # errors when a container tries to reach the outside world over IPv6.
      - subnet: 2001:db8:2::/64

My (partial) Nginx proxy config:

# example.org/
# Upstream generated by nginx-proxy from the container's labels/networks.
# NOTE(review): the generator chose the docker_default address (172.20.0.8)
# and reports the static docker_peertube-network address as unreachable —
# i.e. the proxy container is not attached to peertube-network. Because the
# PeerTube container sits on two networks, which address gets emitted here can
# change on regeneration; presumably this is why the backend sometimes sees
# the gateway IP instead of the runner/container IP — verify proxy networks.
upstream example.org {
    # Container: docker-peertube-1
    #     networks:
    #         docker_default (reachable)
    #         docker_peertube-network (unreachable)
    #     IP address: 172.20.0.8
    #     exposed ports (first ten): 1935/tcp 9000/tcp
    #     default port: 80
    #     using port: 9000
    server 172.20.0.8:9000;
}
# Plain-HTTP vhost: answers ACME HTTP-01 challenges directly and sends every
# other request to the HTTPS vhost below.
server {
    listen 80;
    server_name example.org;
    access_log /var/log/nginx/access.log vhost;

    # Let's Encrypt challenge files must stay reachable over plain HTTP and
    # must bypass any authentication layers.
    location ^~ /.well-known/acme-challenge/ {
        allow all;
        auth_basic off;
        auth_request off;
        root /usr/share/nginx/html;
        try_files $uri =404;
        break;
    }

    # Everything else: permanent redirect to HTTPS.
    location / {
        return 301 https://$host$request_uri;
    }
}
# HTTPS vhost generated by nginx-proxy; PeerTube-specific routing lives in the
# included vhost.d file.
server {
    server_name example.org;
    access_log /var/log/nginx/access.log vhost;
    # Standalone http2 directive (requires nginx >= 1.25.1; response headers in
    # the log above report nginx/1.27.2, so that's satisfied).
    http2 on;
    listen 443 ssl ;
    ssl_session_timeout 5m;
    ssl_session_cache shared:SSL:50m;
    ssl_session_tickets off;
    ssl_certificate /etc/nginx/certs/example.org.crt;
    ssl_certificate_key /etc/nginx/certs/example.org.key;
    ssl_dhparam /etc/nginx/certs/example.org.dhparam.pem;
    ssl_stapling on;
    ssl_stapling_verify on;
    ssl_trusted_certificate /etc/nginx/certs/example.org.chain.pem;
    # add_header emits nothing when the value is empty, so HSTS is only sent
    # when the request actually arrived over HTTPS ($https non-empty).
    set $sts_header "";
    if ($https) {
        set $sts_header "max-age=31536000";
    }
    add_header Strict-Transport-Security $sts_header always;
    # PeerTube API/upload/websocket locations (see vhost.d dump below).
    include /etc/nginx/vhost.d/example.org;
    # Fallback for everything the include does not match; proxy headers are
    # presumably set in a shared nginx-proxy conf — not visible here, verify.
    location / {
        proxy_pass http://example.org;
        set $upstream_keepalive false;
    }
}

# configuration file /etc/nginx/vhost.d/example.org:
# Minimum Nginx version required:  1.13.0 (released Apr 25, 2017)
# Please check your Nginx installation features the following modules via 'nginx -V':
# STANDARD HTTP MODULES: Core, Proxy, Rewrite, Access, Gzip, Headers, HTTP/2, Log, Real IP, SSL, Thread Pool, Upstream, AIO Multithreading.
# THIRD PARTY MODULES:   None.

  ##
  # Performance optimizations
  # For extra performance please refer to https://github.com/denji/nginx-tuning
  ##

  # root /var/www/peertube/storage;
# 
  # Enable compression for JS/CSS/HTML, for improved client load times.
  # It might be nice to compress JSON/XML as returned by the API, but
  # leaving that out to protect against potential BREACH attack.
  gzip              on;
  gzip_vary         on;
  gzip_types        # text/html is always compressed by HttpGzipModule
                    text/css
                    application/javascript
                    font/truetype
                    font/opentype
                    application/vnd.ms-fontobject
                    image/svg+xml;
  gzip_min_length   1000; # default is 20 bytes
  gzip_buffers      16 8k;
  gzip_comp_level   2; # default is 1

  # NOTE(review): client_body_timeout is the allowed gap between successive
  # body reads; a 30s stall while a runner uploads a multi-gigabyte result
  # will abort the request with HTTP 408 — which matches the empty-body 408
  # logged on POST /api/v1/runners/jobs/<id>/success above. Consider raising
  # it (or scoping a larger value to the runner locations) if stalls recur.
  client_body_timeout       30s; # default is 60
  client_header_timeout     10s; # default is 60
  send_timeout              10s; # default is 60
  keepalive_timeout         10s; # default is 75
  resolver_timeout          10s; # default is 30
  reset_timedout_connection on;
  proxy_ignore_client_abort on;

  tcp_nopush                on; # send headers in one piece
  tcp_nodelay               on; # don't buffer data sent, good for small data bursts in real time

  ##
  # Application
  ##

  # Shared proxy target for all API locations below (reached via
  # "try_files /dev/null @api"); per-location directives such as
  # client_max_body_size are set in the callers and override the 100k here.
  location @api {
  
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header Host            $host;
    proxy_set_header X-Real-IP       $remote_addr;

    # Conservative default body cap for plain API calls; upload locations
    # override this with their own (much larger) limits.
    client_max_body_size  100k; # default is 1M

    # Long timeouts so large proxied uploads/downloads are not cut off mid-transfer.
    proxy_connect_timeout 10m;
    proxy_send_timeout    10m;
    proxy_read_timeout    10m;
    send_timeout          10m;

    proxy_pass http://example.org;
  }

  # Resumable uploads: no body-size cap and no request buffering, so each
  # chunk is streamed straight to PeerTube instead of being spooled to disk.
  location ~ ^/api/v1/videos/(upload-resumable|([^/]+/source/replace-resumable))$ {
    client_max_body_size    0;
    proxy_request_buffering off;

    # "try_files /dev/null @api" is the stock trick to jump to the named
    # location while keeping this location's directives in effect.
    try_files /dev/null @api;
  }

  # Same streaming treatment for resumable video imports.
  location ~ ^/api/v1/users/[^/]+/imports/import-resumable$ {
    client_max_body_size    0;
    proxy_request_buffering off;

    try_files /dev/null @api;
  }

  # Legacy (non-resumable) upload and studio-edit endpoints; only POST/HEAD
  # make sense here, everything else is rejected outright.
  location ~ ^/api/v1/videos/(upload|([^/]+/studio/edit))$ {
    limit_except POST HEAD { deny all; }

    # This is the maximum upload size, which roughly matches the maximum size of a video file.
    # Note that temporary space is needed equal to the total size of all concurrent uploads.
    # This data gets stored in /var/lib/nginx by default, so you may want to put this directory
    # on a dedicated filesystem.
    client_max_body_size                      12G; # default is 1M
    add_header            X-File-Maximum-Size 8G always; # inform backend of the set value in bytes before mime-encoding (x * 1.4 >= client_max_body_size)

    try_files /dev/null @api;
  }

  # Remote-runner result endpoints: runners POST transcoded files here, which
  # can be multi-gigabyte, so lift the body-size cap well above any expected file.
  location ~ ^/api/v1/runners/jobs/[^/]+/(update|success)$ {
    client_max_body_size                      12G; # default is 1M
    add_header            X-File-Maximum-Size 8G always; # inform backend of the set value in bytes before mime-encoding (x * 1.4 >= client_max_body_size)

    # Stream the body to PeerTube instead of spooling up to 12G into nginx's
    # proxy temp directory first (mirrors the *-resumable locations above).
    # Full-request buffering delays the upstream's view of the request until
    # the entire upload has landed on disk, inviting timeouts on huge files
    # and risking temp-space exhaustion.
    proxy_request_buffering off;

    try_files /dev/null @api;
  }

  # Remaining metadata endpoints (thumbnails, avatars, playlist images, …):
  # modest body cap, larger than the @api default but far below upload sizes.
  location ~ ^/api/v1/(videos|video-playlists|video-channels|users/me) {
    client_max_body_size                      6M; # default is 1M
    add_header            X-File-Maximum-Size 4M always; # inform backend of the set value in bytes before mime-encoding (x * 1.4 >= client_max_body_size)

    try_files /dev/null @api;
  }

  ##
  # Websocket
  ##

  # Shared proxy target for websocket locations: forces HTTP/1.1 and passes
  # the Upgrade/Connection pair through so the tunnel can be established.
  location @api_websocket {
    proxy_http_version 1.1;
    proxy_set_header   X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header   Host            $host;
    proxy_set_header   X-Real-IP       $remote_addr;
    proxy_set_header   Upgrade         $http_upgrade;
    proxy_set_header   Connection      "upgrade";

    proxy_pass http://example.org;
  }

  # Socket.IO endpoint (notifications, live chat transport, …).
  location /socket.io {
    try_files /dev/null @api_websocket;
  }

  location /tracker/socket {
    # Peers send a message to the tracker every 15 minutes
    # Don't close the websocket before then
    proxy_read_timeout 15m; # default is 60s

    try_files /dev/null @api_websocket;
  }

  # Plugin websocket routes
  location ~ ^/plugins/[^/]+(/[^/]+)?/ws/ {
    try_files /dev/null @api_websocket;
  }
  

# configuration file /etc/nginx/conf.d/my_custom_proxy_settings.conf:
# http-level default body cap; the per-location 12G / 0 (unlimited) values in
# the vhost.d file above override this inside those locations.
client_max_body_size 4096m;