File size: 3,436 Bytes
f478a41
 
 
 
 
ba7f421
f478a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a8c984
f478a41
 
 
 
 
 
 
 
0a8c984
f478a41
0a8c984
 
f478a41
 
 
db100dd
 
 
 
 
0a8c984
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f478a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
const express = require('express');
const fetch = require('node-fetch');

// Express application that hosts the queueing reverse proxy defined below.
const app = express();
// Base URL of the upstream endpoint; each incoming request's path and query
// string (req.url) is appended to it to build the proxied URL.
const TARGET_URL = 'https://rhknk53jznw37un7.us-east-1.aws.endpoints.huggingface.cloud';
// Maximum number of requests handled at the same time; any request arriving
// while this many are active is parked in `queue`.
const MAX_PARALLEL = 1;
// Total time budget for one proxied request, retries and backoff included.
const MAX_WAIT_MS = 10 * 60 * 1000; // 10 minutes

// Count of requests currently being handled (compared against MAX_PARALLEL).
let activeRequests = 0;
// Pending { req, res, next } entries, served first-in first-out (push/shift).
const queue = [];

/**
 * Starts queued requests for as long as there is both work waiting and a free
 * concurrency slot.
 *
 * Each started request occupies one slot (`activeRequests`). When its handler
 * settles, the slot is released and the queue is drained again, so waiting
 * requests are picked up in arrival order.
 *
 * @returns {Promise<void>} Resolves once the currently startable requests have
 *   been dispatched; it does not wait for them to complete.
 */
async function processQueue() {
  for (;;) {
    const nothingWaiting = queue.length === 0;
    if (nothingWaiting || activeRequests >= MAX_PARALLEL) {
      return;
    }

    const job = queue.shift();
    activeRequests += 1;

    // Free the slot whether the handler succeeded or failed, then look for
    // more queued work.
    const releaseSlot = () => {
      activeRequests -= 1;
      processQueue();
    };

    handleRequest(job.req, job.res, job.next).finally(releaseSlot);
  }
}

/**
 * Releases the body of a response that is about to be thrown away so the
 * underlying connection is not left waiting for a reader.
 *
 * @param {{ body?: any }} response - Response whose body will not be used.
 */
function discardResponseBody(response) {
  const body = response.body;
  if (!body) {
    return;
  }

  if (typeof body.resume === 'function') {
    // Node.js stream (what node-fetch exposes). The response is being dropped,
    // so a late stream error is irrelevant - but without a listener it would
    // surface as an uncaught exception.
    if (typeof body.on === 'function') {
      body.on('error', () => {});
    }
    body.resume();
  } else if (typeof body.cancel === 'function') {
    // WHATWG stream; a failed cancel on a discarded body needs no handling.
    body.cancel().catch(() => {});
  }
}

/**
 * Fetches `url`, retrying failed responses within the MAX_WAIT_MS budget.
 *
 * - 2xx and 401 responses are returned immediately.
 * - 503 responses are retried with exponential backoff (1s, 2s, 4s, ... capped
 *   at 64s) until the next wait would exceed the budget.
 * - Any other error status is retried up to 3 times (after 1s, 2s, 3s).
 *
 * The two retry budgets are tracked independently, so a run of 503s does not
 * use up the retries for other errors and vice versa.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} options - Options passed unchanged to `fetch` on every attempt.
 * @param {number} startTime - Epoch milliseconds at which the budget started.
 * @returns {Promise<object>} The first acceptable response, or the last error
 *   response once retries or the time budget are exhausted.
 * @throws {Error} If the budget is already spent before an attempt starts;
 *   errors thrown by `fetch` itself propagate unchanged.
 */
async function retryWith503Backoff(url, options, startTime) {
  const MAX_503_DELAY_MS = 64000;
  const MAX_OTHER_RETRIES = 3;

  let unavailableRetries = 0;
  let otherRetries = 0;

  while (true) {
    if (Date.now() - startTime > MAX_WAIT_MS) {
      throw new Error('Max wait time exceeded (10 minutes)');
    }

    const response = await fetch(url, options);

    // Success, or an auth failure that retrying cannot fix.
    if (response.ok || response.status === 401) {
      return response;
    }

    let delay;
    if (response.status === 503) {
      delay = Math.min(1000 * Math.pow(2, unavailableRetries), MAX_503_DELAY_MS);
      unavailableRetries += 1;
    } else {
      if (otherRetries >= MAX_OTHER_RETRIES) {
        return response;
      }
      otherRetries += 1;
      delay = otherRetries * 1000;
    }

    // Measure elapsed time after the fetch: a slow attempt counts against the
    // budget too, so hand back the error instead of sleeping past the deadline.
    if (Date.now() - startTime + delay > MAX_WAIT_MS) {
      return response;
    }

    discardResponseBody(response);
    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}

// Content encodings that node-fetch decodes by default (its `compress`
// option). When the upstream used one of these, the bytes piped to the client
// are already decoded, so the upstream's Content-Encoding / Content-Length no
// longer describe them.
const DECODED_CONTENT_ENCODINGS = new Set(['gzip', 'x-gzip', 'deflate', 'x-deflate', 'br']);

// Connection-level headers that describe the upstream hop only and must not be
// forwarded by an intermediary.
const HOP_BY_HOP_HEADERS = new Set(['connection', 'keep-alive', 'transfer-encoding']);

/**
 * Proxies one incoming request to TARGET_URL and streams the answer back.
 *
 * The returned promise settles only after the response has been fully written
 * (or the client connection closed), so callers that release a concurrency
 * slot when it settles keep the slot for the whole upstream exchange.
 *
 * @param {object} req - Express request; `req.body` is expected to be the raw
 *   Buffer produced by the body parser when the request carried a body.
 * @param {object} res - Express response the upstream answer is written to.
 * @param {Function} next - Express `next` callback (unused; kept so the
 *   function can be called with the middleware arguments).
 * @returns {Promise<void>} Never rejects for upstream failures; those are
 *   reported to the client as a 504 JSON body.
 */
async function handleRequest(req, res, next) {
  try {
    const startTime = Date.now();
    const targetUrl = `${TARGET_URL}${req.url}`;

    // Only forward a real payload. When a request has no body the parser may
    // leave a placeholder object in req.body, which must not be sent upstream.
    const canHaveBody = req.method !== 'GET' && req.method !== 'HEAD';
    const body = canHaveBody && Buffer.isBuffer(req.body) ? req.body : undefined;

    const options = {
      method: req.method,
      headers: { ...req.headers, host: new URL(TARGET_URL).host },
      body,
    };

    const response = await retryWith503Backoff(targetUrl, options, startTime);

    res.status(response.status);

    const upstreamEncoding = String(response.headers.get('content-encoding') || '')
      .trim()
      .toLowerCase();
    const bodyWasDecoded = DECODED_CONTENT_ENCODINGS.has(upstreamEncoding);

    response.headers.forEach((value, key) => {
      const name = key.toLowerCase();
      if (HOP_BY_HOP_HEADERS.has(name)) {
        return;
      }
      if (bodyWasDecoded && (name === 'content-encoding' || name === 'content-length')) {
        return;
      }
      res.setHeader(key, value);
    });

    if (!response.body) {
      res.end();
      return;
    }

    // Wait for the stream to finish instead of returning right after pipe().
    await new Promise((resolve, reject) => {
      response.body.once('error', reject);
      res.once('error', reject);
      res.once('finish', resolve);
      res.once('close', () => {
        // Client went away (or the response is done): stop reading upstream.
        if (typeof response.body.destroy === 'function') {
          response.body.destroy();
        }
        resolve();
      });
      response.body.pipe(res);
    });
  } catch (error) {
    if (res.headersSent) {
      // The status line is already on the wire; the only honest signal left is
      // to abort the connection.
      res.destroy(error);
      return;
    }
    res.status(504).json({ error: error.message });
  }
}

// Buffer every request body as raw bytes, whatever its content type, so the
// same payload can be forwarded upstream.
const rawBodyOptions = { type: '*/*', limit: '50mb' };
app.use(express.raw(rawBodyOptions));

// Admission control: run the request right away when a slot is free,
// otherwise park it until processQueue() picks it up.
app.use(function admitOrEnqueue(req, res, next) {
  const atCapacity = activeRequests >= MAX_PARALLEL;
  if (atCapacity) {
    queue.push({ req, res, next });
    return;
  }

  activeRequests += 1;
  handleRequest(req, res, next).finally(function releaseSlot() {
    activeRequests -= 1;
    processQueue();
  });
});

// Port to bind: the PORT environment variable when set, otherwise 7860.
const PORT = process.env.PORT || 7860;

// Start listening and print the effective configuration once the server is up.
app.listen(PORT, function logStartup() {
  const banner = [
    `Reverse proxy listening on port ${PORT}`,
    `Proxying to: ${TARGET_URL}`,
    `Max parallel requests: ${MAX_PARALLEL}`,
  ];
  for (const line of banner) {
    console.log(line);
  }
});