socks_edge.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. from __future__ import annotations
  2. import asyncio
  3. import contextlib
  4. import itertools
  5. import socket
  6. import struct
  7. from dataclasses import dataclass, field
  8. from typing import Dict
  9. from .config import Config, RelayNode
  10. from .protocol import AUTH, STATUS_OK, TCP_CLOSE, TCP_DATA, TCP_OPEN, TCP_STATUS, UDP_RECV, UDP_SEND, Frame, decode_json, encode_json, read_frame, write_frame
  11. from .scheduler import Scheduler
# SOCKS protocol version byte that the handshake requires in both the client's
# greeting and its request; any other value is rejected.
SOCKS_VERSION = 5
  13. async def read_exact(reader: asyncio.StreamReader, size: int) -> bytes:
  14. return await reader.readexactly(size)
  15. @dataclass(eq=False)
  16. class RelayLink:
  17. node: RelayNode
  18. reader: asyncio.StreamReader
  19. writer: asyncio.StreamWriter
  20. pump: asyncio.Task | None = None
  21. tcp_sessions: Dict[tuple[int, int], "TcpRaceSession"] = field(default_factory=dict)
  22. udp_server: "UdpAssociateServer | None" = None
  23. closed: bool = False
  24. async def start(self) -> None:
  25. await write_frame(self.writer, Frame(AUTH, 0, 0, 0, 0, encode_json({"token": self.node.token})))
  26. frame = await read_frame(self.reader)
  27. if frame.kind != AUTH or frame.packet_id != STATUS_OK:
  28. raise ConnectionError(f"relay auth failed: {self.node.name}")
  29. self.pump = asyncio.create_task(self._pump())
  30. async def _pump(self) -> None:
  31. try:
  32. while True:
  33. frame = await read_frame(self.reader)
  34. key = (frame.session_id, frame.stream_id)
  35. if frame.kind in (TCP_STATUS, TCP_DATA, TCP_CLOSE):
  36. session = self.tcp_sessions.get(key)
  37. if session:
  38. await session.handle_frame(self, frame)
  39. elif frame.kind == UDP_RECV and self.udp_server:
  40. await self.udp_server.handle_from_relay(frame, self)
  41. except asyncio.IncompleteReadError:
  42. pass
  43. finally:
  44. await self.close()
  45. async def send(self, frame: Frame) -> None:
  46. if self.closed:
  47. raise ConnectionError(f"relay closed: {self.node.name}")
  48. await write_frame(self.writer, frame)
  49. async def close(self) -> None:
  50. if self.closed:
  51. return
  52. self.closed = True
  53. self.writer.close()
  54. with contextlib.suppress(Exception):
  55. await self.writer.wait_closed()
  56. @dataclass
  57. class UdpFlowState:
  58. flow_id: int
  59. client_addr: tuple[str, int]
  60. target_host: str
  61. target_port: int
  62. created_at: float
  63. last_activity: float
  64. packets_sent: int = 0
  65. packets_received: int = 0
  66. duplicate_responses: int = 0
  67. winner_name: str | None = None
  68. candidate_names: tuple[str, ...] = ()
  69. def touch(self, now: float) -> None:
  70. self.last_activity = now
  71. @dataclass
  72. class TcpRaceSession:
  73. session_id: int
  74. stream_id: int
  75. target_host: str
  76. target_port: int
  77. local_reader: asyncio.StreamReader
  78. local_writer: asyncio.StreamWriter
  79. links: list[RelayLink]
  80. warmup_bytes: int
  81. winning_link: RelayLink | None = None
  82. winner_name: str | None = None
  83. opened: int = 0
  84. open_errors: list[str] = field(default_factory=list)
  85. uplink_bytes: int = 0
  86. closed: bool = False
  87. open_event: asyncio.Event = field(default_factory=asyncio.Event)
  88. winner_event: asyncio.Event = field(default_factory=asyncio.Event)
  89. pump_task: asyncio.Task | None = None
  90. win_counts: Dict[str, int] = field(default_factory=dict)
  91. async def start(self) -> None:
  92. meta = encode_json({"host": self.target_host, "port": self.target_port})
  93. for link in self.links:
  94. link.tcp_sessions[(self.session_id, self.stream_id)] = self
  95. await link.send(Frame(TCP_OPEN, self.session_id, self.stream_id, 0, 0, meta))
  96. await asyncio.wait_for(self.open_event.wait(), timeout=10)
  97. if self.opened == 0:
  98. raise ConnectionError(self.open_errors[0] if self.open_errors else "all relays failed")
  99. self.pump_task = asyncio.create_task(self._pump_local())
  100. async def _pump_local(self) -> None:
  101. try:
  102. while True:
  103. chunk = await self.local_reader.read(65536)
  104. if not chunk:
  105. break
  106. self.uplink_bytes += len(chunk)
  107. if self.winning_link is None and self.uplink_bytes <= self.warmup_bytes:
  108. await asyncio.gather(*(link.send(Frame(TCP_DATA, self.session_id, self.stream_id, 0, 0, chunk)) for link in self.links if not link.closed), return_exceptions=True)
  109. else:
  110. if self.winning_link is None:
  111. await self.winner_event.wait()
  112. if self.winning_link:
  113. await self.winning_link.send(Frame(TCP_DATA, self.session_id, self.stream_id, 0, 0, chunk))
  114. except Exception:
  115. pass
  116. finally:
  117. await self.close()
  118. async def handle_frame(self, link: RelayLink, frame: Frame) -> None:
  119. if self.closed:
  120. return
  121. if frame.kind == TCP_STATUS:
  122. if frame.packet_id == STATUS_OK:
  123. self.opened += 1
  124. else:
  125. self.open_errors.append(frame.payload.decode("utf-8", errors="replace"))
  126. if self.opened > 0 or len(self.open_errors) == len(self.links):
  127. self.open_event.set()
  128. return
  129. if frame.kind == TCP_DATA:
  130. if self.winning_link is None:
  131. self.winning_link = link
  132. self.winner_name = link.node.name
  133. self.win_counts[link.node.name] = self.win_counts.get(link.node.name, 0) + 1
  134. node_total = self.win_counts[link.node.name]
  135. relay_detail = ", ".join(f"{name}={count}" for name, count in sorted(self.win_counts.items())) or "none"
  136. print(f"[edge] tcp win session={self.session_id} target={self.target_host}:{self.target_port} winner={link.node.name} node_total={node_total} win_breakdown={relay_detail}")
  137. self.winner_event.set()
  138. await self._close_losers(except_link=link)
  139. if link is self.winning_link:
  140. self.local_writer.write(frame.payload)
  141. await self.local_writer.drain()
  142. return
  143. if frame.kind == TCP_CLOSE:
  144. if self.winning_link is None:
  145. self.winning_link = link
  146. self.winner_event.set()
  147. if link is self.winning_link:
  148. await self.close()
  149. async def _close_losers(self, except_link: RelayLink) -> None:
  150. await asyncio.gather(*(link.send(Frame(TCP_CLOSE, self.session_id, self.stream_id, 0, 0, b"")) for link in self.links if link is not except_link and not link.closed), return_exceptions=True)
  151. async def close(self) -> None:
  152. if self.closed:
  153. return
  154. self.closed = True
  155. if self.pump_task and self.pump_task is not asyncio.current_task():
  156. self.pump_task.cancel()
  157. with contextlib.suppress(Exception):
  158. await self.pump_task
  159. await asyncio.gather(*(link.send(Frame(TCP_CLOSE, self.session_id, self.stream_id, 0, 0, b"")) for link in self.links if not link.closed), return_exceptions=True)
  160. for link in self.links:
  161. link.tcp_sessions.pop((self.session_id, self.stream_id), None)
  162. self.local_writer.close()
  163. with contextlib.suppress(Exception):
  164. await self.local_writer.wait_closed()
  165. class UdpAssociateServer(asyncio.DatagramProtocol):
  166. def __init__(self, edge: "SocksEdge") -> None:
  167. self.edge = edge
  168. self.transport: asyncio.DatagramTransport | None = None
  169. self.client_addr = None
  170. self.packet_counter = itertools.count(1)
  171. self.pending: set[int] = set()
  172. self.packet_flows: dict[int, int] = {}
  173. self.client_flows: dict[tuple[tuple[str, int], str, int], UdpFlowState] = {}
  174. self.flow_counter = itertools.count(1)
  175. self.last_summary_at = 0.0
  176. self.win_counts: Dict[str, int] = {}
  177. def connection_made(self, transport) -> None:
  178. self.transport = transport
  179. def datagram_received(self, data: bytes, addr) -> None:
  180. if len(data) < 10:
  181. return
  182. if self.client_addr is None:
  183. self.client_addr = addr
  184. print(f"[edge] udp client bound addr={addr[0]}:{addr[1]}")
  185. if addr != self.client_addr:
  186. return
  187. host, port, payload = self._parse_socks_udp(data)
  188. loop = asyncio.get_running_loop()
  189. now = loop.time()
  190. flow_key = ((addr[0], addr[1]), host, port)
  191. flow = self.client_flows.get(flow_key)
  192. if flow is None:
  193. flow = UdpFlowState(
  194. flow_id=next(self.flow_counter),
  195. client_addr=(addr[0], addr[1]),
  196. target_host=host,
  197. target_port=port,
  198. created_at=now,
  199. last_activity=now,
  200. )
  201. self.client_flows[flow_key] = flow
  202. flow.touch(now)
  203. flow.packets_sent += 1
  204. packet_id = next(self.packet_counter)
  205. self.pending.add(packet_id)
  206. self.packet_flows[packet_id] = flow.flow_id
  207. print(f"[edge] udp recv flow={flow.flow_id} packet_id={packet_id} target={host}:{port} size={len(payload)}")
  208. asyncio.create_task(self.edge.forward_udp(host, port, payload, packet_id, self))
  209. self._log_udp_summary()
  210. async def handle_from_relay(self, frame: Frame, link: RelayLink) -> None:
  211. if frame.packet_id not in self.pending or self.transport is None or self.client_addr is None:
  212. return
  213. self.pending.discard(frame.packet_id)
  214. flow_id = self.packet_flows.pop(frame.packet_id, 0)
  215. host = self.edge.udp_targets.get(frame.packet_id, ("0.0.0.0", 0))[0]
  216. port = self.edge.udp_targets.get(frame.packet_id, ("0.0.0.0", 0))[1]
  217. packet = self._build_socks_udp(host, port, frame.payload)
  218. winner_log = ""
  219. flow = self._find_flow(flow_id)
  220. if flow is not None:
  221. now = asyncio.get_running_loop().time()
  222. flow.touch(now)
  223. flow.packets_received += 1
  224. if flow.winner_name is None:
  225. flow.winner_name = link.node.name
  226. self.win_counts[link.node.name] = self.win_counts.get(link.node.name, 0) + 1
  227. relay_detail = ", ".join(f"{name}={count}" for name, count in sorted(self.win_counts.items())) or "none"
  228. print(
  229. f"[edge] udp flow={flow.flow_id} winner={link.node.name} "
  230. f"target={flow.target_host}:{flow.target_port} mode=single candidates={len(flow.candidate_names) or len(self.edge.links)}"
  231. )
  232. print(f"[edge] udp win relay_breakdown={relay_detail}")
  233. elif flow.winner_name != link.node.name:
  234. flow.duplicate_responses += 1
  235. winner_log = f" duplicate=1 winner={flow.winner_name} from={link.node.name}"
  236. print(
  237. f"[edge] udp send flow={flow_id or 'unknown'} packet_id={frame.packet_id} "
  238. f"target={host}:{port} size={len(frame.payload)} relay={link.node.name}{winner_log}"
  239. )
  240. self.transport.sendto(packet, self.client_addr)
  241. self._log_udp_summary()
  242. def set_flow_candidates(self, packet_id: int, candidate_names: tuple[str, ...]) -> None:
  243. flow_id = self.packet_flows.get(packet_id)
  244. flow = self._find_flow(flow_id)
  245. if flow is not None and not flow.candidate_names:
  246. flow.candidate_names = candidate_names
  247. def note_unsent(self, packet_id: int) -> None:
  248. flow_id = self.packet_flows.pop(packet_id, 0)
  249. self.pending.discard(packet_id)
  250. flow = self._find_flow(flow_id)
  251. if flow is not None:
  252. flow.touch(asyncio.get_running_loop().time())
  253. print(f"[edge] udp drop flow={flow_id or 'unknown'} packet_id={packet_id} reason=no_available_links")
  254. self._log_udp_summary(force=True)
  255. def _find_flow(self, flow_id: int | None) -> UdpFlowState | None:
  256. if not flow_id:
  257. return None
  258. for flow in self.client_flows.values():
  259. if flow.flow_id == flow_id:
  260. return flow
  261. return None
  262. def _log_udp_summary(self, force: bool = False) -> None:
  263. now = asyncio.get_running_loop().time()
  264. if not force and now - self.last_summary_at < 10:
  265. return
  266. self.last_summary_at = now
  267. active_flows = len(self.client_flows)
  268. winners = sum(1 for flow in self.client_flows.values() if flow.winner_name)
  269. packets_sent = sum(flow.packets_sent for flow in self.client_flows.values())
  270. packets_received = sum(flow.packets_received for flow in self.client_flows.values())
  271. duplicates = sum(flow.duplicate_responses for flow in self.client_flows.values())
  272. print(
  273. f"[edge] udp summary bind={self.client_addr[0]}:{self.client_addr[1]} active_flows={active_flows} "
  274. f"winner_flows={winners} packets_sent={packets_sent} packets_received={packets_received} dup={duplicates}"
  275. if self.client_addr
  276. else f"[edge] udp summary bind=unbound active_flows={active_flows} winner_flows={winners} packets_sent={packets_sent} packets_received={packets_received} dup={duplicates}"
  277. )
  278. def _parse_socks_udp(self, packet: bytes) -> tuple[str, int, bytes]:
  279. atyp = packet[3]
  280. offset = 4
  281. if atyp == 1:
  282. host = socket.inet_ntoa(packet[offset:offset + 4])
  283. offset += 4
  284. elif atyp == 3:
  285. size = packet[offset]
  286. offset += 1
  287. host = packet[offset:offset + size].decode()
  288. offset += size
  289. else:
  290. raise ValueError("unsupported udp atyp")
  291. port = struct.unpack("!H", packet[offset:offset + 2])[0]
  292. offset += 2
  293. return host, port, packet[offset:]
  294. def _build_socks_udp(self, host: str, port: int, payload: bytes) -> bytes:
  295. try:
  296. addr = socket.inet_aton(host)
  297. header = b"\x00\x00\x00\x01" + addr + struct.pack("!H", port)
  298. except OSError:
  299. raw = host.encode()
  300. header = b"\x00\x00\x00\x03" + bytes([len(raw)]) + raw + struct.pack("!H", port)
  301. return header + payload
  302. class SocksEdge:
  303. def __init__(self, listen_host: str, listen_port: int, config: Config) -> None:
  304. self.listen_host = listen_host
  305. self.listen_port = listen_port
  306. self.config = config
  307. self.scheduler = Scheduler(config)
  308. self.links: list[RelayLink] = []
  309. self.session_ids = itertools.count(1)
  310. self.udp_targets: dict[int, tuple[str, int]] = {}
  311. self.udp_server: UdpAssociateServer | None = None
  312. async def start(self) -> None:
  313. await self.scheduler.start()
  314. await self._connect_relays()
  315. server = await asyncio.start_server(self._accept, self.listen_host, self.listen_port)
  316. sockets = ", ".join(str(sock.getsockname()) for sock in server.sockets or [])
  317. print(f"[edge] socks5 listening on {sockets}")
  318. async with server:
  319. await server.serve_forever()
  320. async def _connect_relays(self) -> None:
  321. for node in self.config.relays:
  322. reader, writer = await asyncio.open_connection(node.host, node.port)
  323. link = RelayLink(node, reader, writer)
  324. await link.start()
  325. self.links.append(link)
  326. loop = asyncio.get_running_loop()
  327. transport, protocol = await loop.create_datagram_endpoint(lambda: UdpAssociateServer(self), local_addr=(self.listen_host, 0))
  328. self.udp_server = protocol
  329. for link in self.links:
  330. link.udp_server = protocol
  331. self.udp_transport = transport
  332. async def _accept(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
  333. try:
  334. peer = writer.get_extra_info("peername")
  335. host, port, udp_mode = await self._handshake(reader, writer, peer)
  336. if udp_mode:
  337. return
  338. links = self._selected_links()
  339. session = TcpRaceSession(
  340. session_id=next(self.session_ids),
  341. stream_id=0,
  342. target_host=host,
  343. target_port=port,
  344. local_reader=reader,
  345. local_writer=writer,
  346. links=links,
  347. warmup_bytes=self.config.tcp_warmup_bytes,
  348. )
  349. await session.start()
  350. except Exception:
  351. writer.close()
  352. with contextlib.suppress(Exception):
  353. await writer.wait_closed()
  354. def _selected_links(self) -> list[RelayLink]:
  355. chosen = {node.name for node in self.scheduler.choose()}
  356. links = [link for link in self.links if link.node.name in chosen and not link.closed]
  357. return links or [link for link in self.links if not link.closed][:1]
  358. async def forward_udp(self, host: str, port: int, payload: bytes, packet_id: int, udp_server: UdpAssociateServer) -> None:
  359. self.udp_targets[packet_id] = (host, port)
  360. meta = encode_json({"host": host, "port": port})
  361. links = self._selected_links()
  362. link_names = ",".join(link.node.name for link in links) or "none"
  363. udp_server.set_flow_candidates(packet_id, tuple(link.node.name for link in links))
  364. print(f"[edge] udp forward packet_id={packet_id} target={host}:{port} size={len(payload)} links={link_names}")
  365. if not links:
  366. udp_server.note_unsent(packet_id)
  367. return
  368. for index, link in enumerate(links):
  369. body = meta + payload if index == 0 else payload
  370. await link.send(Frame(UDP_SEND, 1, index, 0, packet_id if index == 0 else 0, body))
  371. async def _handshake(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter, peer) -> tuple[str, int, bool]:
  372. version, methods_len = (await read_exact(reader, 2))
  373. if version != SOCKS_VERSION:
  374. raise ValueError("unsupported socks version")
  375. await read_exact(reader, methods_len)
  376. writer.write(b"\x05\x00")
  377. await writer.drain()
  378. version, command, _, atyp = await read_exact(reader, 4)
  379. if version != SOCKS_VERSION:
  380. raise ValueError("unsupported socks version")
  381. if atyp == 1:
  382. host = socket.inet_ntoa(await read_exact(reader, 4))
  383. elif atyp == 3:
  384. size = (await read_exact(reader, 1))[0]
  385. host = (await read_exact(reader, size)).decode()
  386. else:
  387. raise ValueError("unsupported atyp")
  388. port = struct.unpack("!H", await read_exact(reader, 2))[0]
  389. peer_text = f"{peer[0]}:{peer[1]}" if isinstance(peer, tuple) and len(peer) >= 2 else str(peer)
  390. if command == 1:
  391. print(f"[edge] socks handshake peer={peer_text} command=connect target={host}:{port}")
  392. writer.write(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00")
  393. await writer.drain()
  394. return host, port, False
  395. if command == 3 and self.udp_server and self.udp_server.transport:
  396. bind_host, bind_port = self.udp_server.transport.get_extra_info("sockname")[:2]
  397. print(f"[edge] socks handshake peer={peer_text} command=udp_associate target={host}:{port} bind={bind_host}:{bind_port}")
  398. writer.write(b"\x05\x00\x00\x01" + socket.inet_aton(bind_host) + struct.pack("!H", bind_port))
  399. await writer.drain()
  400. return host, port, True
  401. raise ValueError("unsupported socks command")