linux 下udp打洞原理分析

做過P2P通信的人應該都會用到這個技術,udp打洞。通過udp打洞,可以實現兩個nat後的主機進行直接通信。
wiki中關於udp打洞的描述:

通過UDP打洞實現NAT穿越是一種在處於使用了NAT的私有網絡中的Internet主機之間建立雙向UDP連接的方法。由於NAT的行爲是非標準化的,因此它並不能應用於所有類型的NAT。

其基本思想是這樣的:讓位於NAT後的兩臺主機都與處於公共地址空間的、衆所周知的第三臺服務器相連,然後,一旦NAT設備建立好UDP狀態信息就轉爲直接通信,並寄希望於NAT設備會在分組其實是從另外一個主機傳送過來的情況下仍然保持當前狀態。

這項技術需要一個圓錐型NAT設備才能夠正常工作。對稱型NAT不能使用這項技術。

這項技術在P2P軟件和VoIP電話領域被廣泛採用。它是Skype用以繞過防火牆和NAT設備的技術之一。

相同的技術有時還被用於TCP連接——儘管遠沒有UDP成功。


UDP打洞的過程大體上如下:

主機A和主機B都是通過NAT設備訪問互聯網,主機S位於互聯網上。

1. A和B都與S之間通過UDP進行心跳連接

2. A通知S,要與B通信

3. S把B的公網IP、port告訴A,同時把A的公網IP、port告訴B

4. A向B的公網IP、port發送數據(這個數據包應該會被丟棄,但是打開了B回來的窗戶)

5. B向A的公網IP、port發送數據(這個數據包就會被A接受,之後A和B就建立起了連接)


上述過程能夠正常工作的前提是:A連接S和連接B的時候,NAT設備對A做地址轉換時,需要選擇相同的IP、port。

比如:A--->NATA--->S

      A--->NATA--->NATB-->B

那麼NATA對A進行NAT的時候,需要選擇相同的IP和port進行SNAT。

如果使用Linux作爲防火牆,那麼非常幸運,Linux就是這麼搞的

現在我們來看看linux是如何實現的:

linux通過SNAT netfilter target 進行源地址轉換,源碼位於net/ipv4/netfilter/nf_nat_rule.c

點擊(此處)摺疊或打開

  1. /* Source NAT target: fetches the conntrack entry attached to the packet
        and hands it to nf_nat_setup_info() together with the first range from
        the target info, requesting a source manipulation. The return value of
        nf_nat_setup_info() is returned unchanged. */
  2. static unsigned int
  3. ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
  4. {
  5.     struct nf_conn *ct;
  6.     enum ip_conntrack_info ctinfo;
  7.     const struct nf_nat_multi_range_compat *mr = par->targinfo;

         /* This target is only expected at the POST_ROUTING hook. */
  8.     NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);

  9.     ct = nf_ct_get(skb, &ctinfo);

  10.     /* Connection must be valid and new. */
  11.     NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
  12.              ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
  13.     NF_CT_ASSERT(par->out != NULL);

          /* Only range[0] is passed on; the manipulation type is always SRC. */
  14.     return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
  15. }
這裏實際上調用函數nf_nat_setup_info(net/ipv4/netfilter/nf_nat_core.c)

點擊(此處)摺疊或打開

  /* Set up a NAT binding of the given manipulation type (source or
     destination) on conntrack entry ct, choosing the translated tuple
     from range via get_unique_tuple(). Returns NF_ACCEPT on every path
     shown here, including when the NAT extension cannot be added. */
  1. unsigned int
  2. nf_nat_setup_info(struct nf_conn *ct,
  3.          const struct nf_nat_range *range,
  4.          enum nf_nat_manip_type maniptype)
  5. {
  6.     struct net *net = nf_ct_net(ct);
  7.     struct nf_conntrack_tuple curr_tuple, new_tuple;
  8.     struct nf_conn_nat *nat;
         /* True when none of the IPS_NAT_DONE_MASK bits is set on ct yet;
            sampled before this call sets its own DONE bit below. */
  9.     int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);

  10.     /* nat helper or nfctnetlink also setup binding */
  11.     nat = nfct_nat(ct);
  12.     if (!nat) {
  13.         nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
  14.         if (nat == NULL) {
  15.             pr_debug("failed to add NAT extension\n");
  16.             return NF_ACCEPT;
  17.         }
  18.     }

  19.     NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
  20.          maniptype == IP_NAT_MANIP_DST);
          /* Setting up the same manipulation type twice on one ct is a bug. */
  21.     BUG_ON(nf_nat_initialized(ct, maniptype));

  22.     /* What we've got will look like inverse of reply. Normally
  23.      this is what is in the conntrack, except for prior
  24.      manipulations (future optimization: if num_manips == 0,
  25.      orig_tp =
  26.      conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
  27.     nf_ct_invert_tuplepr(&curr_tuple,
  28.              &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

          /* Choose the translated tuple; this is where the SNAT ip/port
             is selected. */
  29.     get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

          /* Only touch the conntrack entry if the chosen tuple actually
             differs from the current one. */
  30.     if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
  31.         struct nf_conntrack_tuple reply;

  32.         /* Alter conntrack table so will recognize replies. */
  33.         nf_ct_invert_tuplepr(&reply, &new_tuple);
  34.         nf_conntrack_alter_reply(ct, &reply);

  35.         /* Non-atomic: we own this at the moment. */
  36.         if (maniptype == IP_NAT_MANIP_SRC)
  37.             ct->status |= IPS_SRC_NAT;
  38.         else
  39.             ct->status |= IPS_DST_NAT;
  40.     }

  41.     /* Place in source hash if this is the first time. */
  42.     if (have_to_hash) {
  43.         unsigned int srchash;

              /* The bucket is derived from the ORIGINAL-direction tuple;
                 find_appropriate_src() walks the same nat_bysource table. */
  44.         srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
  45.         spin_lock_bh(&nf_nat_lock);
  46.         /* nf_conntrack_alter_reply might re-allocate extension area */
  47.         nat = nfct_nat(ct);
  48.         nat->ct = ct;
  49.         hlist_add_head_rcu(&nat->bysource,
  50.                  &net->ipv4.nat_bysource[srchash]);
  51.         spin_unlock_bh(&nf_nat_lock);
  52.     }

  53.     /* It's done. */
  54.     if (maniptype == IP_NAT_MANIP_DST)
  55.         set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
  56.     else
  57.         set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);

  58.     return NF_ACCEPT;
  59. }
這裏第29行調用get_unique_tuple,選擇snat使用的ip和端口,核心就在這個函數裏面

點擊(此處)摺疊或打開

  /* Fill *tuple with the translated tuple to use for orig_tuple, within
     range, for the given manipulation type. Three strategies are tried in
     order: reuse an existing source mapping, pick an IP via
     find_best_ips_proto(), then let the per-protocol handler adjust the
     protocol part. */
  1. static void
  2. get_unique_tuple(struct nf_conntrack_tuple *tuple,
  3.          const struct nf_conntrack_tuple *orig_tuple,
  4.          const struct nf_nat_range *range,
  5.          struct nf_conn *ct,
  6.          enum nf_nat_manip_type maniptype)
  7. {
  8.     struct net *net = nf_ct_net(ct);
  9.     const struct nf_nat_protocol *proto;

  10.     /* 1) If this srcip/proto/src-proto-part is currently mapped,
  11.      and that same mapping gives a unique tuple within the given
  12.      range, use that.

  13.      This is only required for source (ie. NAT/masq) mappings.
  14.      So far, we don't do local source mappings, so multiple
  15.      manips not an issue. */
          /* Reuse is skipped entirely when IP_NAT_RANGE_PROTO_RANDOM is set.
             A found mapping is accepted only if nf_nat_used_tuple() does not
             report the resulting tuple as used; otherwise fall through. */
  16.     if (maniptype == IP_NAT_MANIP_SRC &&
  17.      !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
  18.         if (find_appropriate_src(net, orig_tuple, tuple, range)) {
  19.             pr_debug("get_unique_tuple: Found current src map\n");
  20.             if (!nf_nat_used_tuple(tuple, ct))
  21.                 return;
  22.         }
  23.     }

  24.     /* 2) Select the least-used IP/proto combination in the given
  25.      range. */
          /* Start again from a copy of the original tuple; this overwrites
             anything step 1 may have written into *tuple. */
  26.     *tuple = *orig_tuple;
  27.     find_best_ips_proto(tuple, range, ct, maniptype);

  28.     /* 3) The per-protocol part of the manip is made to map into
  29.      the range to make a unique tuple. */

  30.     rcu_read_lock();
  31.     proto = __nf_nat_proto_find(orig_tuple->dst.protonum);

  32.     /* Change protocol info to have some randomization */
  33.     if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
  34.         proto->unique_tuple(tuple, range, maniptype, ct);
  35.         goto out;
  36.     }

  37.     /* Only bother mapping if it's not already in range and unique */
          /* Keep the tuple as-is when (no protocol range was specified OR the
             protocol part is already in range) AND the tuple is not used. */
  38.     if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
  39.      proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
  40.      !nf_nat_used_tuple(tuple, ct))
  41.         goto out;

  42.     /* Last chance: get protocol to try to obtain unique tuple. */
  43.     proto->unique_tuple(tuple, range, maniptype, ct);
  44. out:
  45.     rcu_read_unlock();
  46. }
這個函數的第16~22行,就是選擇的原則:如果這個源ip、port、protocol已經被snat過(並且沒有設置IP_NAT_RANGE_PROTO_RANDOM,轉換後的tuple也沒有被佔用),那麼這次snat優先選擇之前使用的ip和port。其中第18行調用的find_appropriate_src負責查找之前的映射:

點擊(此處)摺疊或打開

  1. /* Only called for SRC manip */
     /* Look for an existing NAT entry in the nat_bysource table for which
        same_src() matches tuple. On a match, *result is built from that
        entry's translated source plus the destination of tuple, and 1 is
        returned if in_range() accepts it for range. Returns 0 when no entry
        qualifies; *result may then hold a rejected candidate. */
  2. static int
  3. find_appropriate_src(struct net *net,
  4.          const struct nf_conntrack_tuple *tuple,
  5.          struct nf_conntrack_tuple *result,
  6.          const struct nf_nat_range *range)
  7. {
         /* Same hash function nf_nat_setup_info() uses when inserting. */
  8.     unsigned int h = hash_by_src(net, tuple);
  9.     const struct nf_conn_nat *nat;
  10.     const struct nf_conn *ct;
  11.     const struct hlist_node *n;

          /* The bucket is walked under the RCU read lock, not nf_nat_lock. */
  12.     rcu_read_lock();
  13.     hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
  14.         ct = nat->ct;
  15.         if (same_src(ct, tuple)) {
  16.             /* Copy source part from reply tuple. */
  17.             nf_ct_invert_tuplepr(result,
  18.                  &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
                  /* Keep the translated source but aim it at the new
                     destination: the same ip/port is reused for a
                     different peer. */
  19.             result->dst = tuple->dst;

  20.             if (in_range(result, range)) {
  21.                 rcu_read_unlock();
  22.                 return 1;
  23.             }
  24.         }
  25.     }
  26.     rcu_read_unlock();
  27.     return 0;
  28. }
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章