前些年在sina 企业mail的rpc在freebsd老是倒,在网上看到一个哥们说烂

jackxiang 2009-10-4 12:43 | |
北京 sina 企业邮箱,老是需要重启(已经替换php掉了,rpc在Solaris上运行很正常,但是在freebsd上就有僵尸和无缘无故死掉的可能),可能是超时没有人连接:
转下面一个网路上哥们的文章,但是他没有提出解决方案,呵呵,等于没有说:
亲爱的,稍微改进下你的程序吧,这不费多大事儿。

我在一个多线程的corba程序的多个worker线程中调用了sun rpc的clnt_create函数。然后我的程序偶尔会core dump【http://blog.5ifd.com/post/2023/】.查了下,backtrace如下:

#0  0x84c7f82a in fclose (fp=0x0) at /usr/src/lib/libc/stdio/fclose.c:56

#1  0x84c4b0a2 in endnetconfig (handlep=0x86e0420) at /usr/src/lib/libc/rpc/getnetconfig.c:394

#2  0x84c40cc5 in __rpc_endconf (vhandle=0x86e0410) at /usr/src/lib/libc/rpc/rpc_generic.c:441

#3  0x84c327eb in clnt_create_timed (hostname=0x80977d8 "127.0.0.1", prog=931729681, vers=1,

    netclass=0x80977d4 "tcp", tp=0x0) at /usr/src/lib/libc/rpc/clnt_generic.c:271

#4  0x84c3264d in clnt_create (hostname=0x80977d8 "127.0.0.1", prog=931729681, vers=1,

    nettype=0x80977d4 "tcp") at /usr/src/lib/libc/rpc/clnt_generic.c:186

... 后面的我略了。

然后我打开getnetconfig.c看了下

endnetconfig中调用了fclose函数关闭一个叫做nc_file的file handle。而这个file handle是一个全局静态变量。

static FILE *nc_file;

然而在使用这个变量的时候,完全没有加锁。



    ni.ref++;

    if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) {

        nc_vars->valid = NC_VALID;

        nc_vars->flag = 0;

        nc_vars->nc_configs = ni.head;

        return ((void *)nc_vars);

    }

    ni.ref--;

其实问题很简单,

static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER;

定义一个mutex,然后在恰当的时候获取、释放锁就行了。

亲爱的,稍微改进下你的程序吧,这不费多大事儿。

—————-分隔线—————————

14:22 2007-12-11

光说不干是可耻的。做了个patch,如下:

--- src/lib/libc/rpc/getnetconfig.c.orig Tue Dec 11 13:45:32 2007

+++ src/lib/libc/rpc/getnetconfig.c Tue Dec 11 14:11:41 2007

@@ -131,7 +131,10 @@ static struct netconfig *dup_ncp(struct





static FILE *nc_file;  /* for netconfig db */

+static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER;

static struct netconfig_info ni = { 0, 0, NULL, NULL};

+/* should not acquire it after acquired a nc_file_lock */

+static pthread_mutex_t ni_lock = PTHREAD_MUTEX_INITIALIZER;



#define MAXNETCONFIGLINE    1000



@@ -205,14 +208,23 @@ setnetconfig()

      * For multiple calls, i.e. nc_file is not NULL, we just return the

      * handle without reopening the netconfig db.

      */

+    mutex_lock(&ni_lock);

     ni.ref++;

+    mutex_unlock(&ni_lock);

+

+    mutex_lock(&nc_file_lock);

     if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) {

  nc_vars->valid = NC_VALID;

  nc_vars->flag = 0;

  nc_vars->nc_configs = ni.head;

+ mutex_unlock(&nc_file_lock);

  return ((void *)nc_vars);

     }

+    mutex_unlock(&nc_file_lock);

+    mutex_lock(&ni_lock);

     ni.ref--;

+    mutex_unlock(&ni_lock);

+

     nc_error = NC_NONETCONFIG;

     free(nc_vars);

     return (NULL);

@@ -235,15 +247,17 @@ void *handlep;

     char *stringp;  /* tmp string pointer */

     struct netconfig_list *list;

     struct netconfig *np;

-

+    struct netconfig *result;

     /*

      * Verify that handle is valid

      */

+    mutex_lock(&nc_file_lock);

     if (ncp == NULL || nc_file == NULL) {

  nc_error = NC_NOTINIT;

+ mutex_unlock(&nc_file_lock);

  return (NULL);

     }

-

+    mutex_unlock(&nc_file_lock);

     switch (ncp->valid) {

     case NC_VALID:

  /*

@@ -256,7 +270,9 @@ void *handlep;

   */

  if (ncp->flag == 0) { /* first time */

      ncp->flag = 1;

+     mutex_lock(&ni_lock);

      ncp->nc_configs = ni.head;

+     mutex_unlock(&ni_lock);

      if (ncp->nc_configs != NULL) /* entry already exist */

   return(ncp->nc_configs->ncp);

  }

@@ -269,7 +285,12 @@ void *handlep;

   * If we cannot find the entry in the list and is end of file,

   * we give up.

   */

- if (ni.eof == 1) return(NULL);

+ mutex_lock(&ni_lock);

+ if (ni.eof == 1) {

+   mutex_unlock(&ni_lock);

+   return(NULL);

+ }

+ mutex_unlock(&ni_lock);

  break;

     default:

  nc_error = NC_NOTINIT;

@@ -290,14 +311,18 @@ void *handlep;

     /*

      * Read a line from netconfig file.

      */

+    mutex_lock(&nc_file_lock);

     do {

  if (fgets(stringp, MAXNETCONFIGLINE, nc_file) == NULL) {

      free(stringp);

+     mutex_lock(&ni_lock);

      ni.eof = 1;

+     mutex_unlock(&ni_lock);

+     mutex_unlock(&nc_file_lock);

      return (NULL);

         }

     } while (*stringp == '#');

-

+    mutex_unlock(&nc_file_lock);

     list = (struct netconfig_list *) malloc(sizeof (struct netconfig_list));

     if (list == NULL) {

      free(stringp);

@@ -326,6 +351,7 @@ void *handlep;

   * Reposition the current pointer of the handle to the last entry

   * in the list.

   */

+        mutex_lock(&ni_lock);

  if (ni.head == NULL) { /* first entry */

      ni.head = ni.tail = list;

  }

@@ -334,7 +360,9 @@ void *handlep;

          ni.tail = ni.tail->next;

      }

  ncp->nc_configs = ni.tail;

- return(ni.tail->ncp);

+ result = ni.tail->ncp;

+ mutex_unlock(&ni_lock);

+ return(result);

     }

}



@@ -368,8 +396,10 @@ void *handlep;

     nc_handlep->valid = NC_INVALID;

     nc_handlep->flag = 0;

     nc_handlep->nc_configs = NULL;

+    mutex_lock(&ni_lock);

     if (--ni.ref > 0) {

      free(nc_handlep);

+ mutex_unlock(&ni_lock);

  return(0);

     }



@@ -381,6 +411,7 @@ void *handlep;

     ni.eof = ni.ref = 0;

     ni.head = NULL;

     ni.tail = NULL;

+    mutex_unlock(&ni_lock);

     while (q) {

  p = q->next;

  if (q->ncp->nc_lookups != NULL) free(q->ncp->nc_lookups);

@@ -390,9 +421,10 @@ void *handlep;

  q = p;

     }

     free(nc_handlep);

-

+    mutex_lock(&nc_file_lock);

     fclose(nc_file);

     nc_file = NULL;

+    mutex_unlock(&nc_file_lock);

     return (0);

}



@@ -440,16 +472,20 @@ getnetconfigent(netid)

      * If all the netconfig db has been read and placed into the list and

      * there is no match for the netid, return NULL.

      */

+    mutex_lock(&ni_lock);

     if (ni.head != NULL) {

  for (list = ni.head; list; list = list->next) {

      if (strcmp(list->ncp->nc_netid, netid) == 0) {

+         mutex_unlock(&ni_lock);

          return(dup_ncp(list->ncp));

      }

  }

- if (ni.eof == 1) /* that's all the entries */

+ if (ni.eof == 1) {/* that's all the entries */

+         mutex_unlock(&ni_lock);

   return(NULL);

+ }

     }

-

+    mutex_unlock(&ni_lock);



     if ((file = fopen(NETCONFIG, "r")) == NULL) {

  nc_error = NC_NONETCONFIG;

作者:jackxiang@向东博客 专注WEB应用 构架之美 --- 构架之美,在于尽态极妍 | 应用之美,在于药到病除
地址:http://jackxiang.com/post/2022/
版权所有。转载时必须以链接形式注明作者和原始出处及本声明!


最后编辑: jackxiang 编辑于2009-10-4 12:50
评论列表
发表评论

昵称

网址

电邮

打开HTML 打开UBB 打开表情 隐藏 记住我 [登入] [注册]