北京 sina 企业邮箱老是需要重启(已经把php替换掉了;rpc在salary上运行很正常,但是在freebsd上就可能出现僵尸进程或无缘无故死掉),可能是超时后没有人连接:
转下面一个网路上哥们的文章,但是他没有提出解决方案,呵呵,等于没有说:
亲爱的,稍微改进下你的程序吧,这不费多大事儿。
我在一个多线程的corba程序的多个worker线程中调用了sun rpc的clnt_create函数。然后我的程序偶尔会core dump【http://blog.5ifd.com/post/2023/】.查了下,backtrace如下:
#0 0x84c7f82a in fclose (fp=0x0) at /usr/src/lib/libc/stdio/fclose.c:56
#1 0x84c4b0a2 in endnetconfig (handlep=0x86e0420) at /usr/src/lib/libc/rpc/getnetconfig.c:394
#2 0x84c40cc5 in __rpc_endconf (vhandle=0x86e0410) at /usr/src/lib/libc/rpc/rpc_generic.c:441
#3 0x84c327eb in clnt_create_timed (hostname=0x80977d8 "127.0.0.1", prog=931729681, vers=1,
netclass=0x80977d4 "tcp", tp=0x0) at /usr/src/lib/libc/rpc/clnt_generic.c:271
#4 0x84c3264d in clnt_create (hostname=0x80977d8 "127.0.0.1", prog=931729681, vers=1,
nettype=0x80977d4 "tcp") at /usr/src/lib/libc/rpc/clnt_generic.c:186
... 后面的我略了。
然后我打开getnetconfig.c看了下
endnetconfig中调用了fclose函数关闭一个叫做nc_file的file handle。而这个file handle是一个全局静态变量。
static FILE *nc_file;
然而在使用这个变量的时候,完全没有加锁。
如
ni.ref++;
if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) {
nc_vars->valid = NC_VALID;
nc_vars->flag = 0;
nc_vars->nc_configs = ni.head;
return ((void *)nc_vars);
}
ni.ref--;
其实问题很简单,
static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER;
定义一个mutex,然后在恰当的时候获取、释放锁就行了。
亲爱的,稍微改进下你的程序吧,这不费多大事儿。
—————-分隔线—————————
14:22 2007-12-11
光说不干是可耻的。做了个patch,如下:
--- src/lib/libc/rpc/getnetconfig.c.orig Tue Dec 11 13:45:32 2007
+++ src/lib/libc/rpc/getnetconfig.c Tue Dec 11 14:11:41 2007
@@ -131,7 +131,10 @@ static struct netconfig *dup_ncp(struct
static FILE *nc_file; /* for netconfig db */
+static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER;
static struct netconfig_info ni = { 0, 0, NULL, NULL};
+/* should not acquire it after acquired a nc_file_lock */
+static pthread_mutex_t ni_lock = PTHREAD_MUTEX_INITIALIZER;
#define MAXNETCONFIGLINE 1000
@@ -205,14 +208,23 @@ setnetconfig()
* For multiple calls, i.e. nc_file is not NULL, we just return the
* handle without reopening the netconfig db.
*/
+ mutex_lock(&ni_lock);
ni.ref++;
+ mutex_unlock(&ni_lock);
+
+ mutex_lock(&nc_file_lock);
if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) {
nc_vars->valid = NC_VALID;
nc_vars->flag = 0;
nc_vars->nc_configs = ni.head;
+ mutex_unlock(&nc_file_lock);
return ((void *)nc_vars);
}
+ mutex_unlock(&nc_file_lock);
+ mutex_lock(&ni_lock);
ni.ref--;
+ mutex_unlock(&ni_lock);
+
nc_error = NC_NONETCONFIG;
free(nc_vars);
return (NULL);
@@ -235,15 +247,17 @@ void *handlep;
char *stringp; /* tmp string pointer */
struct netconfig_list *list;
struct netconfig *np;
-
+ struct netconfig *result;
/*
* Verify that handle is valid
*/
+ mutex_lock(&nc_file_lock);
if (ncp == NULL || nc_file == NULL) {
nc_error = NC_NOTINIT;
+ mutex_unlock(&nc_file_lock);
return (NULL);
}
-
+ mutex_unlock(&nc_file_lock);
switch (ncp->valid) {
case NC_VALID:
/*
@@ -256,7 +270,9 @@ void *handlep;
*/
if (ncp->flag == 0) { /* first time */
ncp->flag = 1;
+ mutex_lock(&ni_lock);
ncp->nc_configs = ni.head;
+ mutex_unlock(&ni_lock);
if (ncp->nc_configs != NULL) /* entry already exist */
return(ncp->nc_configs->ncp);
}
@@ -269,7 +285,12 @@ void *handlep;
* If we cannot find the entry in the list and is end of file,
* we give up.
*/
- if (ni.eof == 1) return(NULL);
+ mutex_lock(&ni_lock);
+ if (ni.eof == 1) {
+ mutex_unlock(&ni_lock);
+ return(NULL);
+ }
+ mutex_unlock(&ni_lock);
break;
default:
nc_error = NC_NOTINIT;
@@ -290,14 +311,18 @@ void *handlep;
/*
* Read a line from netconfig file.
*/
+ mutex_lock(&nc_file_lock);
do {
if (fgets(stringp, MAXNETCONFIGLINE, nc_file) == NULL) {
free(stringp);
+ mutex_lock(&ni_lock);
ni.eof = 1;
+ mutex_unlock(&ni_lock);
+ mutex_unlock(&nc_file_lock);
return (NULL);
}
} while (*stringp == '#');
-
+ mutex_unlock(&nc_file_lock);
list = (struct netconfig_list *) malloc(sizeof (struct netconfig_list));
if (list == NULL) {
free(stringp);
@@ -326,6 +351,7 @@ void *handlep;
* Reposition the current pointer of the handle to the last entry
* in the list.
*/
+ mutex_lock(&ni_lock);
if (ni.head == NULL) { /* first entry */
ni.head = ni.tail = list;
}
@@ -334,7 +360,9 @@ void *handlep;
ni.tail = ni.tail->next;
}
ncp->nc_configs = ni.tail;
- return(ni.tail->ncp);
+ result = ni.tail->ncp;
+ mutex_unlock(&ni_lock);
+ return(result);
}
}
@@ -368,8 +396,10 @@ void *handlep;
nc_handlep->valid = NC_INVALID;
nc_handlep->flag = 0;
nc_handlep->nc_configs = NULL;
+ mutex_lock(&ni_lock);
if (--ni.ref > 0) {
free(nc_handlep);
+ mutex_unlock(&ni_lock);
return(0);
}
@@ -381,6 +411,7 @@ void *handlep;
ni.eof = ni.ref = 0;
ni.head = NULL;
ni.tail = NULL;
+ mutex_unlock(&ni_lock);
while (q) {
p = q->next;
if (q->ncp->nc_lookups != NULL) free(q->ncp->nc_lookups);
@@ -390,9 +421,10 @@ void *handlep;
q = p;
}
free(nc_handlep);
-
+ mutex_lock(&nc_file_lock);
fclose(nc_file);
nc_file = NULL;
+ mutex_unlock(&nc_file_lock);
return (0);
}
@@ -440,16 +472,20 @@ getnetconfigent(netid)
* If all the netconfig db has been read and placed into the list and
* there is no match for the netid, return NULL.
*/
+ mutex_lock(&ni_lock);
if (ni.head != NULL) {
for (list = ni.head; list; list = list->next) {
if (strcmp(list->ncp->nc_netid, netid) == 0) {
+ mutex_unlock(&ni_lock);
return(dup_ncp(list->ncp));
}
}
- if (ni.eof == 1) /* that’s all the entries */
+ if (ni.eof == 1) {/* that’s all the entries */
+ mutex_unlock(&ni_lock);
return(NULL);
+ }
}
-
+ mutex_unlock(&ni_lock);
if ((file = fopen(NETCONFIG, "r")) == NULL) {
nc_error = NC_NONETCONFIG;
转下面一个网路上哥们的文章,但是他没有提出解决方案,呵呵,等于没有说:
亲爱的,稍微改进下你的程序吧,这不费多大事儿。
我在一个多线程的corba程序的多个worker线程中调用了sun rpc的clnt_create函数。然后我的程序偶尔会core dump【http://blog.5ifd.com/post/2023/】.查了下,backtrace如下:
#0 0x84c7f82a in fclose (fp=0x0) at /usr/src/lib/libc/stdio/fclose.c:56
#1 0x84c4b0a2 in endnetconfig (handlep=0x86e0420) at /usr/src/lib/libc/rpc/getnetconfig.c:394
#2 0x84c40cc5 in __rpc_endconf (vhandle=0x86e0410) at /usr/src/lib/libc/rpc/rpc_generic.c:441
#3 0x84c327eb in clnt_create_timed (hostname=0x80977d8 "127.0.0.1", prog=931729681, vers=1,
netclass=0x80977d4 "tcp", tp=0x0) at /usr/src/lib/libc/rpc/clnt_generic.c:271
#4 0x84c3264d in clnt_create (hostname=0x80977d8 "127.0.0.1", prog=931729681, vers=1,
nettype=0x80977d4 "tcp") at /usr/src/lib/libc/rpc/clnt_generic.c:186
... 后面的我略了。
然后我打开getnetconfig.c看了下
endnetconfig中调用了fclose函数关闭一个叫做nc_file的file handle。而这个file handle是一个全局静态变量。
static FILE *nc_file;
然而在使用这个变量的时候,完全没有加锁。
如
ni.ref++;
if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) {
nc_vars->valid = NC_VALID;
nc_vars->flag = 0;
nc_vars->nc_configs = ni.head;
return ((void *)nc_vars);
}
ni.ref--;
其实问题很简单,
static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER;
定义一个mutex,然后在恰当的时候获取、释放锁就行了。
亲爱的,稍微改进下你的程序吧,这不费多大事儿。
—————-分隔线—————————
14:22 2007-12-11
光说不干是可耻的。做了个patch,如下:
--- src/lib/libc/rpc/getnetconfig.c.orig Tue Dec 11 13:45:32 2007
+++ src/lib/libc/rpc/getnetconfig.c Tue Dec 11 14:11:41 2007
@@ -131,7 +131,10 @@ static struct netconfig *dup_ncp(struct
static FILE *nc_file; /* for netconfig db */
+static pthread_mutex_t nc_file_lock = PTHREAD_MUTEX_INITIALIZER;
static struct netconfig_info ni = { 0, 0, NULL, NULL};
+/* should not acquire it after acquired a nc_file_lock */
+static pthread_mutex_t ni_lock = PTHREAD_MUTEX_INITIALIZER;
#define MAXNETCONFIGLINE 1000
@@ -205,14 +208,23 @@ setnetconfig()
* For multiple calls, i.e. nc_file is not NULL, we just return the
* handle without reopening the netconfig db.
*/
+ mutex_lock(&ni_lock);
ni.ref++;
+ mutex_unlock(&ni_lock);
+
+ mutex_lock(&nc_file_lock);
if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) {
nc_vars->valid = NC_VALID;
nc_vars->flag = 0;
nc_vars->nc_configs = ni.head;
+ mutex_unlock(&nc_file_lock);
return ((void *)nc_vars);
}
+ mutex_unlock(&nc_file_lock);
+ mutex_lock(&ni_lock);
ni.ref--;
+ mutex_unlock(&ni_lock);
+
nc_error = NC_NONETCONFIG;
free(nc_vars);
return (NULL);
@@ -235,15 +247,17 @@ void *handlep;
char *stringp; /* tmp string pointer */
struct netconfig_list *list;
struct netconfig *np;
-
+ struct netconfig *result;
/*
* Verify that handle is valid
*/
+ mutex_lock(&nc_file_lock);
if (ncp == NULL || nc_file == NULL) {
nc_error = NC_NOTINIT;
+ mutex_unlock(&nc_file_lock);
return (NULL);
}
-
+ mutex_unlock(&nc_file_lock);
switch (ncp->valid) {
case NC_VALID:
/*
@@ -256,7 +270,9 @@ void *handlep;
*/
if (ncp->flag == 0) { /* first time */
ncp->flag = 1;
+ mutex_lock(&ni_lock);
ncp->nc_configs = ni.head;
+ mutex_unlock(&ni_lock);
if (ncp->nc_configs != NULL) /* entry already exist */
return(ncp->nc_configs->ncp);
}
@@ -269,7 +285,12 @@ void *handlep;
* If we cannot find the entry in the list and is end of file,
* we give up.
*/
- if (ni.eof == 1) return(NULL);
+ mutex_lock(&ni_lock);
+ if (ni.eof == 1) {
+ mutex_unlock(&ni_lock);
+ return(NULL);
+ }
+ mutex_unlock(&ni_lock);
break;
default:
nc_error = NC_NOTINIT;
@@ -290,14 +311,18 @@ void *handlep;
/*
* Read a line from netconfig file.
*/
+ mutex_lock(&nc_file_lock);
do {
if (fgets(stringp, MAXNETCONFIGLINE, nc_file) == NULL) {
free(stringp);
+ mutex_lock(&ni_lock);
ni.eof = 1;
+ mutex_unlock(&ni_lock);
+ mutex_unlock(&nc_file_lock);
return (NULL);
}
} while (*stringp == '#');
-
+ mutex_unlock(&nc_file_lock);
list = (struct netconfig_list *) malloc(sizeof (struct netconfig_list));
if (list == NULL) {
free(stringp);
@@ -326,6 +351,7 @@ void *handlep;
* Reposition the current pointer of the handle to the last entry
* in the list.
*/
+ mutex_lock(&ni_lock);
if (ni.head == NULL) { /* first entry */
ni.head = ni.tail = list;
}
@@ -334,7 +360,9 @@ void *handlep;
ni.tail = ni.tail->next;
}
ncp->nc_configs = ni.tail;
- return(ni.tail->ncp);
+ result = ni.tail->ncp;
+ mutex_unlock(&ni_lock);
+ return(result);
}
}
@@ -368,8 +396,10 @@ void *handlep;
nc_handlep->valid = NC_INVALID;
nc_handlep->flag = 0;
nc_handlep->nc_configs = NULL;
+ mutex_lock(&ni_lock);
if (--ni.ref > 0) {
free(nc_handlep);
+ mutex_unlock(&ni_lock);
return(0);
}
@@ -381,6 +411,7 @@ void *handlep;
ni.eof = ni.ref = 0;
ni.head = NULL;
ni.tail = NULL;
+ mutex_unlock(&ni_lock);
while (q) {
p = q->next;
if (q->ncp->nc_lookups != NULL) free(q->ncp->nc_lookups);
@@ -390,9 +421,10 @@ void *handlep;
q = p;
}
free(nc_handlep);
-
+ mutex_lock(&nc_file_lock);
fclose(nc_file);
nc_file = NULL;
+ mutex_unlock(&nc_file_lock);
return (0);
}
@@ -440,16 +472,20 @@ getnetconfigent(netid)
* If all the netconfig db has been read and placed into the list and
* there is no match for the netid, return NULL.
*/
+ mutex_lock(&ni_lock);
if (ni.head != NULL) {
for (list = ni.head; list; list = list->next) {
if (strcmp(list->ncp->nc_netid, netid) == 0) {
+ mutex_unlock(&ni_lock);
return(dup_ncp(list->ncp));
}
}
- if (ni.eof == 1) /* that’s all the entries */
+ if (ni.eof == 1) {/* that’s all the entries */
+ mutex_unlock(&ni_lock);
return(NULL);
+ }
}
-
+ mutex_unlock(&ni_lock);
if ((file = fopen(NETCONFIG, "r")) == NULL) {
nc_error = NC_NONETCONFIG;
作者:jackxiang@向东博客 专注WEB应用 构架之美 --- 构架之美,在于尽态极妍 | 应用之美,在于药到病除
地址:http://jackxiang.com/post/2022/
版权所有。转载时必须以链接形式注明作者和原始出处及本声明!
最后编辑: jackxiang 编辑于2009-10-4 12:50
评论列表