Commit 0c83ed8eeb28a045cdbd0b216679938aa9e665fe

Authored by Kurt Hackel
Committed by Joel Becker
1 parent 52fd3d6fea

[PATCH] OCFS2: The Second Oracle Cluster Filesystem

A simple node information service, filled and updated from
userspace. The rest of the stack queries this service for simple node
information.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>

Showing 7 changed files with 1001 additions and 0 deletions Side-by-side Diff

fs/ocfs2/cluster/Makefile
  1 +obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
  2 +
  3 +ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
  4 + quorum.o tcp.o ver.o
fs/ocfs2/cluster/endian.h
  1 +/* -*- mode: c; c-basic-offset: 8; -*-
  2 + * vim: noexpandtab sw=8 ts=8 sts=0:
  3 + *
  4 + * Copyright (C) 2005 Oracle. All rights reserved.
  5 + *
  6 + * This program is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU General Public
  8 + * License as published by the Free Software Foundation; either
  9 + * version 2 of the License, or (at your option) any later version.
  10 + *
  11 + * This program is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU General Public
  17 + * License along with this program; if not, write to the
  18 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 + * Boston, MA 02111-1307, USA.
  20 + */
  21 +
  22 +#ifndef OCFS2_CLUSTER_ENDIAN_H
  23 +#define OCFS2_CLUSTER_ENDIAN_H
  24 +
  25 +static inline void be32_add_cpu(__be32 *var, u32 val)
  26 +{
  27 + *var = cpu_to_be32(be32_to_cpu(*var) + val);
  28 +}
  29 +
  30 +#endif /* OCFS2_CLUSTER_ENDIAN_H */
fs/ocfs2/cluster/nodemanager.c
  1 +/* -*- mode: c; c-basic-offset: 8; -*-
  2 + * vim: noexpandtab sw=8 ts=8 sts=0:
  3 + *
  4 + * Copyright (C) 2004, 2005 Oracle. All rights reserved.
  5 + *
  6 + * This program is free software; you can redistribute it and/or
  7 + * modify it under the terms of the GNU General Public
  8 + * License as published by the Free Software Foundation; either
  9 + * version 2 of the License, or (at your option) any later version.
  10 + *
  11 + * This program is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14 + * General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU General Public
  17 + * License along with this program; if not, write to the
  18 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 + * Boston, MA 02111-1307, USA.
  20 + */
  21 +
  22 +#include <linux/kernel.h>
  23 +#include <linux/module.h>
  24 +#include <linux/sysctl.h>
  25 +#include <linux/configfs.h>
  26 +
  27 +#include "endian.h"
  28 +#include "tcp.h"
  29 +#include "nodemanager.h"
  30 +#include "heartbeat.h"
  31 +#include "masklog.h"
  32 +#include "sys.h"
  33 +#include "ver.h"
  34 +
  35 +/* for now we operate under the assertion that there can be only one
  36 + * cluster active at a time. Changing this will require trickling
  37 + * cluster references throughout where nodes are looked up */
  38 +static struct o2nm_cluster *o2nm_single_cluster = NULL;
  39 +
  40 +#define OCFS2_MAX_HB_CTL_PATH 256
  41 +static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  42 +
  43 +static ctl_table ocfs2_nm_table[] = {
  44 + {
  45 + .ctl_name = 1,
  46 + .procname = "hb_ctl_path",
  47 + .data = ocfs2_hb_ctl_path,
  48 + .maxlen = OCFS2_MAX_HB_CTL_PATH,
  49 + .mode = 0644,
  50 + .proc_handler = &proc_dostring,
  51 + .strategy = &sysctl_string,
  52 + },
  53 + { .ctl_name = 0 }
  54 +};
  55 +
  56 +static ctl_table ocfs2_mod_table[] = {
  57 + {
  58 + .ctl_name = KERN_OCFS2_NM,
  59 + .procname = "nm",
  60 + .data = NULL,
  61 + .maxlen = 0,
  62 + .mode = 0555,
  63 + .child = ocfs2_nm_table
  64 + },
  65 + { .ctl_name = 0}
  66 +};
  67 +
  68 +static ctl_table ocfs2_kern_table[] = {
  69 + {
  70 + .ctl_name = KERN_OCFS2,
  71 + .procname = "ocfs2",
  72 + .data = NULL,
  73 + .maxlen = 0,
  74 + .mode = 0555,
  75 + .child = ocfs2_mod_table
  76 + },
  77 + { .ctl_name = 0}
  78 +};
  79 +
  80 +static ctl_table ocfs2_root_table[] = {
  81 + {
  82 + .ctl_name = CTL_FS,
  83 + .procname = "fs",
  84 + .data = NULL,
  85 + .maxlen = 0,
  86 + .mode = 0555,
  87 + .child = ocfs2_kern_table
  88 + },
  89 + { .ctl_name = 0 }
  90 +};
  91 +
  92 +static struct ctl_table_header *ocfs2_table_header = NULL;
  93 +
  94 +const char *o2nm_get_hb_ctl_path(void)
  95 +{
  96 + return ocfs2_hb_ctl_path;
  97 +}
  98 +EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path);
  99 +
  100 +struct o2nm_cluster {
  101 + struct config_group cl_group;
  102 + unsigned cl_has_local:1;
  103 + u8 cl_local_node;
  104 + rwlock_t cl_nodes_lock;
  105 + struct o2nm_node *cl_nodes[O2NM_MAX_NODES];
  106 + struct rb_root cl_node_ip_tree;
  107 + /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
  108 + unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
  109 +};
  110 +
  111 +struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
  112 +{
  113 + struct o2nm_node *node = NULL;
  114 +
  115 + if (node_num >= O2NM_MAX_NODES || o2nm_single_cluster == NULL)
  116 + goto out;
  117 +
  118 + read_lock(&o2nm_single_cluster->cl_nodes_lock);
  119 + node = o2nm_single_cluster->cl_nodes[node_num];
  120 + if (node)
  121 + config_item_get(&node->nd_item);
  122 + read_unlock(&o2nm_single_cluster->cl_nodes_lock);
  123 +out:
  124 + return node;
  125 +}
  126 +EXPORT_SYMBOL_GPL(o2nm_get_node_by_num);
  127 +
  128 +int o2nm_configured_node_map(unsigned long *map, unsigned bytes)
  129 +{
  130 + struct o2nm_cluster *cluster = o2nm_single_cluster;
  131 +
  132 + BUG_ON(bytes < (sizeof(cluster->cl_nodes_bitmap)));
  133 +
  134 + if (cluster == NULL)
  135 + return -EINVAL;
  136 +
  137 + read_lock(&cluster->cl_nodes_lock);
  138 + memcpy(map, cluster->cl_nodes_bitmap, sizeof(cluster->cl_nodes_bitmap));
  139 + read_unlock(&cluster->cl_nodes_lock);
  140 +
  141 + return 0;
  142 +}
  143 +EXPORT_SYMBOL_GPL(o2nm_configured_node_map);
  144 +
  145 +static struct o2nm_node *o2nm_node_ip_tree_lookup(struct o2nm_cluster *cluster,
  146 + __be32 ip_needle,
  147 + struct rb_node ***ret_p,
  148 + struct rb_node **ret_parent)
  149 +{
  150 + struct rb_node **p = &cluster->cl_node_ip_tree.rb_node;
  151 + struct rb_node *parent = NULL;
  152 + struct o2nm_node *node, *ret = NULL;
  153 +
  154 + while (*p) {
  155 + parent = *p;
  156 + node = rb_entry(parent, struct o2nm_node, nd_ip_node);
  157 +
  158 + if (memcmp(&ip_needle, &node->nd_ipv4_address,
  159 + sizeof(ip_needle)) < 0)
  160 + p = &(*p)->rb_left;
  161 + else if (memcmp(&ip_needle, &node->nd_ipv4_address,
  162 + sizeof(ip_needle)) > 0)
  163 + p = &(*p)->rb_right;
  164 + else {
  165 + ret = node;
  166 + break;
  167 + }
  168 + }
  169 +
  170 + if (ret_p != NULL)
  171 + *ret_p = p;
  172 + if (ret_parent != NULL)
  173 + *ret_parent = parent;
  174 +
  175 + return ret;
  176 +}
  177 +
  178 +struct o2nm_node *o2nm_get_node_by_ip(__be32 addr)
  179 +{
  180 + struct o2nm_node *node = NULL;
  181 + struct o2nm_cluster *cluster = o2nm_single_cluster;
  182 +
  183 + if (cluster == NULL)
  184 + goto out;
  185 +
  186 + read_lock(&cluster->cl_nodes_lock);
  187 + node = o2nm_node_ip_tree_lookup(cluster, addr, NULL, NULL);
  188 + if (node)
  189 + config_item_get(&node->nd_item);
  190 + read_unlock(&cluster->cl_nodes_lock);
  191 +
  192 +out:
  193 + return node;
  194 +}
  195 +EXPORT_SYMBOL_GPL(o2nm_get_node_by_ip);
  196 +
  197 +void o2nm_node_put(struct o2nm_node *node)
  198 +{
  199 + config_item_put(&node->nd_item);
  200 +}
  201 +EXPORT_SYMBOL_GPL(o2nm_node_put);
  202 +
  203 +void o2nm_node_get(struct o2nm_node *node)
  204 +{
  205 + config_item_get(&node->nd_item);
  206 +}
  207 +EXPORT_SYMBOL_GPL(o2nm_node_get);
  208 +
  209 +u8 o2nm_this_node(void)
  210 +{
  211 + u8 node_num = O2NM_MAX_NODES;
  212 +
  213 + if (o2nm_single_cluster && o2nm_single_cluster->cl_has_local)
  214 + node_num = o2nm_single_cluster->cl_local_node;
  215 +
  216 + return node_num;
  217 +}
  218 +EXPORT_SYMBOL_GPL(o2nm_this_node);
  219 +
  220 +/* node configfs bits */
  221 +
  222 +static struct o2nm_cluster *to_o2nm_cluster(struct config_item *item)
  223 +{
  224 + return item ?
  225 + container_of(to_config_group(item), struct o2nm_cluster,
  226 + cl_group)
  227 + : NULL;
  228 +}
  229 +
  230 +static struct o2nm_node *to_o2nm_node(struct config_item *item)
  231 +{
  232 + return item ? container_of(item, struct o2nm_node, nd_item) : NULL;
  233 +}
  234 +
  235 +static void o2nm_node_release(struct config_item *item)
  236 +{
  237 + struct o2nm_node *node = to_o2nm_node(item);
  238 + kfree(node);
  239 +}
  240 +
  241 +static ssize_t o2nm_node_num_read(struct o2nm_node *node, char *page)
  242 +{
  243 + return sprintf(page, "%d\n", node->nd_num);
  244 +}
  245 +
  246 +static struct o2nm_cluster *to_o2nm_cluster_from_node(struct o2nm_node *node)
  247 +{
  248 + /* through the first node_set .parent
  249 + * mycluster/nodes/mynode == o2nm_cluster->o2nm_node_group->o2nm_node */
  250 + return to_o2nm_cluster(node->nd_item.ci_parent->ci_parent);
  251 +}
  252 +
  253 +enum {
  254 + O2NM_NODE_ATTR_NUM = 0,
  255 + O2NM_NODE_ATTR_PORT,
  256 + O2NM_NODE_ATTR_ADDRESS,
  257 + O2NM_NODE_ATTR_LOCAL,
  258 +};
  259 +
  260 +static ssize_t o2nm_node_num_write(struct o2nm_node *node, const char *page,
  261 + size_t count)
  262 +{
  263 + struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
  264 + unsigned long tmp;
  265 + char *p = (char *)page;
  266 +
  267 + tmp = simple_strtoul(p, &p, 0);
  268 + if (!p || (*p && (*p != '\n')))
  269 + return -EINVAL;
  270 +
  271 + if (tmp >= O2NM_MAX_NODES)
  272 + return -ERANGE;
  273 +
  274 + /* once we're in the cl_nodes tree networking can look us up by
  275 + * node number and try to use our address and port attributes
  276 + * to connect to this node.. make sure that they've been set
  277 + * before writing the node attribute? */
  278 + if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) ||
  279 + !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes))
  280 + return -EINVAL; /* XXX */
  281 +
  282 + write_lock(&cluster->cl_nodes_lock);
  283 + if (cluster->cl_nodes[tmp])
  284 + p = NULL;
  285 + else {
  286 + cluster->cl_nodes[tmp] = node;
  287 + node->nd_num = tmp;
  288 + set_bit(tmp, cluster->cl_nodes_bitmap);
  289 + }
  290 + write_unlock(&cluster->cl_nodes_lock);
  291 + if (p == NULL)
  292 + return -EEXIST;
  293 +
  294 + return count;
  295 +}
  296 +static ssize_t o2nm_node_ipv4_port_read(struct o2nm_node *node, char *page)
  297 +{
  298 + return sprintf(page, "%u\n", ntohs(node->nd_ipv4_port));
  299 +}
  300 +
  301 +static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node,
  302 + const char *page, size_t count)
  303 +{
  304 + unsigned long tmp;
  305 + char *p = (char *)page;
  306 +
  307 + tmp = simple_strtoul(p, &p, 0);
  308 + if (!p || (*p && (*p != '\n')))
  309 + return -EINVAL;
  310 +
  311 + if (tmp == 0)
  312 + return -EINVAL;
  313 + if (tmp >= (u16)-1)
  314 + return -ERANGE;
  315 +
  316 + node->nd_ipv4_port = htons(tmp);
  317 +
  318 + return count;
  319 +}
  320 +
  321 +static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page)
  322 +{
  323 + return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address));
  324 +}
  325 +
  326 +static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node,
  327 + const char *page,
  328 + size_t count)
  329 +{
  330 + struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
  331 + int ret, i;
  332 + struct rb_node **p, *parent;
  333 + unsigned int octets[4];
  334 + __be32 ipv4_addr = 0;
  335 +
  336 + ret = sscanf(page, "%3u.%3u.%3u.%3u", &octets[3], &octets[2],
  337 + &octets[1], &octets[0]);
  338 + if (ret != 4)
  339 + return -EINVAL;
  340 +
  341 + for (i = 0; i < ARRAY_SIZE(octets); i++) {
  342 + if (octets[i] > 255)
  343 + return -ERANGE;
  344 + be32_add_cpu(&ipv4_addr, octets[i] << (i * 8));
  345 + }
  346 +
  347 + ret = 0;
  348 + write_lock(&cluster->cl_nodes_lock);
  349 + if (o2nm_node_ip_tree_lookup(cluster, ipv4_addr, &p, &parent))
  350 + ret = -EEXIST;
  351 + else {
  352 + rb_link_node(&node->nd_ip_node, parent, p);
  353 + rb_insert_color(&node->nd_ip_node, &cluster->cl_node_ip_tree);
  354 + }
  355 + write_unlock(&cluster->cl_nodes_lock);
  356 + if (ret)
  357 + return ret;
  358 +
  359 + memcpy(&node->nd_ipv4_address, &ipv4_addr, sizeof(ipv4_addr));
  360 +
  361 + return count;
  362 +}
  363 +
  364 +static ssize_t o2nm_node_local_read(struct o2nm_node *node, char *page)
  365 +{
  366 + return sprintf(page, "%d\n", node->nd_local);
  367 +}
  368 +
  369 +static ssize_t o2nm_node_local_write(struct o2nm_node *node, const char *page,
  370 + size_t count)
  371 +{
  372 + struct o2nm_cluster *cluster = to_o2nm_cluster_from_node(node);
  373 + unsigned long tmp;
  374 + char *p = (char *)page;
  375 + ssize_t ret;
  376 +
  377 + tmp = simple_strtoul(p, &p, 0);
  378 + if (!p || (*p && (*p != '\n')))
  379 + return -EINVAL;
  380 +
  381 + tmp = !!tmp; /* boolean of whether this node wants to be local */
  382 +
  383 + /* setting local turns on networking rx for now so we require having
  384 + * set everything else first */
  385 + if (!test_bit(O2NM_NODE_ATTR_ADDRESS, &node->nd_set_attributes) ||
  386 + !test_bit(O2NM_NODE_ATTR_NUM, &node->nd_set_attributes) ||
  387 + !test_bit(O2NM_NODE_ATTR_PORT, &node->nd_set_attributes))
  388 + return -EINVAL; /* XXX */
  389 +
  390 + /* the only failure case is trying to set a new local node
  391 + * when a different one is already set */
  392 + if (tmp && tmp == cluster->cl_has_local &&
  393 + cluster->cl_local_node != node->nd_num)
  394 + return -EBUSY;
  395 +
  396 + /* bring up the rx thread if we're setting the new local node. */
  397 + if (tmp && !cluster->cl_has_local) {
  398 + ret = o2net_start_listening(node);
  399 + if (ret)
  400 + return ret;
  401 + }
  402 +
  403 + if (!tmp && cluster->cl_has_local &&
  404 + cluster->cl_local_node == node->nd_num) {
  405 + o2net_stop_listening(node);
  406 + cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
  407 + }
  408 +
  409 + node->nd_local = tmp;
  410 + if (node->nd_local) {
  411 + cluster->cl_has_local = tmp;
  412 + cluster->cl_local_node = node->nd_num;
  413 + }
  414 +
  415 + return count;
  416 +}
  417 +
  418 +struct o2nm_node_attribute {
  419 + struct configfs_attribute attr;
  420 + ssize_t (*show)(struct o2nm_node *, char *);
  421 + ssize_t (*store)(struct o2nm_node *, const char *, size_t);
  422 +};
  423 +
  424 +static struct o2nm_node_attribute o2nm_node_attr_num = {
  425 + .attr = { .ca_owner = THIS_MODULE,
  426 + .ca_name = "num",
  427 + .ca_mode = S_IRUGO | S_IWUSR },
  428 + .show = o2nm_node_num_read,
  429 + .store = o2nm_node_num_write,
  430 +};
  431 +
  432 +static struct o2nm_node_attribute o2nm_node_attr_ipv4_port = {
  433 + .attr = { .ca_owner = THIS_MODULE,
  434 + .ca_name = "ipv4_port",
  435 + .ca_mode = S_IRUGO | S_IWUSR },
  436 + .show = o2nm_node_ipv4_port_read,
  437 + .store = o2nm_node_ipv4_port_write,
  438 +};
  439 +
  440 +static struct o2nm_node_attribute o2nm_node_attr_ipv4_address = {
  441 + .attr = { .ca_owner = THIS_MODULE,
  442 + .ca_name = "ipv4_address",
  443 + .ca_mode = S_IRUGO | S_IWUSR },
  444 + .show = o2nm_node_ipv4_address_read,
  445 + .store = o2nm_node_ipv4_address_write,
  446 +};
  447 +
  448 +static struct o2nm_node_attribute o2nm_node_attr_local = {
  449 + .attr = { .ca_owner = THIS_MODULE,
  450 + .ca_name = "local",
  451 + .ca_mode = S_IRUGO | S_IWUSR },
  452 + .show = o2nm_node_local_read,
  453 + .store = o2nm_node_local_write,
  454 +};
  455 +
  456 +static struct configfs_attribute *o2nm_node_attrs[] = {
  457 + [O2NM_NODE_ATTR_NUM] = &o2nm_node_attr_num.attr,
  458 + [O2NM_NODE_ATTR_PORT] = &o2nm_node_attr_ipv4_port.attr,
  459 + [O2NM_NODE_ATTR_ADDRESS] = &o2nm_node_attr_ipv4_address.attr,
  460 + [O2NM_NODE_ATTR_LOCAL] = &o2nm_node_attr_local.attr,
  461 + NULL,
  462 +};
  463 +
  464 +static int o2nm_attr_index(struct configfs_attribute *attr)
  465 +{
  466 + int i;
  467 + for (i = 0; i < ARRAY_SIZE(o2nm_node_attrs); i++) {
  468 + if (attr == o2nm_node_attrs[i])
  469 + return i;
  470 + }
  471 + BUG();
  472 + return 0;
  473 +}
  474 +
  475 +static ssize_t o2nm_node_show(struct config_item *item,
  476 + struct configfs_attribute *attr,
  477 + char *page)
  478 +{
  479 + struct o2nm_node *node = to_o2nm_node(item);
  480 + struct o2nm_node_attribute *o2nm_node_attr =
  481 + container_of(attr, struct o2nm_node_attribute, attr);
  482 + ssize_t ret = 0;
  483 +
  484 + if (o2nm_node_attr->show)
  485 + ret = o2nm_node_attr->show(node, page);
  486 + return ret;
  487 +}
  488 +
  489 +static ssize_t o2nm_node_store(struct config_item *item,
  490 + struct configfs_attribute *attr,
  491 + const char *page, size_t count)
  492 +{
  493 + struct o2nm_node *node = to_o2nm_node(item);
  494 + struct o2nm_node_attribute *o2nm_node_attr =
  495 + container_of(attr, struct o2nm_node_attribute, attr);
  496 + ssize_t ret;
  497 + int attr_index = o2nm_attr_index(attr);
  498 +
  499 + if (o2nm_node_attr->store == NULL) {
  500 + ret = -EINVAL;
  501 + goto out;
  502 + }
  503 +
  504 + if (test_bit(attr_index, &node->nd_set_attributes))
  505 + return -EBUSY;
  506 +
  507 + ret = o2nm_node_attr->store(node, page, count);
  508 + if (ret < count)
  509 + goto out;
  510 +
  511 + set_bit(attr_index, &node->nd_set_attributes);
  512 +out:
  513 + return ret;
  514 +}
  515 +
  516 +static struct configfs_item_operations o2nm_node_item_ops = {
  517 + .release = o2nm_node_release,
  518 + .show_attribute = o2nm_node_show,
  519 + .store_attribute = o2nm_node_store,
  520 +};
  521 +
  522 +static struct config_item_type o2nm_node_type = {
  523 + .ct_item_ops = &o2nm_node_item_ops,
  524 + .ct_attrs = o2nm_node_attrs,
  525 + .ct_owner = THIS_MODULE,
  526 +};
  527 +
  528 +/* node set */
  529 +
  530 +struct o2nm_node_group {
  531 + struct config_group ns_group;
  532 + /* some stuff? */
  533 +};
  534 +
  535 +#if 0
  536 +static struct o2nm_node_group *to_o2nm_node_group(struct config_group *group)
  537 +{
  538 + return group ?
  539 + container_of(group, struct o2nm_node_group, ns_group)
  540 + : NULL;
  541 +}
  542 +#endif
  543 +
  544 +static struct config_item *o2nm_node_group_make_item(struct config_group *group,
  545 + const char *name)
  546 +{
  547 + struct o2nm_node *node = NULL;
  548 + struct config_item *ret = NULL;
  549 +
  550 + if (strlen(name) > O2NM_MAX_NAME_LEN)
  551 + goto out; /* ENAMETOOLONG */
  552 +
  553 + node = kcalloc(1, sizeof(struct o2nm_node), GFP_KERNEL);
  554 + if (node == NULL)
  555 + goto out; /* ENOMEM */
  556 +
  557 + strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */
  558 + config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
  559 + spin_lock_init(&node->nd_lock);
  560 +
  561 + ret = &node->nd_item;
  562 +
  563 +out:
  564 + if (ret == NULL)
  565 + kfree(node);
  566 +
  567 + return ret;
  568 +}
  569 +
  570 +static void o2nm_node_group_drop_item(struct config_group *group,
  571 + struct config_item *item)
  572 +{
  573 + struct o2nm_node *node = to_o2nm_node(item);
  574 + struct o2nm_cluster *cluster = to_o2nm_cluster(group->cg_item.ci_parent);
  575 +
  576 + o2net_disconnect_node(node);
  577 +
  578 + if (cluster->cl_has_local &&
  579 + (cluster->cl_local_node == node->nd_num)) {
  580 + cluster->cl_has_local = 0;
  581 + cluster->cl_local_node = O2NM_INVALID_NODE_NUM;
  582 + o2net_stop_listening(node);
  583 + }
  584 +
  585 + /* XXX call into net to stop this node from trading messages */
  586 +
  587 + write_lock(&cluster->cl_nodes_lock);
  588 +
  589 + /* XXX sloppy */
  590 + if (node->nd_ipv4_address)
  591 + rb_erase(&node->nd_ip_node, &cluster->cl_node_ip_tree);
  592 +
  593 + /* nd_num might be 0 if the node number hasn't been set.. */
  594 + if (cluster->cl_nodes[node->nd_num] == node) {
  595 + cluster->cl_nodes[node->nd_num] = NULL;
  596 + clear_bit(node->nd_num, cluster->cl_nodes_bitmap);
  597 + }
  598 + write_unlock(&cluster->cl_nodes_lock);
  599 +
  600 + config_item_put(item);
  601 +}
  602 +
  603 +static struct configfs_group_operations o2nm_node_group_group_ops = {
  604 + .make_item = o2nm_node_group_make_item,
  605 + .drop_item = o2nm_node_group_drop_item,
  606 +};
  607 +
  608 +static struct config_item_type o2nm_node_group_type = {
  609 + .ct_group_ops = &o2nm_node_group_group_ops,
  610 + .ct_owner = THIS_MODULE,
  611 +};
  612 +
  613 +/* cluster */
  614 +
  615 +static void o2nm_cluster_release(struct config_item *item)
  616 +{
  617 + struct o2nm_cluster *cluster = to_o2nm_cluster(item);
  618 +
  619 + kfree(cluster->cl_group.default_groups);
  620 + kfree(cluster);
  621 +}
  622 +
  623 +static struct configfs_item_operations o2nm_cluster_item_ops = {
  624 + .release = o2nm_cluster_release,
  625 +};
  626 +
  627 +static struct config_item_type o2nm_cluster_type = {
  628 + .ct_item_ops = &o2nm_cluster_item_ops,
  629 + .ct_owner = THIS_MODULE,
  630 +};
  631 +
  632 +/* cluster set */
  633 +
  634 +struct o2nm_cluster_group {
  635 + struct configfs_subsystem cs_subsys;
  636 + /* some stuff? */
  637 +};
  638 +
  639 +#if 0
  640 +static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *group)
  641 +{
  642 + return group ?
  643 + container_of(to_configfs_subsystem(group), struct o2nm_cluster_group, cs_subsys)
  644 + : NULL;
  645 +}
  646 +#endif
  647 +
  648 +static struct config_group *o2nm_cluster_group_make_group(struct config_group *group,
  649 + const char *name)
  650 +{
  651 + struct o2nm_cluster *cluster = NULL;
  652 + struct o2nm_node_group *ns = NULL;
  653 + struct config_group *o2hb_group = NULL, *ret = NULL;
  654 + void *defs = NULL;
  655 +
  656 + /* this runs under the parent dir's i_sem; there can be only
  657 + * one caller in here at a time */
  658 + if (o2nm_single_cluster)
  659 + goto out; /* ENOSPC */
  660 +
  661 + cluster = kcalloc(1, sizeof(struct o2nm_cluster), GFP_KERNEL);
  662 + ns = kcalloc(1, sizeof(struct o2nm_node_group), GFP_KERNEL);
  663 + defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
  664 + o2hb_group = o2hb_alloc_hb_set();
  665 + if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
  666 + goto out;
  667 +
  668 + config_group_init_type_name(&cluster->cl_group, name,
  669 + &o2nm_cluster_type);
  670 + config_group_init_type_name(&ns->ns_group, "node",
  671 + &o2nm_node_group_type);
  672 +
  673 + cluster->cl_group.default_groups = defs;
  674 + cluster->cl_group.default_groups[0] = &ns->ns_group;
  675 + cluster->cl_group.default_groups[1] = o2hb_group;
  676 + cluster->cl_group.default_groups[2] = NULL;
  677 + rwlock_init(&cluster->cl_nodes_lock);
  678 + cluster->cl_node_ip_tree = RB_ROOT;
  679 +
  680 + ret = &cluster->cl_group;
  681 + o2nm_single_cluster = cluster;
  682 +
  683 +out:
  684 + if (ret == NULL) {
  685 + kfree(cluster);
  686 + kfree(ns);
  687 + o2hb_free_hb_set(o2hb_group);
  688 + kfree(defs);
  689 + }
  690 +
  691 + return ret;
  692 +}
  693 +
  694 +static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item)
  695 +{
  696 + struct o2nm_cluster *cluster = to_o2nm_cluster(item);
  697 + int i;
  698 + struct config_item *killme;
  699 +
  700 + BUG_ON(o2nm_single_cluster != cluster);
  701 + o2nm_single_cluster = NULL;
  702 +
  703 + for (i = 0; cluster->cl_group.default_groups[i]; i++) {
  704 + killme = &cluster->cl_group.default_groups[i]->cg_item;
  705 + cluster->cl_group.default_groups[i] = NULL;
  706 + config_item_put(killme);
  707 + }
  708 +
  709 + config_item_put(item);
  710 +}
  711 +
  712 +static struct configfs_group_operations o2nm_cluster_group_group_ops = {
  713 + .make_group = o2nm_cluster_group_make_group,
  714 + .drop_item = o2nm_cluster_group_drop_item,
  715 +};
  716 +
  717 +static struct config_item_type o2nm_cluster_group_type = {
  718 + .ct_group_ops = &o2nm_cluster_group_group_ops,
  719 + .ct_owner = THIS_MODULE,
  720 +};
  721 +
  722 +static struct o2nm_cluster_group o2nm_cluster_group = {
  723 + .cs_subsys = {
  724 + .su_group = {
  725 + .cg_item = {
  726 + .ci_namebuf = "cluster",
  727 + .ci_type = &o2nm_cluster_group_type,
  728 + },
  729 + },
  730 + },
  731 +};
  732 +
  733 +static void __exit exit_o2nm(void)
  734 +{
  735 + if (ocfs2_table_header)
  736 + unregister_sysctl_table(ocfs2_table_header);
  737 +
  738 + /* XXX sync with hb callbacks and shut down hb? */
  739 + o2net_unregister_hb_callbacks();
  740 + configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys);
  741 + o2cb_sys_shutdown();
  742 +
  743 + o2net_exit();
  744 +}
  745 +
  746 +static int __init init_o2nm(void)
  747 +{
  748 + int ret = -1;
  749 +
  750 + cluster_print_version();
  751 +
  752 + o2hb_init();
  753 + o2net_init();
  754 +
  755 + ocfs2_table_header = register_sysctl_table(ocfs2_root_table, 0);
  756 + if (!ocfs2_table_header) {
  757 + printk(KERN_ERR "nodemanager: unable to register sysctl\n");
  758 + ret = -ENOMEM; /* or something. */
  759 + goto out;
  760 + }
  761 +
  762 + ret = o2net_register_hb_callbacks();
  763 + if (ret)
  764 + goto out_sysctl;
  765 +
  766 + config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
  767 + init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem);
  768 + ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys);
  769 + if (ret) {
  770 + printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
  771 + goto out_callbacks;
  772 + }
  773 +
  774 + ret = o2cb_sys_init();
  775 + if (!ret)
  776 + goto out;
  777 +
  778 + configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys);
  779 +out_callbacks:
  780 + o2net_unregister_hb_callbacks();
  781 +out_sysctl:
  782 + unregister_sysctl_table(ocfs2_table_header);
  783 +out:
  784 + return ret;
  785 +}
  786 +
  787 +MODULE_AUTHOR("Oracle");
  788 +MODULE_LICENSE("GPL");
  789 +
  790 +module_init(init_o2nm)
  791 +module_exit(exit_o2nm)
fs/ocfs2/cluster/nodemanager.h
  1 +/* -*- mode: c; c-basic-offset: 8; -*-
  2 + * vim: noexpandtab sw=8 ts=8 sts=0:
  3 + *
  4 + * nodemanager.h
  5 + *
  6 + * Function prototypes
  7 + *
  8 + * Copyright (C) 2004 Oracle. All rights reserved.
  9 + *
  10 + * This program is free software; you can redistribute it and/or
  11 + * modify it under the terms of the GNU General Public
  12 + * License as published by the Free Software Foundation; either
  13 + * version 2 of the License, or (at your option) any later version.
  14 + *
  15 + * This program is distributed in the hope that it will be useful,
  16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18 + * General Public License for more details.
  19 + *
  20 + * You should have received a copy of the GNU General Public
  21 + * License along with this program; if not, write to the
  22 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23 + * Boston, MA 02111-1307, USA.
  24 + *
  25 + */
  26 +
  27 +#ifndef O2CLUSTER_NODEMANAGER_H
  28 +#define O2CLUSTER_NODEMANAGER_H
  29 +
  30 +#include "ocfs2_nodemanager.h"
  31 +
  32 +/* This totally doesn't belong here. */
  33 +#include <linux/configfs.h>
  34 +#include <linux/rbtree.h>
  35 +
  36 +#define KERN_OCFS2 988
  37 +#define KERN_OCFS2_NM 1
  38 +
  39 +const char *o2nm_get_hb_ctl_path(void);
  40 +
  41 +struct o2nm_node {
  42 + spinlock_t nd_lock;
  43 + struct config_item nd_item;
  44 + char nd_name[O2NM_MAX_NAME_LEN+1]; /* replace? */
  45 + __u8 nd_num;
  46 + /* only one address per node, as attributes, for now. */
  47 + __be32 nd_ipv4_address;
  48 + __be16 nd_ipv4_port;
  49 + struct rb_node nd_ip_node;
  50 + /* there can be only one local node for now */
  51 + int nd_local;
  52 +
  53 + unsigned long nd_set_attributes;
  54 +};
  55 +
  56 +u8 o2nm_this_node(void);
  57 +
  58 +int o2nm_configured_node_map(unsigned long *map, unsigned bytes);
  59 +struct o2nm_node *o2nm_get_node_by_num(u8 node_num);
  60 +struct o2nm_node *o2nm_get_node_by_ip(__be32 addr);
  61 +void o2nm_node_get(struct o2nm_node *node);
  62 +void o2nm_node_put(struct o2nm_node *node);
  63 +
  64 +#endif /* O2CLUSTER_NODEMANAGER_H */
fs/ocfs2/cluster/ocfs2_nodemanager.h
  1 +/* -*- mode: c; c-basic-offset: 8; -*-
  2 + * vim: noexpandtab sw=8 ts=8 sts=0:
  3 + *
  4 + * ocfs2_nodemanager.h
  5 + *
  6 + * Header describing the interface between userspace and the kernel
  7 + * for the ocfs2_nodemanager module.
  8 + *
  9 + * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  10 + *
  11 + * This program is free software; you can redistribute it and/or
  12 + * modify it under the terms of the GNU General Public
  13 + * License as published by the Free Software Foundation; either
  14 + * version 2 of the License, or (at your option) any later version.
  15 + *
  16 + * This program is distributed in the hope that it will be useful,
  17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19 + * General Public License for more details.
  20 + *
  21 + * You should have received a copy of the GNU General Public
  22 + * License along with this program; if not, write to the
  23 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 + * Boston, MA 02111-1307, USA.
  25 + *
  26 + */
  27 +
  28 +#ifndef _OCFS2_NODEMANAGER_H
  29 +#define _OCFS2_NODEMANAGER_H
  30 +
  31 +#define O2NM_API_VERSION 5
  32 +
  33 +#define O2NM_MAX_NODES 255
  34 +#define O2NM_INVALID_NODE_NUM 255
  35 +
  36 +/* host name, group name, cluster name all 64 bytes */
  37 +#define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
  38 +
  39 +#endif /* _OCFS2_NODEMANAGER_H */
fs/ocfs2/cluster/ver.c
  1 +/* -*- mode: c; c-basic-offset: 8; -*-
  2 + * vim: noexpandtab sw=8 ts=8 sts=0:
  3 + *
  4 + * ver.c
  5 + *
  6 + * version string
  7 + *
  8 + * Copyright (C) 2002, 2005 Oracle. All rights reserved.
  9 + *
  10 + * This program is free software; you can redistribute it and/or
  11 + * modify it under the terms of the GNU General Public
  12 + * License as published by the Free Software Foundation; either
  13 + * version 2 of the License, or (at your option) any later version.
  14 + *
  15 + * This program is distributed in the hope that it will be useful,
  16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18 + * General Public License for more details.
  19 + *
  20 + * You should have received a copy of the GNU General Public
  21 + * License along with this program; if not, write to the
  22 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23 + * Boston, MA 02111-1307, USA.
  24 + */
  25 +
  26 +#include <linux/module.h>
  27 +#include <linux/kernel.h>
  28 +
  29 +#include "ver.h"
  30 +
  31 +#define CLUSTER_BUILD_VERSION "1.3.3"
  32 +
  33 +#define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
  34 +
  35 +void cluster_print_version(void)
  36 +{
  37 + printk(KERN_INFO "%s\n", VERSION_STR);
  38 +}
  39 +
  40 +MODULE_DESCRIPTION(VERSION_STR);
  41 +
  42 +MODULE_VERSION(CLUSTER_BUILD_VERSION);
fs/ocfs2/cluster/ver.h
  1 +/* -*- mode: c; c-basic-offset: 8; -*-
  2 + * vim: noexpandtab sw=8 ts=8 sts=0:
  3 + *
  4 + * ver.h
  5 + *
  6 + * Function prototypes
  7 + *
  8 + * Copyright (C) 2005 Oracle. All rights reserved.
  9 + *
  10 + * This program is free software; you can redistribute it and/or
  11 + * modify it under the terms of the GNU General Public
  12 + * License as published by the Free Software Foundation; either
  13 + * version 2 of the License, or (at your option) any later version.
  14 + *
  15 + * This program is distributed in the hope that it will be useful,
  16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18 + * General Public License for more details.
  19 + *
  20 + * You should have received a copy of the GNU General Public
  21 + * License along with this program; if not, write to the
  22 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23 + * Boston, MA 02111-1307, USA.
  24 + */
  25 +
  26 +#ifndef O2CLUSTER_VER_H
  27 +#define O2CLUSTER_VER_H
  28 +
  29 +void cluster_print_version(void);
  30 +
  31 +#endif /* O2CLUSTER_VER_H */