linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAID device driver
@ 2002-09-24  1:54 Larry Kessler
  2002-09-24  2:22 ` Jeff Garzik
  0 siblings, 1 reply; 206+ messages in thread
From: Larry Kessler @ 2002-09-24  1:54 UTC (permalink / raw)
  To: linux-kernel mailing list
  Cc: Alan Cox, Andrew V. Savochkin, cgl_discussion mailing list,
	evlog mailing list, ipslinux (Keith Mitchell),
	Linus Torvalds, Rusty Russell, evlog mailing list

Please see [PATCH-RFC] README 1st note.

Also note that this patch includes pci_problem.h, as does the eepro100.c
device driver patch included in the 'README 1st' note.
 
Summary of this patch...
 
 drivers/scsi/ips.c
    Device Driver for the IBM ServeRAID controller, with use of new
    macros replacing printk() for error conditions.
 
 drivers/scsi/scsi_problem.h
  -  scsi_host_detail() macro providing common information of interest
     for scsi-class devices.    
  -  scsi_host_problem and scsi_host_introduce macros   

 include/linux/pci_problem.h

  -  pci_detail() macro providing common information on a per class
     basis when problems are being reported for devices of that class. 
  -  pci_problem and pci_introduce macros


--- linux-2.5.37/drivers/scsi/ips.c	Fri Sep 20 10:20:13 2002
+++ linux-2.5.37-net/drivers/scsi/ips.c	Mon Sep 23 19:54:30 2002
@@ -176,6 +176,7 @@
 #include "scsi.h"
 #include "hosts.h"
 #include "ips.h"
+#include "scsi_problem.h"
 
 #include <linux/module.h>
 
@@ -694,7 +695,7 @@
       ips_FlashData = ( char * ) __get_free_pages( GFP_KERNEL, 7 );   
       if (ips_FlashData == NULL) {
          /* The validity of this pointer is checked in ips_make_passthru() before it is used */
-         printk( KERN_WARNING "ERROR: Can't Allocate Large Buffer for Flashing\n" );
+         problem( LOG_WARNING, "ERROR: Can't Allocate Large Buffer for Flashing\n" );
       }
    }                                                                               
 
@@ -779,6 +780,9 @@
  #endif
    if (ips_num_controllers > 0) 
       register_reboot_notifier(&ips_notifier);
+   else
+     problem(LOG_INFO, "Unable to detect any ips controllers\n",
+           detail(ips_num_controllers, "%d", ips_num_controllers));
 
    return (ips_num_controllers);
 #endif
@@ -861,8 +865,11 @@
 #if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0)
             if (check_mem_region(mem_addr, mem_len)) {
                /* Couldn't allocate io space */
-               printk(KERN_WARNING "(%s%d) couldn't allocate IO space %x len %d.\n",
-                      ips_name, ips_next_controller, io_addr, io_len);
+               pci_problem(LOG_WARNING, dev[i], "check_mem_region failed. Couldn't allocate IO space\n",
+                           detail(ips_name, "%s", ips_name),
+                           detail(ips_number, "%d", ips_next_controller),
+                           detail(mem_addr, "%x", mem_addr), /* report the region that was actually checked */
+                           detail(mem_len, "%d", mem_len));
 
                ips_next_controller++;
 
@@ -889,8 +896,11 @@
 
             if (check_region(io_addr, io_len)) {
                /* Couldn't allocate io space */
-               printk(KERN_WARNING "(%s%d) couldn't allocate IO space %x len %d.\n",
-                      ips_name, ips_next_controller, io_addr, io_len);
+               pci_problem(LOG_WARNING, dev[i], "check_region failed. Couldn't allocate IO space\n",
+                           detail(ips_name, "%s", ips_name),
+                           detail(ips_number, "%d", ips_next_controller),
+                           detail(io_addr, "%x", io_addr),
+                           detail(io_len, "%d", io_len));
 
                ips_next_controller++;
 
@@ -902,8 +912,9 @@
 
          /* get planer status */
          if (pci_read_config_word(dev[i], 0x04, &planer)) {
-            printk(KERN_WARNING "(%s%d) can't get planer status.\n",
-                   ips_name, ips_next_controller);
+            pci_problem(LOG_WARNING, dev[i], "Can't get planer status.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ips_next_controller));
 
             ips_next_controller++;
 
@@ -926,8 +937,9 @@
 
          /* get the revision ID */
          if (pci_read_config_byte(dev[i], PCI_REVISION_ID, &revision_id)) {
-            printk(KERN_WARNING "(%s%d) can't get revision id.\n",
-                   ips_name, ips_next_controller);
+            pci_problem(LOG_WARNING, dev[i],  "Can't get revision id.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ips_next_controller));
 
             ips_next_controller++;
 
@@ -937,8 +949,9 @@
 #if LINUX_VERSION_CODE < LinuxVersionCode(2,4,0)
          /* get the subdevice id */
          if (pci_read_config_word(dev[i], PCI_SUBSYSTEM_ID, &subdevice_id)) {
-            printk(KERN_WARNING "(%s%d) can't get subdevice id.\n",
-                   ips_name, ips_next_controller);
+            pci_problem(LOG_WARNING, dev[i],  "Can't get subdevice id.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ips_next_controller));
 
             ips_next_controller++;
 
@@ -952,13 +965,17 @@
          sh = scsi_register(SHT, sizeof(ips_ha_t));
 
          if (sh == NULL) {
-            printk(KERN_WARNING "(%s%d) Unable to register controller with SCSI subsystem - skipping controller\n",
-                   ips_name, ips_next_controller);
+            pci_problem(LOG_WARNING, dev[i],  "Unable to register controller with the SCSI subsystem - skipping controller\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ips_next_controller));
 
             ips_next_controller++;
 
             continue;
          }
+         scsi_host_introduce(sh, "controller", 
+                             detail(ips_name, "%s", ips_name),
+                             detail(ips_number, "%d", ips_next_controller));
 
          ha = IPS_HA(sh);
          memset(ha, 0, sizeof(ips_ha_t));
@@ -979,8 +996,9 @@
          ha->enq = kmalloc(sizeof(IPS_ENQ), GFP_KERNEL);
 
          if (!ha->enq) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate host inquiry structure - skipping contoller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to allocate host inquiry structure - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -996,8 +1014,9 @@
          ha->adapt = kmalloc(sizeof(IPS_ADAPTER), GFP_KERNEL);
 
          if (!ha->adapt) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate host adapt structure - skipping controller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to allocate host adapt structure - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1013,8 +1032,9 @@
          ha->conf = kmalloc(sizeof(IPS_CONF), GFP_KERNEL);
 
          if (!ha->conf) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate host conf structure - skipping controller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to allocate host conf structure - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1030,8 +1050,9 @@
          ha->nvram = kmalloc(sizeof(IPS_NVRAM_P5), GFP_KERNEL);
 
          if (!ha->nvram) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate host nvram structure - skipping controller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to allocate host nvram structure - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1047,8 +1068,9 @@
          ha->subsys = kmalloc(sizeof(IPS_SUBSYS), GFP_KERNEL);
 
          if (!ha->subsys) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate host subsystem structure - skipping controller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to allocate host subsystem structure - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1064,8 +1086,9 @@
          ha->dummy = kmalloc(sizeof(IPS_IO_CMD), GFP_KERNEL);
 
          if (!ha->dummy) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate host dummy structure - skipping controller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to allocate host dummy structure - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1086,8 +1109,9 @@
          ha->ioctl_datasize = count;
 
          if (!ha->ioctl_data) {
-            printk(KERN_WARNING "(%s%d) Unable to allocate ioctl data\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh, "Unable to allocate ioctl data\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->ioctl_data = NULL;
             ha->ioctl_order = 0;
@@ -1189,8 +1213,9 @@
                /*
                 * Initialization failed
                 */
-               printk(KERN_WARNING "(%s%d) unable to initialize controller - skipping controller\n",
-                      ips_name, ips_next_controller);
+               scsi_host_problem(LOG_WARNING, sh,  "Initialization of controller failed - skipping controller\n",
+                                 detail(ips_name, "%s", ips_name),
+                                 detail(ips_number, "%d", ips_next_controller));
 
                ha->active = 0;
                ips_free(ha);
@@ -1206,8 +1231,9 @@
 
          /* install the interrupt handler */
          if (request_irq(irq, do_ipsintr, SA_SHIRQ, ips_name, ha)) {
-            printk(KERN_WARNING "(%s%d) unable to install interrupt handler - skipping controller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh,  "Unable to install interrupt handler - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1226,8 +1252,9 @@
          ha->max_cmds = 1;
          if (!ips_allocatescbs(ha)) {
             /* couldn't allocate a temp SCB */
-            printk(KERN_WARNING "(%s%d) unable to allocate CCBs - skipping contoller\n",
-                   ips_name, ips_next_controller);
+            scsi_host_problem(LOG_WARNING, sh, "Unable to allocate CCBs - skipping controller\n",
+                              detail(ips_name, "%s", ips_name),
+                              detail(ips_number, "%d", ips_next_controller));
 
             ha->active = 0;
             ips_free(ha);
@@ -1252,7 +1279,9 @@
    for (i = 0; i < ips_next_controller; i++) {
 
       if (ips_ha[i] == 0) {
-         printk(KERN_WARNING "(%s%d) ignoring bad controller\n", ips_name, i);
+         problem(LOG_WARNING, "Ignoring bad controller\n", 
+                           detail(ips_name, "%s", ips_name),
+                           detail(ips_index, "%d", i));
          continue;
       }
 
@@ -1293,8 +1322,8 @@
    for (i = 0; i < IPS_MAX_ADAPTERS && ips_sh[i] != sh; i++);
 
    if (i == IPS_MAX_ADAPTERS) {
-      printk(KERN_WARNING "(%s) release, invalid Scsi_Host pointer.\n",
-            ips_name);
+      problem(LOG_WARNING, "release fails, invalid Scsi_Host pointer.\n",
+                        detail(ips_name, "%s", ips_name));
 #if LINUX_VERSION_CODE >= LinuxVersionCode(2,4,0)
       BUG();
 #endif
@@ -1326,7 +1355,9 @@
 
    /* send command */
    if (ips_send_wait(ha, scb, ips_cmd_timeout, IPS_INTR_ON) == IPS_FAILURE)
-      printk(KERN_NOTICE "(%s%d) Incomplete Flush.\n", ips_name, ha->host_num);
+      scsi_host_problem(LOG_NOTICE, sh, "Incomplete Flush during release.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
    printk(KERN_NOTICE "(%s%d) Flushing Complete.\n", ips_name, ha->host_num);
 
@@ -1402,7 +1433,9 @@
 
       /* send command */
       if (ips_send_wait(ha, scb, ips_cmd_timeout, IPS_INTR_ON) == IPS_FAILURE)
-         printk(KERN_NOTICE "(%s%d) Incomplete Flush.\n", ips_name, ha->host_num);
+        problem(LOG_NOTICE, "Incomplete Flush during halt.\n",
+                          detail(ips_name, "%s", ips_name),
+                          detail(ips_number, "%d", ha->host_num));
       else
          printk(KERN_NOTICE "(%s%d) Flushing Complete.\n", ips_name, ha->host_num);
    }
@@ -1589,16 +1622,17 @@
     * command must have already been sent
     * reset the controller
     */
-   printk(KERN_NOTICE "(%s%d) Resetting controller.\n",
-          ips_name, ha->host_num);
+   scsi_host_problem(LOG_NOTICE, SC->host, "Resetting controller.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
    ret = (*ha->func.reset)(ha);
 
    if (!ret) {
       Scsi_Cmnd *scsi_cmd;
 
-      printk(KERN_NOTICE
-             "(%s%d) Controller reset failed - controller now offline.\n",
-             ips_name, ha->host_num);
+      scsi_host_problem(LOG_NOTICE, SC->host, "Controller reset has failed - controller now offline.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       /* Now fail all of the active commands */
       DEBUG_VAR(1, "(%s%d) Failing active commands",
@@ -1628,9 +1662,10 @@
    if (!ips_clear_adapter(ha, IPS_INTR_IORL)) {
       Scsi_Cmnd *scsi_cmd;
 
-      printk(KERN_NOTICE
-             "(%s%d) Controller reset failed - controller now offline.\n",
-             ips_name, ha->host_num);
+      scsi_host_problem(LOG_NOTICE, SC->host,
+             "Controller reset failed - controller now offline.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       /* Now fail all of the active commands */
       DEBUG_VAR(1, "(%s%d) Failing active commands",
@@ -2106,9 +2141,9 @@
          break;
 
       if (cstatus.fields.command_id > (IPS_MAX_CMDS - 1)) {
-         printk(KERN_WARNING "(%s%d) Spurious interrupt; no ccb.\n",
-                ips_name, ha->host_num);
-
+         problem(LOG_WARNING,  "Spurious interrupt; no ccb.\n",
+                           detail(ips_name, "%s", ips_name),
+                           detail(ips_number, "%d", ha->host_num));
          continue;
       }
 
@@ -2434,7 +2469,7 @@
       if(pt->CoppCP.cmd.flashfw.count + ha->flash_datasize >
         (PAGE_SIZE << ha->flash_order)){
          ips_free_flash_copperhead(ha);
-         printk(KERN_WARNING "failed size sanity check\n");
+         scsi_host_problem(LOG_WARNING, scb->scsi_cmd->host,  "failed size sanity check\n");
          return IPS_FAILURE;
       }
    }
@@ -3120,15 +3155,17 @@
    ips_ffdc_reset(ha, IPS_INTR_IORL);
 
    if (!ips_read_config(ha, IPS_INTR_IORL)) {
-      printk(KERN_WARNING "(%s%d) unable to read config from controller.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Unable to read config from controller.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    } /* end if */
 
    if (!ips_read_adapter_status(ha, IPS_INTR_IORL)) {
-      printk(KERN_WARNING "(%s%d) unable to read controller status.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Unable to read controller status.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
@@ -3137,16 +3174,18 @@
    ips_identify_controller(ha);
 
    if (!ips_read_subsystem_parameters(ha, IPS_INTR_IORL)) {
-      printk(KERN_WARNING "(%s%d) unable to read subsystem parameters.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Unable to read subsystem parameters.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
 
    /* write nvram user page 5 */
    if (!ips_write_driver_status(ha, IPS_INTR_IORL)) {
-      printk(KERN_WARNING "(%s%d) unable to write driver info to controller.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Unable to write driver info to controller.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
@@ -4028,16 +4067,18 @@
    METHOD_TRACE("ipsintr_done", 2);
 
    if (!scb) {
-      printk(KERN_WARNING "(%s%d) Spurious interrupt; scb NULL.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Spurious interrupt; scb NULL.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return ;
    }
 
    if (scb->scsi_cmd == NULL) {
       /* unexpected interrupt */
-      printk(KERN_WARNING "(%s%d) Spurious interrupt; scsi_cmd not set.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Spurious interrupt; scsi_cmd not set.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return;
    }
@@ -5459,8 +5500,11 @@
    }
 
    if (PostByte[0] < IPS_GOOD_POST_STATUS) {
-      printk(KERN_WARNING "(%s%d) reset controller fails (post status %x %x).\n",
-             ips_name, ha->host_num, PostByte[0], PostByte[1]);
+      problem(LOG_WARNING, "Reset copperhead controller fails\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num),
+                        detail(status1, "%x", PostByte[0]),
+                        detail(status2, "%x", PostByte[1]));
 
       return (0);
    }
@@ -5551,8 +5595,11 @@
    }
 
    if (PostByte[0] < IPS_GOOD_POST_STATUS) {
-      printk(KERN_WARNING "(%s%d) reset controller fails (post status %x %x).\n",
-             ips_name, ha->host_num, PostByte[0], PostByte[1]);
+      problem(LOG_WARNING, "Reset copperhead_mmio controller fails\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num),
+                        detail(status1, "%x", PostByte[0]),
+                        detail(status2, "%x", PostByte[1]));
 
       return (0);
    }
@@ -5638,8 +5685,9 @@
 
    if (i >= 45) {
       /* error occurred */
-      printk(KERN_WARNING "(%s%d) timeout waiting for post.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Timeout waiting for post.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
@@ -5651,8 +5699,10 @@
    writel(Isr, ha->mem_ptr + IPS_REG_I2O_HIR);
 
    if (Post < (IPS_GOOD_POST_STATUS << 8)) {
-      printk(KERN_WARNING "(%s%d) reset controller fails (post status %x).\n",
-             ips_name, ha->host_num, Post);
+      problem(LOG_WARNING, "Reset morpheus controller fails.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num),
+                        detail(post, "%x", Post));
 
       return (0);
    }
@@ -5670,8 +5720,9 @@
 
    if (i >= 240) {
       /* error occurred */
-      printk(KERN_WARNING "(%s%d) timeout waiting for config.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "timeout waiting for config.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
@@ -6012,10 +6063,13 @@
          if (!(val & IPS_BIT_START_STOP))
             break;
 
-         printk(KERN_WARNING "(%s%d) ips_issue val [0x%x].\n",
-                ips_name, ha->host_num, val);
-         printk(KERN_WARNING "(%s%d) ips_issue semaphore chk timeout.\n",
-                ips_name, ha->host_num);
+         problem(LOG_WARNING, "copperhead ips_issue val\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num),
+                        detail(val, "%x", val));
+         problem(LOG_WARNING, "copperhead ips_issue semaphore chk timeout.\n",
+                        detail(ips_name, "%s", ips_name),
+                           detail(ips_number, "%d", ha->host_num));
 
          IPS_HA_UNLOCK(cpu_flags);
 
@@ -6075,11 +6129,14 @@
          if (!(val & IPS_BIT_START_STOP))
             break;
 
-         printk(KERN_WARNING "(%s%d) ips_issue val [0x%x].\n",
-                ips_name, ha->host_num, val);
-         printk(KERN_WARNING "(%s%d) ips_issue semaphore chk timeout.\n",
-                ips_name, ha->host_num);
-
+         problem(LOG_WARNING, "copperhead_mmio ips_issue val\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num),
+                        detail(val, "%x", val));
+         problem(LOG_WARNING, "copperhead_mmio ips_issue semaphore chk timeout.\n",
+                        detail(ips_name, "%s", ips_name),
+                           detail(ips_number, "%d", ha->host_num));
+         
          IPS_HA_UNLOCK(cpu_flags);
 
          return (IPS_FAILURE);
@@ -6337,8 +6394,9 @@
    METHOD_TRACE("ips_write_driver_status", 1);
 
    if (!ips_readwrite_page5(ha, FALSE, intr)) {
-      printk(KERN_WARNING "(%s%d) unable to read NVRAM page 5.\n",
-             ips_name, ha->host_num);
+      problem(LOG_WARNING, "Unable to read NVRAM page 5.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
@@ -6374,8 +6432,9 @@
 
    /* now update the page */
    if (!ips_readwrite_page5(ha, TRUE, intr)) {
-      printk(KERN_WARNING "(%s%d) unable to write NVRAM page 5.\n",
-             ips_name, ha->host_num);
+     problem(LOG_WARNING, "Unable to update NVRAM page 5.\n",
+                        detail(ips_name, "%s", ips_name),
+                        detail(ips_number, "%d", ha->host_num));
 
       return (0);
    }
@@ -7321,16 +7380,22 @@
  if  (strncmp(FirmwareVersion, Compatable[ ha->nvram->adapter_type ], IPS_COMPAT_ID_LENGTH) != 0)
  {
      if (ips_cd_boot == 0)                                                                              
-       printk(KERN_WARNING "Warning: Adapter %d Firmware Compatible Version is %s, but should be %s\n", 
-              ha->host_num, FirmwareVersion, Compatable[ ha->nvram->adapter_type ]);                    
+       problem(LOG_WARNING, "Warning: Firmware Version mismatch\n", 
+                         detail(ips_name, "%s", ips_name),
+                         detail(ips_number, "%d", ha->host_num),
+                         detail(firmwareversion, "%s", FirmwareVersion),
+                         detail(compatableversion, "%s", Compatable[ ha->nvram->adapter_type ]));
      MatchError = 1;
  }
 
  if  (strncmp(BiosVersion, IPS_COMPAT_BIOS, IPS_COMPAT_ID_LENGTH) != 0)
  {
      if (ips_cd_boot == 0)                                                                          
-       printk(KERN_WARNING "Warning: Adapter %d BIOS Compatible Version is %s, but should be %s\n", 
-              ha->host_num, BiosVersion, IPS_COMPAT_BIOS);                                          
+       problem(LOG_WARNING, "Warning: BIOS Version mismatch\n", 
+                         detail(ips_name, "%s", ips_name),
+                         detail(ips_number, "%d", ha->host_num),
+                         detail(biosversion, "%s", BiosVersion),
+                         detail(compatableversion, "%s", IPS_COMPAT_BIOS));
      MatchError = 1;
  }
 
@@ -7340,7 +7405,7 @@
  {
      ha->nvram->version_mismatch = 1;
      if (ips_cd_boot == 0)                                               
-       printk(KERN_WARNING "Warning ! ! ! ServeRAID Version Mismatch\n");
+       problem(LOG_WARNING, "Warning ! ! ! ServeRAID Version Mismatch\n");
  }
  else
  {
@@ -7524,7 +7589,9 @@
        uint32_t offs;
 
        if (check_mem_region(mem_addr, mem_len)) {
-          printk(KERN_WARNING "Couldn't allocate IO Memory space %x len %d.\n", mem_addr, mem_len);
+          pci_problem(LOG_WARNING, pci_dev,  "Couldn't allocate IO Memory space\n", 
+                      detail(mem_addr, "%x", mem_addr),
+                      detail(mem_len, "%d", mem_len));
           return -1;
           }
 
@@ -7541,7 +7608,9 @@
     /* setup I/O mapped area (if applicable) */
     if (io_addr) {
        if (check_region(io_addr, io_len)) {
-          printk(KERN_WARNING "Couldn't allocate IO space %x len %d.\n", io_addr, io_len);
+          pci_problem(LOG_WARNING, pci_dev,  "Couldn't allocate IO space\n", 
+                      detail(mem_addr, "%x", mem_addr),
+                      detail(mem_len, "%d", mem_len));
           return -1;
        }
        request_region(io_addr, io_len, "ips");
@@ -7549,7 +7618,7 @@
 
     /* get the revision ID */
     if (pci_read_config_byte(pci_dev, PCI_REVISION_ID, &revision_id)) {
-       printk(KERN_WARNING "Can't get revision id.\n" );
+       pci_problem(LOG_WARNING, pci_dev, "Can't get the revision id.\n" );
        return -1;
     }
 
@@ -7562,10 +7631,12 @@
     scsi_set_pci_device(sh, pci_dev);
 #endif
     if (sh == NULL) {
-       printk(KERN_WARNING "Unable to register controller with SCSI subsystem\n" );
+      pci_problem(LOG_WARNING, pci_dev, "Unable to register controller with SCSI subsystem. Failing init.\n",
+                        detail(name, "%s", driver_template.name));
        return -1;
     }
 
+    scsi_host_introduce(sh, "adapter");
     ha = IPS_HA(sh);
     memset(ha, 0, sizeof(ips_ha_t));
     
@@ -7584,7 +7655,7 @@
     ha->enq = kmalloc(sizeof(IPS_ENQ), GFP_KERNEL);
 
     if (!ha->enq) {
-       printk(KERN_WARNING "Unable to allocate host inquiry structure\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate host inquiry structure\n" );
        ha->active = 0;
        ips_free(ha);
        scsi_unregister(sh);
@@ -7596,7 +7667,7 @@
     ha->adapt = pci_alloc_consistent(ha->pcidev, sizeof(IPS_ADAPTER) +
                                      sizeof(IPS_IO_CMD), &dma_address);
     if (!ha->adapt) {
-       printk(KERN_WARNING "Unable to allocate host adapt & dummy structures\n");
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate host adapt & dummy structures\n");
        ha->active = 0;
        ips_free(ha);
        scsi_unregister(sh);
@@ -7610,7 +7681,7 @@
     ha->conf = kmalloc(sizeof(IPS_CONF), GFP_KERNEL);
 
     if (!ha->conf) {
-       printk(KERN_WARNING "Unable to allocate host conf structure\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate host conf structure\n" );
        ha->active = 0;
        ips_free(ha);
        scsi_unregister(sh);
@@ -7622,7 +7693,7 @@
     ha->nvram = kmalloc(sizeof(IPS_NVRAM_P5), GFP_KERNEL);
 
     if (!ha->nvram) {
-       printk(KERN_WARNING "Unable to allocate host NVRAM structure\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate host NVRAM structure\n" );
        ha->active = 0;
        ips_free(ha);
        scsi_unregister(sh);
@@ -7634,7 +7705,7 @@
     ha->subsys = kmalloc(sizeof(IPS_SUBSYS), GFP_KERNEL);
 
     if (!ha->subsys) {
-       printk(KERN_WARNING "Unable to allocate host subsystem structure\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate host subsystem structure\n" );
        ha->active = 0;
        ips_free(ha);
        scsi_unregister(sh);
@@ -7651,7 +7722,7 @@
     ha->ioctl_datasize = count;
 
     if (!ha->ioctl_data) {
-       printk(KERN_WARNING "Unable to allocate IOCTL data\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate IOCTL data. Disabling ioctls.\n" );
        ha->ioctl_data = NULL;
        ha->ioctl_order = 0;
        ha->ioctl_datasize = 0;
@@ -7748,7 +7819,7 @@
           /*
            * Initialization failed
            */
-          printk(KERN_WARNING "Unable to initialize controller\n" );
+          scsi_host_problem(LOG_WARNING, sh, "Unable to initialize controller. Initialization failed.\n" );
           ha->active = 0;
           ips_free(ha);
           scsi_unregister(sh);
@@ -7760,7 +7831,7 @@
 
     /* Install the interrupt handler */
      if (request_irq(irq, do_ipsintr, SA_SHIRQ, ips_name, ha)) {
-       printk(KERN_WARNING "Unable to install interrupt handler\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to install interrupt handler\n" );
        ha->active = 0;
        ips_free(ha);
        scsi_unregister(sh);
@@ -7774,7 +7845,7 @@
      */
     ha->max_cmds = 1;
     if (!ips_allocatescbs(ha)) {
-       printk(KERN_WARNING "Unable to allocate a CCB\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate a CCB\n" );
        ha->active = 0;
        free_irq(ha->irq, ha);
        ips_free(ha);
@@ -7816,7 +7887,7 @@
     }
 
     if (!ips_hainit(ha)) {
-       printk(KERN_WARNING "Unable to initialize controller\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to initialize controller\n" );
        ha->active = 0;
        ips_free(ha);
        free_irq(ha->irq, ha);
@@ -7830,7 +7901,7 @@
 
     /* allocate CCBs */
     if (!ips_allocatescbs(ha)) {
-       printk(KERN_WARNING "Unable to allocate CCBs\n" );
+       scsi_host_problem(LOG_WARNING, sh, "Unable to allocate CCBs\n" );
        ha->active = 0;
        ips_free(ha);
        free_irq(ha->irq, ha);
--- linux-2.5.37/drivers/scsi/scsi_problem.h	Wed Dec 31 18:00:00 1969
+++ linux-2.5.37-net/drivers/scsi/scsi_problem.h	Mon Sep 23 19:56:37 2002
@@ -0,0 +1,64 @@
+/*
+ * Linux Event Logging for the Enterprise
+ * Copyright (c) International Business Machines Corp., 2002
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *  Please send e-mail to lkessler@users.sourceforge.net if you have
+ *  questions or comments.
+ *
+ *  Project Website:  http://evlog.sourceforge.net/
+ *
+ */
+
+#ifndef _SCSI_PROBLEM_H
+#define _SCSI_PROBLEM_H
+
+#include "scsi.h"
+#include "hosts.h"
+#include <linux/pci_problem.h>
+#include <linux/problem.h>
+
+/* Expands to two detail() entries: scsi_hostno from (dev)->host_no and name from (dev)->hostt->name. */
+#define scsi_host_detail(dev) \
+	detail(scsi_hostno, "%d", (dev)->host_no), \
+        detail(name, "%s", (dev)->hostt->name)
+
+/* This macro could conditionally provide detail() based on
+ * the value of dev, but one problem() per line restriction 
+ * has to be resolved first (just do a printk
+ * to warn the developer if they have used this macro without 
+ * valid args) */
+/* DD writers should use for pci based scsi adapter drivers */
+#define scsi_host_problem(sev, dev, string,...) \
+do { \
+   if (dev)  \
+     problem(sev, string, scsi_host_detail((struct Scsi_Host*)(dev)), ## __VA_ARGS__); \
+   else  { \
+     /* NULL host: there are no details to attach, so only flag the misuse */ \
+     printk(KERN_WARNING "scsi_problem. Invalid usage struct Scsi_Host * is NULL\n"); \
+   } \
+} while (0)
+
+
+/* Announce a SCSI adapter through introduce().  Since this ultimately
+ * resolves to the problem() macro, where the string provided must be unique,
+ * callers pass a `comment` string literal (it is concatenated onto the
+ * message) so that multiple introduces can occur within the same file. */
+#define scsi_host_introduce(dev, comment, ...) \
+   introduce(__stringify(KBUILD_MODNAME) " introduces Scsi Adapter: " comment, dev, ## __VA_ARGS__, scsi_host_detail(dev))
+
+#endif	/* _SCSI_PROBLEM_H */
--- linux-2.5.37/include/linux/pci_problem.h	Wed Dec 31 18:00:00 1969
+++ linux-2.5.37-net/include/linux/pci_problem.h	Mon Sep 23 19:56:11 2002
@@ -0,0 +1,52 @@
+/*
+ * Linux Event Logging for the Enterprise
+ * Copyright (c) International Business Machines Corp., 2002
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *  Please send e-mail to lkessler@users.sourceforge.net if you have
+ *  questions or comments.
+ *
+ *  Project Website:  http://evlog.sourceforge.net/
+ *
+ */
+
+#ifndef _PCI_PROBLEM_H
+#define _PCI_PROBLEM_H
+
+#include <linux/problem.h>
+/* Expands to the detail() entries reported for a PCI device: name, slot name, vendor id, device id and the pdev pointer. */
+#define pci_detail(pdev) \
+	detail(pci_name, "%s", (pdev)->name), \
+	detail(pci_slot, "%s", (pdev)->slot_name), \
+	detail(pci_vendorid, "%x", (pdev)->vendor), \
+	detail(pci_deviceid, "%x", (pdev)->device), \
+	detail(pci_dev_addr, "%p", (pdev))
+/* Report a problem for a PCI device with pci_detail() attached; a NULL pdev is only flagged as misuse via printk. */
+#define pci_problem(sev, pdev, string,...) \
+do { \
+  if (pdev)  \
+    problem(sev, string, pci_detail((struct pci_dev *)(pdev)), ## __VA_ARGS__); \
+  else       \
+    printk(KERN_WARNING "pci_problem. Invalid usage struct pci_dev * is NULL\n"); \
+} while (0)
+/* Announce a PCI device via introduce(): message is "<module name> introduces pci device: " plus pci_detail(pdev). */
+static inline void pci_introduce(struct pci_dev *pdev) {
+	introduce(__stringify(KBUILD_MODNAME) " introduces pci device: ", pdev, pci_detail(pdev));
+}
+
+
+#endif	/* _PCI_PROBLEM_H */

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAID device driver
  2002-09-24  1:54 [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAID device driver Larry Kessler
@ 2002-09-24  2:22 ` Jeff Garzik
  2002-09-26 15:52   ` Alan Cox
  0 siblings, 1 reply; 206+ messages in thread
From: Jeff Garzik @ 2002-09-24  2:22 UTC (permalink / raw)
  To: Larry Kessler
  Cc: linux-kernel mailing list, Alan Cox, Andrew V. Savochkin,
	cgl_discussion mailing list, evlog mailing list,
	ipslinux (Keith Mitchell),
	Linus Torvalds, Rusty Russell

Larry Kessler wrote:
> Please see [PATCH-RFC] README 1st note.
> 
> Also note that this patch includes pci_problem.h, as does the eepro100.c
> device driver patch included in the 'README 1st' note.
>  
> Summary of this patch...
>  
>  drivers/scsi/ips.c
>     Device Driver for the IBM ServeRAID controller, with use of new 
>     macros replacing printk() for error conditions.
>  
>  include/linux/scsi_problem.h
>   -  scsi_host_detail() macro providing common information of interest
>      for scsi-class devices.    
>   -  scsi_host_problem and scsi_host_introduce macros   
> 
>  include/linux/pci_problem.h
> 
>   -  pci_detail() macro providing common information on a per class
>      basis when problems are being reported for devices of that class. 
>   -  pci_problem and pci_introduce macros

Bloat, bloat, and more bloat.  This API is not scalable at all, if we 
have to add a new header and new foo_problem() macros for every little 
subsystem in the kernel.

If you actually want to standardize some diagnostic messages, it is a 
huge mistake [as your scsi driver example shows] to continue to use 
random text strings followed by a typed attribute list.  If you really 
wanted to standardize logging, why continue to allow driver authors to 
printk driver-specific text strings in lieu of a standard string that 
applies to the same situation in N drivers.

I do encourage the clean-up of drivers logging and can see the utility 
of it, but you are really using a sledgehammer to drive in a carpet nail 
here...

	Jeff




^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAID device driver
  2002-09-24  2:22 ` Jeff Garzik
@ 2002-09-26 15:52   ` Alan Cox
  2002-09-26 22:55     ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Larry Kessler
  0 siblings, 1 reply; 206+ messages in thread
From: Alan Cox @ 2002-09-26 15:52 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Larry Kessler, linux-kernel mailing list, Andrew V. Savochkin,
	cgl_discussion mailing list, evlog mailing list,
	ipslinux (Keith  Mitchell),
	Linus Torvalds, Rusty Russell

On Tue, 2002-09-24 at 03:22, Jeff Garzik wrote:
> If you actually want to standardize some diagnostic messages, it is a 
> huge mistake [as your scsi driver example shows] to continue to use 
> random text strings followed by a typed attribute list.  If you really 
> wanted to standardize logging, why continue to allow driver authors to 
> printk driver-specific text strings in lieu of a standard string that 
> applies to the same situation in N drivers.

A lot of it can be tidied up by very very few changes that can be done
over time and make the job easier. Why not just start with

	dev_printk(dev, KERN_ERR "Exploded mysteriously");

and a few notification type things people can add eg

	dev_failed(dev);
	dev_offline(dev);

much like we keep network status. That lets driverfs tell the decision
making code in hotplug scripts the state of play and lets it figure out
how to reassign resources, paper over cracks, phone the engineer.

Alan


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice  driver
  2002-09-26 15:52   ` Alan Cox
@ 2002-09-26 22:55     ` Larry Kessler
  2002-09-26 22:58       ` Jeff Garzik
  2002-09-27 11:32       ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Alan Cox
  0 siblings, 2 replies; 206+ messages in thread
From: Larry Kessler @ 2002-09-26 22:55 UTC (permalink / raw)
  To: Alan Cox
  Cc: Jeff Garzik, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Linus Torvalds, Richard J Moore

Alan Cox wrote:
> 
> A lot of it can be tidied up by very very few changes that can be done
> over time and make the job easier. Why not just start with
> 
>         dev_printk(dev, KERN_ERR "Exploded mysteriously");
> 
> and a few notification type things people can add eg
> 
>         dev_failed(dev);
>         dev_offline(dev);
> 
> much like we keep network status. That lets driverfs tell the decision
> making code in hotplug scripts the state of play and lets it figure out
> how to reassign resources, paper over cracks, phone the engineer.

Alan-
At the risk of reading more into your suggestion than you intended...
Are you supportive of adding infrastructure into the kernel that 
provides, conceptually at least, the sort of things that Rusty and
I (and others) are after ?

Specifically...

Provide a reasonable and printk-like interface (like you've
shown above), that writes to printk if advanced logging is not 
configured; but, if advanced logging is configured... 
 
1)During the build process, static event details (strings,
  format specifiers, file and function name, line no)
  are stored in a .log section in .o files, so that a user-mode
  utility can extract-it into user-space templates.
2)During runtime, the printk-like interface writes the dynamic data
  into an in-kernel buffer (NOT the printk ring buffer), and a 
  user-space daemon reads the event and writes to a logfile.
3)Advanced logging utilities apply the templates from step (1)     
  when events are read from the logfile for querying and displaying
  events, event notification, and log management. Templates can be
  modified to control how data is displayed (in what language, for
  example).

Mindful  that....

1) It will take time for device drivers to migrate to a new interface
2) It will take time for exploitation of the template approach 
3) we should avoid modifying current printk behavior 
4) advanced logging must be an optional feature to avoid the overhead
   where its not wanted or needed 
5) User-space utilities already exist (evlog.sourceforge.net)

and of course, mindful that the 2.5 window is closing in 1 month.
  
Thanks,
Larry Kessler

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-26 22:55     ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Larry Kessler
@ 2002-09-26 22:58       ` Jeff Garzik
  2002-09-26 23:07         ` Linus Torvalds
  2002-09-27 11:32       ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Alan Cox
  1 sibling, 1 reply; 206+ messages in thread
From: Jeff Garzik @ 2002-09-26 22:58 UTC (permalink / raw)
  To: Larry Kessler
  Cc: Alan Cox, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Linus Torvalds, Richard J Moore

Larry Kessler wrote:
> and of course, mindful that the 2.5 window is closing in 1 month.


no need to be mindful of that.

Let's get it right, rather than rush it...



^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-26 22:58       ` Jeff Garzik
@ 2002-09-26 23:07         ` Linus Torvalds
  2002-09-27  2:27           ` Jeff Garzik
  0 siblings, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-09-26 23:07 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Larry Kessler, Alan Cox, linux-kernel mailing list,
	Andrew V. Savochkin, Rusty Russell, Richard J Moore


On Thu, 26 Sep 2002, Jeff Garzik wrote:
> 
> no need to be mindful of that.
> 
> Let's get it right, rather than rush it...

Which implies that it's 2.7 material.

For 2.6.x I care about getting the drivers _working_.

The whole logging discussion with hardened drivers etc is _not_ adding
value to normal people until much much later, and it sounds very much to me
like one of those patch sets that some vendors will care about deeply
because they have some big company that cares and pays them.

Those kinds of patch-sets sometimes never make it into the standard 
kernel. They have to prove their worth to real people first, and I could 
care less (but not much) about paperwork reasons.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-26 23:07         ` Linus Torvalds
@ 2002-09-27  2:27           ` Jeff Garzik
  2002-09-27  4:45             ` Linus Torvalds
  0 siblings, 1 reply; 206+ messages in thread
From: Jeff Garzik @ 2002-09-27  2:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Larry Kessler, Alan Cox, linux-kernel mailing list,
	Andrew V. Savochkin, Rusty Russell, Richard J Moore

Linus Torvalds wrote:
> For 2.6.x I care about getting the drivers _working_.

Tangent question, is it definitely to be named 2.6?

Maybe it's just my impression from development speed, but it felt more 
like a 3.0 to me :)

	Jeff





^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-27  2:27           ` Jeff Garzik
@ 2002-09-27  4:45             ` Linus Torvalds
  2002-09-28  7:46               ` Ingo Molnar
  2002-10-03 15:51               ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) jbradford
  0 siblings, 2 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-09-27  4:45 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Larry Kessler, Alan Cox, linux-kernel mailing list,
	Andrew V. Savochkin, Rusty Russell, Richard J Moore


On Thu, 26 Sep 2002, Jeff Garzik wrote:
>
> Linus Torvalds wrote:
> > For 2.6.x I care about getting the drivers _working_.
> 
> Tangent question, is it definitely to be named 2.6?

I see no real reason to call it 3.0.

The order-of-magnitude threading improvements might just come closest to
being a "new thing", but yeah, I still consider it 2.6.x. We don't have 
new architectures or other really fundamental stuff. In many ways the jump 
from 2.2 -> 2.4 was bigger than the 2.4 -> 2.6 thing will be, I suspect.

But hey, it's just a number.  I don't feel that strongly either way. I 
think version number inflation (can anybody say "distribution makers"?) is 
a bit silly, and the way the kernel numbering works there is no reason to 
bump the major number for regular releases.

			Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice  driver
  2002-09-26 22:55     ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Larry Kessler
  2002-09-26 22:58       ` Jeff Garzik
@ 2002-09-27 11:32       ` Alan Cox
  1 sibling, 0 replies; 206+ messages in thread
From: Alan Cox @ 2002-09-27 11:32 UTC (permalink / raw)
  To: Larry Kessler
  Cc: Jeff Garzik, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Linus Torvalds, Richard J Moore

On Thu, 2002-09-26 at 23:55, Larry Kessler wrote:
> At the risk of reading more into your suggestion than you intended...
> Are you supportive of adding infrastructure into the kernel that 
> provides, conceptually at least, the sort of things that Rusty and
> I (and others) are after ?

Sort of. We have a problem about consistently reporting which device. So
dev_printk(dev, ...) is printk that formats up the device info for you.
Its also easy to use and happens to pass a device pointer into the
places you want it for more detailed logging

> Provide a reasonable and printk-like interface (like you've
> shown above), that writes to printk if advanced logging is not 
> configured; but, if advanced logging is configured... 

I'm trying to make sure the right data is available. I don't *care* what
you do with it after it gets thrown at you. If I have to care what you
are doing with the data the interface is wrong.


> 1) It will take time for device drivers to migrate to a new interface

Who cares. Migrate the devices you care about one at a time, test them
and worry about just those. Do you need 120 highly available network
card drivers. Do you need telco grade soundblaster 16 ?

> 3) we should avoid modifying current printk behavior 

We don't. We add an extra helper that builds on it in a totally logical
fashion. The existing one doesnt break, its merely something to be
polished when needed by the folks who care

> 4) advanced logging must be an optional feature to avoid the overhead
>    where its not wanted or needed 

And dev_printk is going to be under 1K. What you do with the data isnt
my problem.


> 5) User-space utilities already exist (evlog.sourceforge.net)

Again, this is about what you do with the data for your cases.
dev_printk is about making the info available cleanly

> and of course, mindful that the 2.5 window is closing in 1 month.

For core code changes for 2.6 base Linus tree. So you end up with a set
of patches you add over time. I would note however that the default
dev_printk() routine that just reformats up as

<level>%s: message

is not exactly taxing to get into 2.5 before October 31st, being about
10 lines long. That gives you the infrastructure to know what is going
on. Similarly I don't think its infeasible to get the state interface
into the base kernel just flipping flags in the device structure.

That makes it easy to add the needed pieces to base kernel code during
the driver work after Oct 31st, but without having to import all the
event logging stuff which wants hammering out over a longer period of
time


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-27  4:45             ` Linus Torvalds
@ 2002-09-28  7:46               ` Ingo Molnar
  2002-09-28  9:16                 ` jw schultz
                                   ` (2 more replies)
  2002-10-03 15:51               ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) jbradford
  1 sibling, 3 replies; 206+ messages in thread
From: Ingo Molnar @ 2002-09-28  7:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Jeff Garzik, Larry Kessler, Alan Cox, linux-kernel mailing list,
	Andrew V. Savochkin, Rusty Russell, Richard J Moore


On Thu, 26 Sep 2002, Linus Torvalds wrote:
> On Thu, 26 Sep 2002, Jeff Garzik wrote:
> > Tangent question, is it definitely to be named 2.6?
> 
> I see no real reason to call it 3.0.
> 
> The order-of-magnitude threading improvements might just come closest to
> being a "new thing", but yeah, I still consider it 2.6.x. We don't have
> new architectures or other really fundamental stuff. In many ways the
> jump from 2.2 -> 2.4 was bigger than the 2.4 -> 2.6 thing will be, I
> suspect.

i consider the VM and IO improvements one of the most important things
that happened in the past 5 years - and it's definitely something that
users will notice. Finally we have a top-notch VM and IO subsystem (in
addition to the already world-class networking subsystem) giving
significant improvements both on the desktop and the server - the jump
from 2.4 to 2.5 is much larger than from eg. 2.0 to 2.4.

I think due to these improvements if we dont call the next kernel 3.0 then
probably no Linux kernel in the future will deserve a major number. In 2-4
years we'll only jump to 3.0 because there's no better number available
after 2.8. That i consider to be ... boring :) [while kernel releases are
supposed to be a bit boring, i dont think they should be _that_ boring.]

	Ingo


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-28  7:46               ` Ingo Molnar
@ 2002-09-28  9:16                 ` jw schultz
  2002-09-30 14:05                   ` Denis Vlasenko
  2002-09-28 15:40                 ` Kernel version [Was: Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver] Horst von Brand
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
  2 siblings, 1 reply; 206+ messages in thread
From: jw schultz @ 2002-09-28  9:16 UTC (permalink / raw)
  To: linux-kernel mailing list

On Sat, Sep 28, 2002 at 09:46:35AM +0200, Ingo Molnar wrote:
> 
> On Thu, 26 Sep 2002, Linus Torvalds wrote:
> > On Thu, 26 Sep 2002, Jeff Garzik wrote:
> > > Tangent question, is it definitely to be named 2.6?
> > 
> > I see no real reason to call it 3.0.
> > 
> > The order-of-magnitude threading improvements might just come closest to
> > being a "new thing", but yeah, I still consider it 2.6.x. We don't have
> > new architectures or other really fundamental stuff. In many ways the
> > jump from 2.2 -> 2.4 was bigger than the 2.4 -> 2.6 thing will be, I
> > suspect.
> 
> i consider the VM and IO improvements one of the most important things
> that happened in the past 5 years - and it's definitely something that
> users will notice. Finally we have a top-notch VM and IO subsystem (in
> addition to the already world-class networking subsystem) giving
> significant improvements both on the desktop and the server - the jump
> from 2.4 to 2.5 is much larger than from eg. 2.0 to 2.4.
> 
> I think due to these improvements if we dont call the next kernel 3.0 then
> probably no Linux kernel in the future will deserve a major number. In 2-4
> years we'll only jump to 3.0 because there's no better number available
> after 2.8. That i consider to be ... boring :) [while kernel releases are
> supposed to be a bit boring, i dont think they should be _that_ boring.]
> 

Ingo, I agree with Linus.  My recollection of when we moved
to 2.0 was that the major number reflected the user<->kernel
ABI.  I have no problem with a version 2.42 if things stay
stable that long.   I hope they don't but that is another
issue.

Version 3.0 implies incompatibility with binaries from 2.x
The distributions can play around with version numbers
reflecting the GUI interface, libraries or installers but
the kernel major version should stay the same until binary
compatibility is broken.  When we move old syscalls (such as
32 bit file ops) from deprecated to unsupported is when we
increment the major number.

It may be that 2.7 will see the cruft cut out and be the end
of 2.x but 2.5 isn't that.  So far 2.5 is performance
enhancement.  Terrific performance enhancement, thanks to you
and many others.  But it isn't adding major new features nor
is it removing old interfaces.  In many ways 2.6 looks like
a sign that the 2.x kernel is getting mature.  2.6 means
users can expect improvements but don't have to make big changes.
2.6 is an upgrade, 3.0 would be a replacement.


-- 
________________________________________________________________
	J.W. Schultz            Pegasystems Technologies
	email address:		jw@pegasys.ws

		Remember Cernan and Schmitt

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Kernel version [Was: Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver]
  2002-09-28  7:46               ` Ingo Molnar
  2002-09-28  9:16                 ` jw schultz
@ 2002-09-28 15:40                 ` Horst von Brand
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
  2 siblings, 0 replies; 206+ messages in thread
From: Horst von Brand @ 2002-09-28 15:40 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel

Ingo Molnar <mingo@elte.hu> said:
> On Thu, 26 Sep 2002, Linus Torvalds wrote:
> > On Thu, 26 Sep 2002, Jeff Garzik wrote:
> > > Tangent question, is it definitely to be named 2.6?
> > 
> > I see no real reason to call it 3.0.
> > 
> > The order-of-magnitude threading improvements might just come closest to
> > being a "new thing", but yeah, I still consider it 2.6.x. We don't have
> > new architectures or other really fundamental stuff. In many ways the
> > jump from 2.2 -> 2.4 was bigger than the 2.4 -> 2.6 thing will be, I
> > suspect.
> 
> i consider the VM and IO improvements one of the most important things
> that happened in the past 5 years - and it's definitely something that
> users will notice. Finally we have a top-notch VM and IO subsystem (in
> addition to the already world-class networking subsystem) giving
> significant improvements both on the desktop and the server - the jump
> from 2.4 to 2.5 is much larger than from eg. 2.0 to 2.4.

But is it as large as the jump from 1.2.x to 2.0.x?

> I think due to these improvements if we dont call the next kernel 3.0 then
> probably no Linux kernel in the future will deserve a major number. In 2-4
> years we'll only jump to 3.0 because there's no better number available
> after 2.8. That i consider to be ... boring :) [while kernel releases are
> supposed to be a bit boring, i dont think they should be _that_ boring.]

What is wrong with 2.10, or 2.256 for that matter?
-- 
Dr. Horst H. von Brand                   User #22616 counter.li.org
Departamento de Informatica                     Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria              +56 32 654239
Casilla 110-V, Valparaiso, Chile                Fax:  +56 32 797513

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-28  7:46               ` Ingo Molnar
  2002-09-28  9:16                 ` jw schultz
  2002-09-28 15:40                 ` Kernel version [Was: Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver] Horst von Brand
@ 2002-09-29  1:31                 ` Linus Torvalds
  2002-09-29  6:14                   ` james
                                     ` (4 more replies)
  2 siblings, 5 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-09-29  1:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jeff Garzik, Larry Kessler, Alan Cox, linux-kernel mailing list,
	Andrew V. Savochkin, Rusty Russell, Richard J Moore


On Sat, 28 Sep 2002, Ingo Molnar wrote:
> 
> i consider the VM and IO improvements one of the most important things
> that happened in the past 5 years - and it's definitely something that
> users will notice. Finally we have a top-notch VM and IO subsystem (in
> addition to the already world-class networking subsystem) giving
> significant improvements both on the desktop and the server - the jump
> from 2.4 to 2.5 is much larger than from eg. 2.0 to 2.4.

Hey, _if_ people actually are universally happy with the VM in the current
2.5.x tree, I'll happily call the dang thing 5.0 or whatever (just
kidding, but yeah, that would be a good enough reason to bump the major
number).

However, I'll believe that when I see it. Usually people don't complain 
during a development kernel, because they think they shouldn't, and then 
when it becomes stable (ie when the version number changes) they are 
surprised that the behaviour didn't magically improve, and _then_ we get 
tons of complaints about how bad the VM is under their load.

Am I happy with current 2.5.x?  Sure. Are others? Apparently. But does 
that mean that we have a top-notch VM and we should bump the major number? 
I wish.

The block IO cleanups are important, and that was the major thing _I_ 
personally wanted from the 2.5.x tree when it was opened. I agree with you 
there. But I don't think they are major-number-material.

Anyway, people who are having VM trouble with the current 2.5.x series, 
please _complain_, and tell what your workload is. Don't sit silent and 
make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
thing.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
@ 2002-09-29  6:14                   ` james
  2002-09-29  6:55                     ` Andre Hedrick
                                       ` (4 more replies)
  2002-09-29  7:16                   ` jbradford
                                     ` (3 subsequent siblings)
  4 siblings, 5 replies; 206+ messages in thread
From: james @ 2002-09-29  6:14 UTC (permalink / raw)
  To: Linus Torvalds, Ingo Molnar
  Cc: Jeff Garzik, Larry Kessler, Alan Cox, linux-kernel mailing list,
	Andrew V. Savochkin, Rusty Russell, Richard J Moore

On Saturday 28 September 2002 08:31 pm, Linus Torvalds wrote:
> On Sat, 28 Sep 2002, Ingo Molnar wrote:
> > i consider the VM and IO improvements one of the most important things
> > that happened in the past 5 years - and it's definitely something that
> > users will notice. Finally we have a top-notch VM and IO subsystem (in
> > addition to the already world-class networking subsystem) giving
> > significant improvements both on the desktop and the server - the jump
> > from 2.4 to 2.5 is much larger than from eg. 2.0 to 2.4.
>
> Hey, _if_ people actually are universally happy with the VM in the current
> 2.5.x tree, I'll happily call the dang thing 5.0 or whatever (just
> kidding, but yeah, that would be a good enough reason to bump the major
> number).
>
> However, I'll believe that when I see it. Usually people don't complain
> during a development kernel, because they think they shouldn't, and then
> when it becomes stable (ie when the version number changes) they are
> surprised that the behaviour didn't magically improve, and _then_ we get
> tons of complaints about how bad the VM is under their load.
>
> Am I happy with current 2.5.x?  Sure. Are others? Apparently. But does
> that mean that we have a top-notch VM and we should bump the major number?
> I wish.
>
> The block IO cleanups are important, and that was the major thing _I_
> personally wanted from the 2.5.x tree when it was opened. I agree with you
> there. But I don't think they are major-number-material.
>
> Anyway, people who are having VM trouble with the current 2.5.x series,
> please _complain_, and tell what your workload is. Don't sit silent and
> make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x
> thing.
>
How many people are sitting on the sidelines waiting for a guarantee that ide is 
not going to blow up on our filesystems and take our data with it? Guarantee 
that ide is working and not dangerous to our data, and I bet a lot more 
people will come back and bang on 2.5. 

I know this whole ide mess has taken me away from the development series. 
And I bet a lot of others. 

My vote for reason to advance to v3.0 would be more based on our filesystem 
support, i.e. XFS and the latest Reiserfs, and redoing our middle layer, 
i.e. treating a cdrw as another drive instead of an ide-scsi device and 
ridding us of /dev/[hs][dg][a-z] and replacing it with a lot saner 
replacement (I know this has been talked about; I don't know if it has been or 
will be implemented).  Along with the changes others have mentioned, but I really 
can't judge those because I have not used 2.5 lately for reasons stated 
above. 

Sincerely 
 
James




> 		Linus
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  6:14                   ` james
@ 2002-09-29  6:55                     ` Andre Hedrick
  2002-09-29 12:59                     ` Gerhard Mack
                                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 206+ messages in thread
From: Andre Hedrick @ 2002-09-29  6:55 UTC (permalink / raw)
  To: james
  Cc: Linus Torvalds, Ingo Molnar, Jeff Garzik, Larry Kessler,
	Alan Cox, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Richard J Moore

On Sun, 29 Sep 2002, james wrote:

> How many people are sitting on the sidelines waiting for guarantee that ide is 
> not going to blow up on our filesystems and take our data with it. Guarantee 
> that ide is working and not dangerous to our data, then I bet a lot more 
> people will come back and bang on 2.5. 
> 
> I know this whole ide mess have taken me away from the devolemental series. 
> And I bet a lot of others. 

Your points are noted and taken, and once AC and I bang out the details in
2.4-ac series they are easily brought forward.  I am staying off 2.5
until I can ramp back up the learning curve on the changing API's.

I really do not want to go in and change what Jens has forward ported
until I have a complete grasp again.  There are no more major changes at
this point and only deltas as needed to constrain concerns.

The only change could be the addition of SATA II support as soon as I
receive the WG's documents.

Cheers,

Andre Hedrick
Linux Serial ATA Solutions
LAD Storage Consulting Group


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
  2002-09-29  6:14                   ` james
@ 2002-09-29  7:16                   ` jbradford
  2002-09-29  8:08                     ` Jeff Garzik
                                       ` (3 more replies)
  2002-09-29  9:15                   ` v2.6 vs v3.0 Jens Axboe
                                     ` (2 subsequent siblings)
  4 siblings, 4 replies; 206+ messages in thread
From: jbradford @ 2002-09-29  7:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jdickens, torvalds, mingo, jgarzik, kessler, alan, linux-kernel,
	saw, rusty, richardj_moore, andre

> The block IO cleanups are important, and that was the major thing _I_ 
> personally wanted from the 2.5.x tree when it was opened. I agree with you 
> there. But I don't think they are major-number-material.

I'd definitely have voted for stable IPV6 being a 3.0.x requirement, but I guess it's a bit late now :-/

> Anyway, people who are having VM trouble with the current 2.5.x series, 
> please _complain_, and tell what your workload is. Don't sit silent and 
> make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> thing.

I think the broken IDE in 2.5.x has meant that it got seriously less testing overall than previous development trees :-(.  Maybe after halloween when it stabilises a bit more we'll get more reports in.

John

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  7:16                   ` jbradford
@ 2002-09-29  8:08                     ` Jeff Garzik
  2002-09-29  8:17                     ` David S. Miller
                                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 206+ messages in thread
From: Jeff Garzik @ 2002-09-29  8:08 UTC (permalink / raw)
  To: jbradford
  Cc: Linus Torvalds, jdickens, mingo, kessler, alan, linux-kernel,
	saw, rusty, richardj_moore, andre

jbradford@dial.pipex.com wrote:
>>The block IO cleanups are important, and that was the major thing _I_ 
>>personally wanted from the 2.5.x tree when it was opened. I agree with you 
>>there. But I don't think they are major-number-material.
> 
> 
> I'd definitely have voted for stable IPV6 being a 3.0.x requirement, but I guess it's a bit late now :-/

The USAGI guys have just started sending patches in, so there is already 
progress on this front.  And remember that stabilizing and bug fixing 
can continue after Oct 31st... that's just the feature freeze date.


>>Anyway, people who are having VM trouble with the current 2.5.x series, 
>>please _complain_, and tell what your workload is. Don't sit silent and 
>>make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
>>thing.
> 
> 
> I think the broken IDE in 2.5.x has meant that it got seriously less testing overall than previous development trees :-(.


I think this is true, but hopefully recent progress on all fronts will 
start encouraging testers to jump back in...   I have not seen any 
IDE-related corruption reports lately [but then maybe I missed them...]

BTW you should fix your word wrap :)

	Jeff




^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  7:16                   ` jbradford
  2002-09-29  8:08                     ` Jeff Garzik
@ 2002-09-29  8:17                     ` David S. Miller
  2002-09-29  9:12                     ` Jens Axboe
  2002-09-29 15:34                     ` Andi Kleen
  3 siblings, 0 replies; 206+ messages in thread
From: David S. Miller @ 2002-09-29  8:17 UTC (permalink / raw)
  To: jbradford
  Cc: torvalds, jdickens, mingo, jgarzik, kessler, alan, linux-kernel,
	saw, rusty, richardj_moore, andre

   From: jbradford@dial.pipex.com
   Date: Sun, 29 Sep 2002 08:16:23 +0100 (BST)

   I'd definitely have voted for stable IPV6 being a 3.0.x
   requirement, but I guess it's a bit late now :-/

Not at all, the goal is to get a full USAGI merge at a minimum
by the end of October.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  7:16                   ` jbradford
  2002-09-29  8:08                     ` Jeff Garzik
  2002-09-29  8:17                     ` David S. Miller
@ 2002-09-29  9:12                     ` Jens Axboe
  2002-09-29 11:19                       ` Murray J. Root
                                         ` (2 more replies)
  2002-09-29 15:34                     ` Andi Kleen
  3 siblings, 3 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-29  9:12 UTC (permalink / raw)
  To: jbradford
  Cc: Linus Torvalds, jdickens, mingo, jgarzik, kessler, alan,
	linux-kernel, saw, rusty, richardj_moore, andre

On Sun, Sep 29 2002, jbradford@dial.pipex.com wrote:
> > Anyway, people who are having VM trouble with the current 2.5.x series, 
> > please _complain_, and tell what your workload is. Don't sit silent and 
> > make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> > thing.
> 
> I think the broken IDE in 2.5.x has meant that it got seriously less
> testing overall than previous development trees :-(.  Maybe after
> halloween when it stabilises a bit more we'll get more reports in.

2.5 is definitely desktop stable, so please test it if you can. Until
recently there was a personal show stopper for me, the tasklist
deadlock. Now 2.5 is happily running on my desktop as well.

2.5 IDE stability should be just as good as 2.4-ac.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
  2002-09-29  6:14                   ` james
  2002-09-29  7:16                   ` jbradford
@ 2002-09-29  9:15                   ` Jens Axboe
  2002-09-29 19:53                     ` james
  2002-09-29 15:26                   ` Matthias Andree
  2002-09-30 18:37                   ` Bill Davidsen
  4 siblings, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-09-29  9:15 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Jeff Garzik, Larry Kessler, Alan Cox,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore

On Sat, Sep 28 2002, Linus Torvalds wrote:
> 
> On Sat, 28 Sep 2002, Ingo Molnar wrote:
> > 
> > i consider the VM and IO improvements one of the most important things
> > that happened in the past 5 years - and it's definitely something that
> > users will notice. Finally we have a top-notch VM and IO subsystem (in
> > addition to the already world-class networking subsystem) giving
> > significant improvements both on the desktop and the server - the jump
> > from 2.4 to 2.5 is much larger than from eg. 2.0 to 2.4.
> 
> Hey, _if_ people actually are universally happy with the VM in the current
> 2.5.x tree, I'll happily call the dang thing 5.0 or whatever (just
> kidding, but yeah, that would be a good enough reason to bump the major
> number).

Works For Me, at _least_ as well as 2.4.20-pre kernels. On my desktop
machine it feels better. After a few days of uptime it's fairly easy to
feel how well a kernel performs for that workload. And 2.5.39 is just
smoother than current 2.4.

> The block IO cleanups are important, and that was the major thing _I_ 
> personally wanted from the 2.5.x tree when it was opened. I agree with you 
> there. But I don't think they are major-number-material.

Dang :-)

--
Jens Axboe, rooting for 3.x


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  9:12                     ` Jens Axboe
@ 2002-09-29 11:19                       ` Murray J. Root
  2002-09-29 15:50                         ` Jens Axboe
  2002-09-29 16:04                         ` Zwane Mwaikambo
  2002-09-29 14:56                       ` Alan Cox
  2002-09-30 19:32                       ` Bill Davidsen
  2 siblings, 2 replies; 206+ messages in thread
From: Murray J. Root @ 2002-09-29 11:19 UTC (permalink / raw)
  To: linux-kernel

On Sun, Sep 29, 2002 at 11:12:29AM +0200, Jens Axboe wrote:
> On Sun, Sep 29 2002, jbradford@dial.pipex.com wrote:
> > > Anyway, people who are having VM trouble with the current 2.5.x series, 
> > > please _complain_, and tell what your workload is. Don't sit silent and 
> > > make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> > > thing.
> > 
> > I think the broken IDE in 2.5.x has meant that it got seriously less
> > testing overall than previous development trees :-(.  Maybe after
> > halloween when it stabilises a bit more we'll get more reports in.
> 
> 2.5 is definitely desktop stable, so please test it if you can. Until
> recently there was a personal show stopper for me, the tasklist
> deadline. Now 2.5 is happily running on my desktop as well.
> 
> 2.5 IDE stability should be just as good as 2.4-ac.
> 
Hmm - our definitions must be different.

ASUS P4S533 (SiS645DX chipset)
P4 2Ghz
1G PC2700 RAM

Disable SMP, enable APIC & IO APIC
Get "WARNING - Unexpected IO APIC found"
system freezes

Disable IO APIC, enable ACPI
system detects ACPI, builds table, freezes.

Disable ACPI, enable ide-scsi in the kernel
kernel panic analyzing hdc

None of these have been reported because I haven't had time to do all the
work involved in making a report that anyone on the team will read.

-- 
Murray J. Root
------------------------------------------------
DISCLAIMER: http://www.goldmark.org/jeff/stupid-disclaimers/
------------------------------------------------
Mandrake on irc.openprojects.net:
  #mandrake & #mandrake-linux = help for newbies 
  #mdk-cooker = Mandrake Cooker 


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  6:14                   ` james
  2002-09-29  6:55                     ` Andre Hedrick
@ 2002-09-29 12:59                     ` Gerhard Mack
  2002-09-29 13:46                       ` Dr. David Alan Gilbert
  2002-09-29 17:06                       ` Jochen Friedrich
  2002-09-29 15:18                     ` Trever L. Adams
                                       ` (2 subsequent siblings)
  4 siblings, 2 replies; 206+ messages in thread
From: Gerhard Mack @ 2002-09-29 12:59 UTC (permalink / raw)
  To: james
  Cc: Linus Torvalds, Ingo Molnar, Jeff Garzik, Larry Kessler,
	Alan Cox, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Richard J Moore

On Sun, 29 Sep 2002, james wrote:

> How many people are sitting on the sidelines waiting for guarantee that ide is
> not going to blow up on our filesystems and take our data with it. Guarantee
> that ide is working and not dangerous to our data, then I bet a lot more
> people will come back and bang on 2.5.
> James

Some of us are waiting until it actually compiles for us ;) (see previous
bug report)

	Gerhard

--
Gerhard Mack

gmack@innerfire.net

<>< As a computer I find your faith in technology amusing.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 12:59                     ` Gerhard Mack
@ 2002-09-29 13:46                       ` Dr. David Alan Gilbert
  2002-09-29 14:06                         ` Wakko Warner
  2002-09-29 15:42                         ` Jens Axboe
  2002-09-29 17:06                       ` Jochen Friedrich
  1 sibling, 2 replies; 206+ messages in thread
From: Dr. David Alan Gilbert @ 2002-09-29 13:46 UTC (permalink / raw)
  To: linux-kernel mailing list


In my case I gave 2.5.x an attempt at building on my x86 box a few weeks
ago but had to give up because of the lack of LVM which I rely on.

I fancy having a go on some of my non-x86 boxen; does anyone know the
state of 2.5.x for non-x86?

(Does anyone other than some marketing bods really care if it is 2.6 or
3.0 - I definitely don't).

Dave
 ---------------- Have a happy GNU millennium! ----------------------   
/ Dr. David Alan Gilbert    | Running GNU/Linux on Alpha,68K| Happy  \ 
\ gro.gilbert @ treblig.org | MIPS,x86,ARM, SPARC and HP-PA | In Hex /
 \ _________________________|_____ http://www.treblig.org   |_______/

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 13:46                       ` Dr. David Alan Gilbert
@ 2002-09-29 14:06                         ` Wakko Warner
  2002-09-29 15:42                         ` Jens Axboe
  1 sibling, 0 replies; 206+ messages in thread
From: Wakko Warner @ 2002-09-29 14:06 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: linux-kernel mailing list

> In my case I gave 2.5.x an attempt at building on my x86 box a few weeks
> ago but had to give up because of the lack of LVM which I rely on.
> 
> I fancy having a go on some of my non-x86 boxen; does anyone know the
> state of 2.5.x for non-x86?
> 
> (Does anyone other than some marketing bods really care if it is 2.6 or
> 3.0 - I definitly don't).

I thought 2.4 should be 3.0 since 1.3 went to 2.0 =)

-- 
 Lab tests show that use of micro$oft causes cancer in lab animals

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  9:12                     ` Jens Axboe
  2002-09-29 11:19                       ` Murray J. Root
@ 2002-09-29 14:56                       ` Alan Cox
  2002-09-29 15:38                         ` Jens Axboe
  2002-09-29 17:48                         ` Linus Torvalds
  2002-09-30 19:32                       ` Bill Davidsen
  2 siblings, 2 replies; 206+ messages in thread
From: Alan Cox @ 2002-09-29 14:56 UTC (permalink / raw)
  To: Jens Axboe
  Cc: jbradford, Linus Torvalds, jdickens, mingo, jgarzik, kessler,
	linux-kernel, saw, rusty, richardj_moore, andre

On Sun, 2002-09-29 at 10:12, Jens Axboe wrote:
> 2.5 is definitely desktop stable, so please test it if you can. Until
> recently there was a personal show stopper for me, the tasklist
> deadline. Now 2.5 is happily running on my desktop as well.

It's very hard to make that assessment when the audio layer still doesn't
work, most scsi drivers haven't been ported, most other drivers are full
of 2.4 fixed problems and so on.

Most of my boxes won't even run a 2.5 tree yet. I'm sure it's hardly
unique. Middle of November we may begin to find out how solid the core
code actually is, as drivers get fixed up and also in the other
direction as we eliminate numerous crashes caused by "fixed in 2.4" bugs


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  6:14                   ` james
  2002-09-29  6:55                     ` Andre Hedrick
  2002-09-29 12:59                     ` Gerhard Mack
@ 2002-09-29 15:18                     ` Trever L. Adams
  2002-09-29 15:45                       ` Jens Axboe
  2002-09-29 17:42                     ` Linus Torvalds
  2002-09-30 16:47                     ` Pau Aliagas
  4 siblings, 1 reply; 206+ messages in thread
From: Trever L. Adams @ 2002-09-29 15:18 UTC (permalink / raw)
  To: james; +Cc: Linux Kernel Mailing List

On Sun, 2002-09-29 at 02:14, james wrote:
> How many people are sitting on the sidelines waiting for guarantee that ide is 
> not going to blow up on our filesystems and take our data with it. Guarantee 
> that ide is working and not dangerous to our data, then I bet a lot more 
> people will come back and bang on 2.5. 

I can tell you right now that I am one of these.  I usually would have
been involved in testing it for my situations/needs several months ago,
but I have been very leery of the IDE and block changes.  I have one
machine (a router) that I could test it on if I knew that the dangers of
IDE and block were at least low and that the IPv4 and associated
networking connection tracking and NAT stuff worked.

Trever


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
                                     ` (2 preceding siblings ...)
  2002-09-29  9:15                   ` v2.6 vs v3.0 Jens Axboe
@ 2002-09-29 15:26                   ` Matthias Andree
  2002-09-29 16:24                     ` Alan Cox
  2002-09-30 18:37                   ` Bill Davidsen
  4 siblings, 1 reply; 206+ messages in thread
From: Matthias Andree @ 2002-09-29 15:26 UTC (permalink / raw)
  To: linux-kernel mailing list

On Sat, 28 Sep 2002, Linus Torvalds wrote:

> Am I hapyy with current 2.5.x?  Sure. Are others? Apparently. But does 
> that mean that we have a top-notch VM and we should bump the major number? 
> I wish.
> 
> The block IO cleanups are important, and that was the major thing _I_ 
> personally wanted from the 2.5.x tree when it was opened. I agree with you 
> there. But I don't think they are major-number-material.
> 
> Anyway, people who are having VM trouble with the current 2.5.x series, 
> please _complain_, and tell what your workload is. Don't sit silent and 
> make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> thing.

I personally have the feeling that 2.2.x performed better than 2.4.x
does, but I cannot go figure because I'm using ReiserFS 3.6 file
systems. I'd also really like to give Linux 2.5.39 or whatever is
current a whirl, but I'm currently using LVM and I'd need anything to
read that. Which one (EVMS or LVM2) is an ignorant-proof install and
reliable enough to read old LVM1 partitions and volumes?

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  7:16                   ` jbradford
                                       ` (2 preceding siblings ...)
  2002-09-29  9:12                     ` Jens Axboe
@ 2002-09-29 15:34                     ` Andi Kleen
  2002-09-29 17:26                       ` Jochen Friedrich
  3 siblings, 1 reply; 206+ messages in thread
From: Andi Kleen @ 2002-09-29 15:34 UTC (permalink / raw)
  To: jbradford; +Cc: linux-kernel

jbradford@dial.pipex.com writes:

> > The block IO cleanups are important, and that was the major thing _I_ 
> > personally wanted from the 2.5.x tree when it was opened. I agree with you 
> > there. But I don't think they are major-number-material.
> 
> I'd definitely have voted for stable IPV6 being a 3.0.x requirement, but I guess it's a bit late now :-/

Actually current IPv6 is stable and has been for a long time, it's just not 
completely standards compliant (but still quite usable for a lot of people)

If you mean stable implies the latest whizbang features you have a different
meaning of stable than me.

-Andi

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 14:56                       ` Alan Cox
@ 2002-09-29 15:38                         ` Jens Axboe
  2002-09-29 16:30                           ` Dave Jones
                                             ` (5 more replies)
  2002-09-29 17:48                         ` Linus Torvalds
  1 sibling, 6 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 15:38 UTC (permalink / raw)
  To: Alan Cox
  Cc: jbradford, Linus Torvalds, jdickens, mingo, jgarzik, kessler,
	linux-kernel, saw, rusty, richardj_moore, andre

On Sun, Sep 29 2002, Alan Cox wrote:
> On Sun, 2002-09-29 at 10:12, Jens Axboe wrote:
> > 2.5 is definitely desktop stable, so please test it if you can. Until
> > recently there was a personal show stopper for me, the tasklist
> > deadlock. Now 2.5 is happily running on my desktop as well.
> 
> Its very hard to make that assessment when the audio layer still doesnt
> work, most scsi drivers havent been ported, most other drivers are full
> of 2.4 fixed problems and so on.

I can only talk for myself, 2.5 works fine here on my boxes. Dunno what
you mean about audio layer, emu10k works for me.

SCSI drivers can be a real problem. Not the porting of them, most of
that is _trivial_ and can be done as we enter 3.0-pre and people show up
running that on hardware that actually needs to be ported. The worst bit
is error handling, this I view as the only problem.

Update of drivers to 2.4 level is mainly a matter of Dave (or someone
else) resyncing his -dj tree and feeding it back to Linus.

> Most of my boxes won't even run a 2.5 tree yet. I'm sure its hardly
> unique. Middle of November we may begin to find out how solid the core
> code actually is, as drivers get fixed up and also in the other
> direction as we eliminate numerous crashes caused by "fixed in 2.4" bugs

Well why don't they run with 2.5?

Alan, I think you are a pessimist painting a much bleaker picture of 2.5
than it deserves. Sure lots of drivers may be broken still, I would be
naive if I thought that this is all changed in time for oct 31. Most of
these will not be fixed until people actually _use_ 2.5 (or 3.0-pre, or
whatever it will be called), and that will not happen until Linus
actually releases a -rc or similar. And so the fsck what? No one expects
2.6-pre/3.0-pre to be perfect.

I'm not worried.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 13:46                       ` Dr. David Alan Gilbert
  2002-09-29 14:06                         ` Wakko Warner
@ 2002-09-29 15:42                         ` Jens Axboe
  2002-09-29 16:21                           ` Alan Cox
  2002-09-29 16:22                           ` Dave Jones
  1 sibling, 2 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 15:42 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: linux-kernel mailing list, Linus Torvalds

On Sun, Sep 29 2002, Dr. David Alan Gilbert wrote:
> 
> In my case I gave 2.5.x an attempt at building on my x86 box a few weeks
> ago but had to give up because of the lack of LVM which I rely on.

This is a good point. No one has cared enough about LVM to work on it,
looking at the code in the kernel I cannot blame them. Sistina have
abandoned 2.5 LVM.

Has anyone actually sent patches to Linus removing LVM completely from
2.5 and adding the LVM2 device mapper? If I used LVM, I would have done
exactly that long ago. Linus, what's your opinion on this?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:18                     ` Trever L. Adams
@ 2002-09-29 15:45                       ` Jens Axboe
  2002-09-29 15:59                         ` Trever L. Adams
  0 siblings, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 15:45 UTC (permalink / raw)
  To: Trever L. Adams; +Cc: james, Linux Kernel Mailing List

On Sun, Sep 29 2002, Trever L. Adams wrote:
> On Sun, 2002-09-29 at 02:14, james wrote:
> > How many people are sitting on the sidelines waiting for guarantee
> > that ide is not going to blow up on our filesystems and take our
> > data with it. Guarantee that ide is working and not dangerous to our
> > data, then I bet a lot more people will come back and bang on 2.5. 
> 
> I can tell you right now that I am one of these.  I usually would have
> been involved in testing it for my situations/needs several months
> ago, but I have been very leary of the IDE and block changes.  I have
> one machine (a router) that I could test it on if I knew that the
> dangers of IDE and block were at least low and that the IPv4 and
> associated networking connection tracking and NAT stuff worked.

How many accounts of the new block layer corrupting data have you been
aware of? Since 2.5.1-preX when bio was introduced, I know of one such
bug: floppy, due to the partial completion changes. Hardly critical.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 11:19                       ` Murray J. Root
@ 2002-09-29 15:50                         ` Jens Axboe
  2002-09-30  7:01                           ` Kai Henningsen
  2002-09-29 16:04                         ` Zwane Mwaikambo
  1 sibling, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 15:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: murrayr

On Sun, Sep 29 2002, Murray J. Root wrote:
> On Sun, Sep 29, 2002 at 11:12:29AM +0200, Jens Axboe wrote:
> > On Sun, Sep 29 2002, jbradford@dial.pipex.com wrote:
> > > > Anyway, people who are having VM trouble with the current 2.5.x series, 
> > > > please _complain_, and tell what your workload is. Don't sit silent and 
> > > > make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> > > > thing.
> > > 
> > > I think the broken IDE in 2.5.x has meant that it got seriously less
> > > testing overall than previous development trees :-(.  Maybe after
> > > halloween when it stabilises a bit more we'll get more reports in.
> > 
> > 2.5 is definitely desktop stable, so please test it if you can. Until
> > recently there was a personal show stopper for me, the tasklist
> > deadline. Now 2.5 is happily running on my desktop as well.
> > 
> > 2.5 IDE stability should be just as good as 2.4-ac.
> > 
> Hmm - our definitions must be different.

Not necessarily, you may just have worse luck than me.

> ASUS P4S533 (SiS645DX chipset)
> P4 2Ghz
> 1G PC2700 RAM
> 
> Disable SMP, enable APIC & IO APIC
> Get "WARNING - Unexpected IO APIC found"
> system freezes
> 
> Disable IO APIC, enable ACPI
> system detects ACPI, builds table, freezes.
> 
> Disable ACPI, enable ide-scsi in the kernel
> kernel panic analyzing hdc
> 
> None of these have been reported because I haven't had time to do all the
> work involved in making a report that anyone on the team will read.

But you have time to write this email and complain that it doesn't work?
-> /dev/null, until you send proper reports.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:45                       ` Jens Axboe
@ 2002-09-29 15:59                         ` Trever L. Adams
  2002-09-29 16:06                           ` Jens Axboe
  0 siblings, 1 reply; 206+ messages in thread
From: Trever L. Adams @ 2002-09-29 15:59 UTC (permalink / raw)
  To: Jens Axboe; +Cc: james, Linux Kernel Mailing List

On Sun, 2002-09-29 at 11:45, Jens Axboe wrote:
> How many accounts of the new block layer corrupting data have you been
> aware of? Since 2.5.1-preX when bio was introduced, I know of one such
> bug: floppy, due to the partial completion changes. Hardly critical.
> 
> -- 
> Jens Axboe

Sorry Jens, I never meant to imply I had heard of any since that floppy
bug.  I just understand there were some problems at the beginning. 
Also, I haven't been able to follow LKML as well as I would have liked
lately, but a few months ago, in one of the many IDE bash sessions that
have happened in 2.5.x I read a few people blaming some of the problems
on interactions between the new block layer and the IDE layer.

Sorry about the worries.  I am just trying to be cautious.  I am
guessing you are saying that the block layer is now solid?   If this is
the case, it sure knocks a few of my worries out of the ball park and I
will be that much closer to trying out 2.5.x myself.

Trever ADams


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 11:19                       ` Murray J. Root
  2002-09-29 15:50                         ` Jens Axboe
@ 2002-09-29 16:04                         ` Zwane Mwaikambo
  1 sibling, 0 replies; 206+ messages in thread
From: Zwane Mwaikambo @ 2002-09-29 16:04 UTC (permalink / raw)
  To: Murray J. Root; +Cc: linux-kernel

On Sun, 29 Sep 2002, Murray J. Root wrote:

> ASUS P4S533 (SiS645DX chipset)
> P4 2Ghz
> 1G PC2700 RAM
> 
> Disable SMP, enable APIC & IO APIC
> Get "WARNING - Unexpected IO APIC found"
> system freezes

Send the subsequent messages (iirc it prints some verbose info about the 
IOAPIC in question).

> Disable IO APIC, enable ACPI
> system detects ACPI, builds table, freezes.

Send messages, motherboard/chipset info..

> Disable ACPI, enable ide-scsi in the kernel
> kernel panic analyzing hdc

ditto.

> None of these have been reported because I haven't had time to do all the
> work involved in making a report that anyone on the team will read.

Shouldn't take too long, most time would be spent writing them down if you 
can't retrieve via serial console.

	Zwane
-- 
function.linuxpower.ca


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:59                         ` Trever L. Adams
@ 2002-09-29 16:06                           ` Jens Axboe
  2002-09-29 16:13                             ` Trever L. Adams
  0 siblings, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 16:06 UTC (permalink / raw)
  To: Trever L. Adams; +Cc: james, Linux Kernel Mailing List

On Sun, Sep 29 2002, Trever L. Adams wrote:
> On Sun, 2002-09-29 at 11:45, Jens Axboe wrote:
> > How many accounts of the new block layer corrupting data have you been
> > aware of? Since 2.5.1-preX when bio was introduced, I know of one such
> > bug: floppy, due to the partial completion changes. Hardly critical.
> > 
> > -- 
> > Jens Axboe
> 
> Sorry Jens, I never meant to imply I had heard of any since that floppy
> bug.  I just understand there were some problems at the beginning. 
> Also, I haven't been able to follow LKM as well as I would have liked
> lately, but a few months ago, in one of the many IDE bash sessions that
> have happened in 2.5.x I read a few people blaiming some of the problems
> on interactions between the new block layer and the IDE layer.

No worries. I can understand how people would be wary of block layer
changes, as they have the potential to corrupt your data.

> Sorry about the worries.  I am just trying to be cautious.  I am
> guessing you are saying that the block layer is now solid?   If this is

Nah I'm saying that it's always been solid. Why would I suddenly
destabilize it now? :-)

> the case, it sure knocks a few of my worries out of the ball park and I
> will be that much closer to trying out 2.5.x myself.

As always, it's untested territory so a backup may be in order. But I
don't view testing 2.5 as any more dangerous than testing 2.4-ac.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:06                           ` Jens Axboe
@ 2002-09-29 16:13                             ` Trever L. Adams
  2002-09-30  6:54                               ` Kai Henningsen
  0 siblings, 1 reply; 206+ messages in thread
From: Trever L. Adams @ 2002-09-29 16:13 UTC (permalink / raw)
  To: Jens Axboe; +Cc: james, Linux Kernel Mailing List

On Sun, 2002-09-29 at 12:06, Jens Axboe wrote:
> Nah I'm saying that it's always been solid. Why would I suddenly
> destabilize it now? :-)
> 

Close enough.  Thank you.

> > the case, it sure knocks a few of my worries out of the ball park and I
> > will be that much closer to trying out 2.5.x myself.
> 
> As always, it's untested territory so a backup may be in order. But I
> don't view testing 2.5 as any more dangerous than testing 2.4-ac.
> 
> -- 
> Jens Axboe

I used to religiously test out ac kernels (in the 2.2, 2.3.x and early
2.4.x days).  I don't anymore, so the comparison may not be valid here. 
Anyway, I will try to either test 2.5.x on my router or else find a box
I can play with that doesn't have so much important data on it.  (I hate
to say it, but I haven't been able to afford, $$ wise, backup for a few
years... I know... I can't afford not to either).

Trever


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:21                           ` Alan Cox
@ 2002-09-29 16:17                             ` Jens Axboe
  2002-09-30  0:39                             ` Jeff Chua
  1 sibling, 0 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 16:17 UTC (permalink / raw)
  To: Alan Cox
  Cc: Dr. David Alan Gilbert, linux-kernel mailing list, Linus Torvalds

On Sun, Sep 29 2002, Alan Cox wrote:
> On Sun, 2002-09-29 at 16:42, Jens Axboe wrote:
> > Has anyone actually sent patches to Linus removing LVM completely from
> > 2.5 and adding the LVM2 device mapper? If I used LVM, I would have done
> > exactly that long ago. Linus, what's your opinion on this?
> 
> I added LVM2 a while ago for my 2.4-ac tree and haven't looked back, it's
> much nicer code and it's clean and easy to understand. I wouldn't
> guarantee it's bug free but it's the kind of code where you can *find* a
> bug if one turns up

As far as I'm concerned that settles it for me. I'll check up on 2.5
lvm2 status tomorrow.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:42                         ` Jens Axboe
@ 2002-09-29 16:21                           ` Alan Cox
  2002-09-29 16:17                             ` Jens Axboe
  2002-09-30  0:39                             ` Jeff Chua
  2002-09-29 16:22                           ` Dave Jones
  1 sibling, 2 replies; 206+ messages in thread
From: Alan Cox @ 2002-09-29 16:21 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Dr. David Alan Gilbert, linux-kernel mailing list, Linus Torvalds

On Sun, 2002-09-29 at 16:42, Jens Axboe wrote:
> Has anyone actually sent patches to Linus removing LVM completely from
> 2.5 and adding the LVM2 device mapper? If I used LVM, I would have done
> exactly that long ago. Linus, what's your opinion on this?

I added LVM2 a while ago for my 2.4-ac tree and haven't looked back, it's
much nicer code and it's clean and easy to understand. I wouldn't
guarantee it's bug free but it's the kind of code where you can *find* a
bug if one turns up


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:42                         ` Jens Axboe
  2002-09-29 16:21                           ` Alan Cox
@ 2002-09-29 16:22                           ` Dave Jones
  2002-09-29 16:26                             ` Jens Axboe
  2002-09-29 21:46                             ` Matthias Andree
  1 sibling, 2 replies; 206+ messages in thread
From: Dave Jones @ 2002-09-29 16:22 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Dr. David Alan Gilbert, linux-kernel mailing list, Linus Torvalds

On Sun, Sep 29, 2002 at 05:42:54PM +0200, Jens Axboe wrote:

 > Has anyone actually sent patches to Linus removing LVM completely from
 > 2.5 and adding the LVM2 device mapper? If I used LVM, I would have done
 > exactly that long ago. Linus, what's your opinion on this?

Joe Thornber sent a patch removing LVM1, but LVM2 has yet to
make an appearance in 2.5.x patchform afair.  LVM is in one of
those sneaky positions where they could theoretically cheat
the feature freeze, as what's in the tree right now is fubar,
and we need /something/ before going 2.6/3.0.

It'd be nice to get /something/ in before the feature freeze so
people can bang on this after halloween when we ramp up stability
testing instead of waiting until the last minute.

There are some patches in -dj which make the existing LVM1 code
compile and 'sort of' work, but they're not fit for inclusion imo.

		Dave

-- 
| Dave Jones.        http://www.codemonkey.org.uk
| SuSE Labs

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:26                   ` Matthias Andree
@ 2002-09-29 16:24                     ` Alan Cox
  2002-09-29 22:00                       ` Matthias Andree
  2002-09-30 19:02                       ` Bill Davidsen
  0 siblings, 2 replies; 206+ messages in thread
From: Alan Cox @ 2002-09-29 16:24 UTC (permalink / raw)
  To: Matthias Andree; +Cc: linux-kernel mailing list

On Sun, 2002-09-29 at 16:26, Matthias Andree wrote:
> I personally have the feeling that 2.2.x performed better than 2.4.x
> does, but I cannot go figure because I'm using ReiserFS 3.6 file

On low end boxes the benchmarks I did show later 2.4-rmap beats 2.2. 2.0
worked surprisingly well (better than pre-rmap 2.4) and as Stephen
claimed the best code was about 2.1.100, 2.2 then dropped badly from
that point.

Low memory is of course where rmap does best, so the 2.4-rmap v 2.4
parts of such testing are not actually that useful



^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:22                           ` Dave Jones
@ 2002-09-29 16:26                             ` Jens Axboe
  2002-09-29 21:46                             ` Matthias Andree
  1 sibling, 0 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-29 16:26 UTC (permalink / raw)
  To: Dave Jones, Dr. David Alan Gilbert, linux-kernel mailing list,
	Linus Torvalds

On Sun, Sep 29 2002, Dave Jones wrote:
> On Sun, Sep 29, 2002 at 05:42:54PM +0200, Jens Axboe wrote:
> 
>  > Has anyone actually sent patches to Linus removing LVM completely from
>  > 2.5 and adding the LVM2 device mapper? If I used LVM, I would have done
>  > exactly that long ago. Linus, what's your opinion on this?
> 
> Joe Thornber sent a patch removing LVM1, but LVM2 has yet to
> make an appearance in 2.5.x patchform afair.  LVM is in one of
> those sneaky positions where they could theoretically cheat
> the feature freeze, as whats in the tree right now is fubar,
> and we need /something/ before going 2.6/3.0.

Indeed. Joe, what's the status on dm2 for 2.5? I seem to recall seeing
patches for 2.5, maybe even as long as 6 months ago.

> It'd be nice to get /something/ in before the feature freeze so
> people can bang on this after halloween when we ramp up stability
> testing instead of waiting until the last minute.

Yep, as far as I'm concerned, if a 2.5 dm2 is in decent shape then I'd
gladly kill lvm1 immediately.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:38                         ` Jens Axboe
@ 2002-09-29 16:30                           ` Dave Jones
  2002-09-29 16:42                           ` Bjoern A. Zeeb
                                             ` (4 subsequent siblings)
  5 siblings, 0 replies; 206+ messages in thread
From: Dave Jones @ 2002-09-29 16:30 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Alan Cox, jbradford, Linus Torvalds, jdickens, mingo, jgarzik,
	kessler, linux-kernel, saw, rusty, richardj_moore, andre

On Sun, Sep 29, 2002 at 05:38:17PM +0200, Jens Axboe wrote:

 > Update of drivers to 2.4 level is mainly a matter of Dave (or someone
 > else) resyncing his -dj tree and feeding it back to Linus.

There's still boatloads of bits in my tree (around 4MB worth),
last night I spent some time banging on it trying to get things
into a usable, testable state again. The fact it doesn't boot
on my testboxes right now is somewhat limiting, as is being
buried alive in non-2.5 work.
 
 > > Most of my boxes won't even run a 2.5 tree yet. I'm sure its hardly
 > > unique. Middle of November we may begin to find out how solid the core
 > > code actually is, as drivers get fixed up and also in the other
 > > direction as we eliminate numerous crashes caused by "fixed in 2.4" bugs
 > Well why don't they run with 2.5?

Probably numerous reasons (as me). My laptop hangs on boot (no idea why),
my VIA C3 box dies with preemption, some other boxes are still unusable
due to broken SCSI drivers afair.

 > Alan, I think you are a pessimist painting a much bleaker picture of 2.5
 > than it deserves. Sure lots of drivers may be broken still, I would be
 > naive if I thought that this is all changed in time for oct 31.

There's mountains of silly one liner fixes for various problems
(from compile fixes to stability to security issues) in my tree
that need pushing to Linus, the hard part right now is finding
time to do so, but lots of it can even wait until after the feature freeze.
What's important right now is getting everything in that we *need*
included, (biggest absence imo is probably a replacement LVM right now)
 
 > Most of
 > these will not be fixed until people actually _use_ 2.5 (or 3.0-pre, or
 > whatever it will be called), and that will not happen until Linus
 > actually releases a -rc or similar. And so the fsck what? Noone expects
 > 2.6-pre/3.0-pre to be perfect.

*nods*, and with the addition of the various debugging aids that have
popped up in the last week or so, I've no doubt we're on track to nail
down a lot more hard-to-find bugs than we ever have been before long
before hitting a x.x.0 release

		Dave

-- 
| Dave Jones.        http://www.codemonkey.org.uk
| SuSE Labs

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:38                         ` Jens Axboe
  2002-09-29 16:30                           ` Dave Jones
@ 2002-09-29 16:42                           ` Bjoern A. Zeeb
  2002-09-29 21:16                           ` Russell King
                                             ` (3 subsequent siblings)
  5 siblings, 0 replies; 206+ messages in thread
From: Bjoern A. Zeeb @ 2002-09-29 16:42 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Alan Cox, linux-kernel, andre

On Sun, 29 Sep 2002, Jens Axboe wrote:

Hi,

> On Sun, Sep 29 2002, Alan Cox wrote:
> > On Sun, 2002-09-29 at 10:12, Jens Axboe wrote:
> > > 2.5 is definitely desktop stable, so please test it if you can. Until
> > > recently there was a personal show stopper for me, the tasklist
> > > deadlock. Now 2.5 is happily running on my desktop as well.
> >
> > Its very hard to make that assessment when the audio layer still doesnt
> > work, most scsi drivers havent been ported, most other drivers are full
> > of 2.4 fixed problems and so on.
>
> I can only talk for myself, 2.5 works fine here on my boxes. Dunno what
> you mean about audio layer, emu10k works for me.
>
> SCSI drivers can be a real problem. Not the porting of them, most of
[snip]

simply replying to one of you all ...

Most important problem I currently see is that one of two kernels
do not boot on my MP machine I use as a workstation.

Apart from that and after early 2.5.3x probs were sorted out
I already had 2.5-bk-kernels running and did the following on that
MP machine:

- compiled linux-2.5-bks
- compiled X (runs with multi head)
- listened to music (emu10k)
- watched TV (bttv)
- burned CDs (SCSI)
- ran amanda: dumped multiple input streams from network to IDE disks
  before writing to SCSI tape
- ran vmware (after patchwork to compile ;-)
- started looking at sym53c416 cli() removal and had the scanner
  doing his work (started to debug some pnp things there too, results
  to be posted)
- changed to devfs
- printing and serial are fine too
- the new input stuff now behaves properly too

often did multiple things in parallel (watching tv while compiling
a new kernel, ...)

had really few crashes (~4-6 since 2.5.34)
had some compilation probs with modules and MP but they got either
fixed too fast or patches went into bk within 1-2 days :-)

Going to check JFS (and XFS) in the near future...

So I think I am either one almost happy person with a lotta luck or
you all (did) do a very excellent job!!! ... but please get those
MP (boot) probs sorted out ;-)

Before you start asking what probs: this time it's around ACPI init.

--- snipp ---
PCI: PCI BIOS revision 2.10 entry at 0xfdb91, last bus=1
PCI: Using configuration type 1
ACPI: Subsystem revision 20020918
 tbxface-0099 [03] Acpi_load_tables      : ACPI Tables successfully loaded
Parsing Methods:......................................................................................................
Table [DSDT] - 309 Objects with 22 Devices 102 Methods 19 Regions
ACPI Namespace successfully loaded at root c03a741c
--- dead end where no keyboard or serial console sysreqs are answered ---


so it must be around ... and I assume it's mp_config_ioapic_for_sci()
but still have to trace ...

--- drivers/acpi/bus.c:606 ---
        /*
         * Get a separate copy of the FADT for use by other drivers.
         */
        status = acpi_get_table(ACPI_TABLE_FADT, 1, &buffer);
        if (ACPI_FAILURE(status)) {
                printk(KERN_ERR PREFIX "Unable to get the FADT\n");
                goto error1;
        }

#ifdef CONFIG_X86
        /* Ensure the SCI is set to level-triggered, active-low */
        if (acpi_ioapic)
                mp_config_ioapic_for_sci(acpi_fadt.sci_int);
        else
                eisa_set_level_irq(acpi_fadt.sci_int);
#endif

        status = acpi_enable_subsystem(ACPI_FULL_INITIALIZATION);
        if (ACPI_FAILURE(status)) {
                printk(KERN_ERR PREFIX "Unable to start the ACPI Interpreter\n");
                goto error1;
        }
--- end ---

-- 
Greetings

Bjoern A. Zeeb				bzeeb at Zabbadoz dot NeT
56 69 73 69 74				http://www.zabbadoz.net/


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 12:59                     ` Gerhard Mack
  2002-09-29 13:46                       ` Dr. David Alan Gilbert
@ 2002-09-29 17:06                       ` Jochen Friedrich
  1 sibling, 0 replies; 206+ messages in thread
From: Jochen Friedrich @ 2002-09-29 17:06 UTC (permalink / raw)
  To: Gerhard Mack
  Cc: james, Linus Torvalds, Ingo Molnar, Jeff Garzik, Larry Kessler,
	Alan Cox, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Richard J Moore

Hi Gerhard,

> Some of us are waiting until it actually compiles for us ;) (see previous
> bug report)

Ack (on Alpha), and waiting that after compiling, it also boots :-)

My Avanti (currently running 2.5.18):

cat /proc/cpuinfo
cpu                     : Alpha
cpu model               : EV4
cpu variation           : 0
cpu revision            : 0
cpu serial number       : Linux_is_Great!
system type             : Avanti
system variation        : 0
system revision         : 0
system serial number    : MILO-2.2-18
cycle frequency [Hz]    : 166521620
timer frequency [Hz]    : 1024.00
page size [bytes]       : 8192
phys. address bits      : 34
max. addr. space #      : 63
BogoMIPS                : 326.08
kernel unaligned acc    : 7671003
(pc=fffffc0000954730,va=fffffc00052da056)
user unaligned acc      : 252 (pc=120011758,va=12006c7e4)
platform string         : N/A
cpus detected           : 0

with

CONFIG_FB_ATY=y
CONFIG_FB_ATY_GX=y
CONFIG_FB_ATY_CT=y

I just get a black screen with a wild jumping cursor and then a hang. With
"normal" console, the boot dies with a zero-pointer exception.

I'll try to compile 2.5.39 and send more details about the compile
failures and boot exceptions...

--jochen


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:34                     ` Andi Kleen
@ 2002-09-29 17:26                       ` Jochen Friedrich
  2002-09-29 17:35                         ` Jeff Garzik
                                           ` (2 more replies)
  0 siblings, 3 replies; 206+ messages in thread
From: Jochen Friedrich @ 2002-09-29 17:26 UTC (permalink / raw)
  To: Andi Kleen; +Cc: jbradford, linux-kernel, debian-ipv6

Hi Andi,

> Actually current IPv6 is stable and has been for a long time, it's just not
> completely standards compliant (but still quite usable for a lot of people)

For end systems (no router) with static IPv6 definitions this seems to be
true. However, for machines which use autoconfiguration (stateless as
there isn't a usable IPv6 capable DHCP server AFAIK) or act as routers,
the current state of the implementation of the default route can best be
described as buggy. (Autoconfigured machines seem to lose their default
route after some time, e.g.).

Also, there could be a better communication between the kernel and the
resolver to check if IPv6 is available, at all. Currently, on IPv4 only
kernels, we often see dialogs like this:

ssh -v mail.scram.de
OpenSSH_3.4p1 Debian 1:3.4p1-2.1, SSH protocols 1.5/2.0, OpenSSL
0x0090607f
debug1: Reading configuration data /etc/ssh/ssh_config
debug1: Rhosts Authentication disabled, originating port will not be
trusted.
debug1: ssh_connect: needpriv 0
debug1: Connecting to mail.scram.de [3ffe:400:470:1::1:1] port 22.
socket: Address family not supported by protocol
debug1: Connecting to mail.scram.de [195.226.127.117] port 22.
debug1: Connection established.

So IPv6 is returned by the resolver even though IPv6 isn't available in
the kernel. The default of the resolver options should be dependent
on the presence or absence of IPv6 in the currently running kernel IMHO.

Finally, IPv6 sockets which also communicate over IPv4 using mapped
addresses are considered bad nowadays ;-)

Cheers,
--jochen


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 17:26                       ` Jochen Friedrich
@ 2002-09-29 17:35                         ` Jeff Garzik
  2002-09-30  0:00                         ` Andi Kleen
  2002-10-01 19:28                         ` IPv6 stability (success story ;) Petr Baudis
  2 siblings, 0 replies; 206+ messages in thread
From: Jeff Garzik @ 2002-09-29 17:35 UTC (permalink / raw)
  To: Jochen Friedrich; +Cc: Andi Kleen, jbradford, linux-kernel, debian-ipv6

Jochen Friedrich wrote:
> So IPv6 is returned by the resolver even though IPv6 isn't available in
> the kernel. The default of the resolver options should be dependent
> on the presence or absence of IPv6 in the currently running kernel IMHO.


That sounds like glibc's problem...

glibc also has really stupid and annoying /etc/hosts behavior which 
needs fixing, and IIRC it is related to IPv6...

	Jeff




^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  6:14                   ` james
                                       ` (2 preceding siblings ...)
  2002-09-29 15:18                     ` Trever L. Adams
@ 2002-09-29 17:42                     ` Linus Torvalds
  2002-09-29 17:54                       ` Rik van Riel
                                         ` (2 more replies)
  2002-09-30 16:47                     ` Pau Aliagas
  4 siblings, 3 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-09-29 17:42 UTC (permalink / raw)
  To: james
  Cc: Ingo Molnar, Jeff Garzik, Larry Kessler, Alan Cox,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore


On Sun, 29 Sep 2002, james wrote:
>
> How many people are sitting on the sidelines waiting for guarantee that ide is 
> not going to blow up on our filesystems and take our data with it. Guarantee 
> that ide is working and not dangerous to our data, then I bet a lot more 
> people will come back and bang on 2.5. 

How the hell can I _guarantee_ anything like that?

I can say that the IDE code is the same code that is in 2.4.x, so if 
you're comfortable with 2.4.x wrt IDE, then you should be comfy with 
2.5.x too.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 14:56                       ` Alan Cox
  2002-09-29 15:38                         ` Jens Axboe
@ 2002-09-29 17:48                         ` Linus Torvalds
  2002-09-29 18:13                           ` Jaroslav Kysela
  1 sibling, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-09-29 17:48 UTC (permalink / raw)
  To: Alan Cox
  Cc: Jens Axboe, jbradford, jdickens, mingo, jgarzik, kessler,
	linux-kernel, saw, rusty, richardj_moore, andre


On 29 Sep 2002, Alan Cox wrote:
> 
> Its very hard to make that assessment when the audio layer still doesnt
> work,

Which reminds me: it would be good to have somebody try to merge stuff
from the ALSA tree.

ALSA never got out of their CVS mentality, and apparently nobody bothers 
to do incremental merges. Is anybody interested and listening?

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 17:42                     ` Linus Torvalds
@ 2002-09-29 17:54                       ` Rik van Riel
  2002-09-29 18:24                       ` Alan Cox
  2002-09-30 16:39                       ` jbradford
  2 siblings, 0 replies; 206+ messages in thread
From: Rik van Riel @ 2002-09-29 17:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: james, Ingo Molnar, Jeff Garzik, Larry Kessler, Alan Cox,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore

On Sun, 29 Sep 2002, Linus Torvalds wrote:

> How the hell can I _guarantee_ anything like that?

"Quality IDE code, or your disk space back"

No wait, that didn't come out quite right...

Rik
-- 
Bravely reimplemented by the knights who say "NIH".

http://www.surriel.com/		http://distro.conectiva.com/

Spamtraps of the month:  september@surriel.com trac@trac.org


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 17:48                         ` Linus Torvalds
@ 2002-09-29 18:13                           ` Jaroslav Kysela
  0 siblings, 0 replies; 206+ messages in thread
From: Jaroslav Kysela @ 2002-09-29 18:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Alan Cox, Jens Axboe, jbradford, jdickens, mingo, jgarzik,
	kessler, linux-kernel, saw, rusty, richardj_moore, andre

On Sun, 29 Sep 2002, Linus Torvalds wrote:

> 
> On 29 Sep 2002, Alan Cox wrote:
> > 
> > Its very hard to make that assessment when the audio layer still doesnt
> > work,
> 
> Which reminds me: it would be good to have somebody try to merge stuff
> from the ALSA tree.
> 
> ALSA never got out of their CVS mentality, and apparently nobody bothers 
> to do incrementeal merges. Is anybody interested and listening?

I am doing that. It seems that you have rejected my big patch, so I am 
trying to split our changes into small chunks. I have about 10 patches, I will
send them to you and lkml. All patches are in BK style with imported 
comments from CVS.

						Jaroslav

-----
Jaroslav Kysela <perex@suse.cz>
Linux Kernel Sound Maintainer
ALSA Project  http://www.alsa-project.org
SuSE Linux    http://www.suse.com


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 17:42                     ` Linus Torvalds
  2002-09-29 17:54                       ` Rik van Riel
@ 2002-09-29 18:24                       ` Alan Cox
  2002-09-30  7:56                         ` Jens Axboe
  2002-09-30 16:39                       ` jbradford
  2 siblings, 1 reply; 206+ messages in thread
From: Alan Cox @ 2002-09-29 18:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: james, Ingo Molnar, Jeff Garzik, Larry Kessler,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore

On Sun, 2002-09-29 at 18:42, Linus Torvalds wrote:
> I can say that the IDE code is the same code that is in 2.4.x, so if 
> you're comfortable with 2.4.x wrt IDE, then you should be comfy with 
> 2.5.x too.

*NO*

The IDE code is the experimental code in 2.4-ac. It is _NOT_ the IDE
code in 2.4 and it's a lot less tested. I don't think it has any
corruption bugs but it is most definitely not the base 2.4 code and has
plenty of non-corruption bugs (PCMCIA hang, taskfile write hang, irq
blocking performance problems)

I use the 2.4-ac version of that code for day to day work. That's about
as good a guarantee as I can give.

Alan


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  9:15                   ` v2.6 vs v3.0 Jens Axboe
@ 2002-09-29 19:53                     ` james
  0 siblings, 0 replies; 206+ messages in thread
From: james @ 2002-09-29 19:53 UTC (permalink / raw)
  To: Jens Axboe, Linus Torvalds
  Cc: Ingo Molnar, Jeff Garzik, Larry Kessler, Alan Cox,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore


Upon thinking about the 2.6 vs 3.0 argument, I think we may be looking at this
version comparison in the wrong light: it is not whether we have come far
enough from 2.4.x to make it 3.0, it is whether we have changed enough from
version 2.0.x.

When I compare running Linux 2.0.x to running what will be the next version, we
are looking at a completely different system. For example, in v2.0 the only
file system choices were ext2 or DOS, with a few others that weren't in
widespread use. You created small partitions to keep fscks fast, and even if
you had battery backup, you were still basically limited to 8 gig file systems.
Today we have ext2, ext3, reiserfs, JFS, and XFS, the last four with journaling
capabilities. It is possible and expected to have huge filesystems, and patches
to break the 2 terabyte file system limit exist in various stages of
testing. Not to mention we have LVM and RAID file systems, being used on
desktop as well as server systems.

Networking has changed as well. We went from mostly 10mbit ethernet cards and a
few 100mbit cards to now having 100mbit ethernet as the base of home
networking, not to mention gigabit ethernet and ATM gaining popularity in
the server market. While those are just drivers, the real shift of thinking
comes in zero copy file transfer and mature, state of the art
firewalling/routing/bridging etc. in NAT and iptables.

For video we changed from base VGA video text and X to accelerated video
processors, not just in X but in framebuffers used as consoles.

We also have support for diverse set of buses, that change the way we think 
about our system, multiple bridges on PCI, USB v1 and v2, to firewire. 

I will let others more in the know about memory management discuss the finer
points of this one, but it is a major change. In 2.0 we just killed random
programs when out of memory; today we make a slightly more educated guess as
to what to kill when we are out of memory. Not to mention that there was just
one fixed address space split (I think it was 2 gig user and 2 gig kernel);
today we can choose 1, 2, or 3 gig of kernel space. There is also large memory
support in the kernel, supporting 36-bit memory addressing, which supports more
memory than I will ever see in the near future.

We have changed from a system that barely supported SMP with 2 processors, with
basically one big kernel lock, to a system with finely grained locks,
semaphores and subsystem spinlocks that has decent performance on 8+ CPU
systems. NUMA system support has also appeared since version 2.0.x.

In 2.0.0 we had a 15-bit pid with a maximum of 1000 active (I believe it is
less than this); today we have a 32+ bit pid on the table with support for many
more active processes. Of course we have numerous internal file systems that
did not exist before (tmpfs, devfs, etc.) and that changed the way we all think
about our systems.

A preemptible kernel, need I say more?


Well, that is just a small list of the global systems that change the way we
think of Linux.

If we continue to justify major version changes based on the change from one
minor version to the next, can we expect Linux 2.98.x in the future?  In each
minor version we rewrite one or two subsystems. And these take many months to
plan, complete and test, so a big enough change from one minor version
to the next may not be possible at the current size of this
development effort. So yes, we have come far enough from v2.0.x to justify a
version 3.0.x. If I were a marketing person I would call it Linux 3.0.0
enterprise edition, if we can get LVM2, RAID and breaking the 2 terabyte
filesystem limit along with what we have already accomplished.

Just my opinion

James

 






^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:38                         ` Jens Axboe
  2002-09-29 16:30                           ` Dave Jones
  2002-09-29 16:42                           ` Bjoern A. Zeeb
@ 2002-09-29 21:16                           ` Russell King
  2002-09-29 21:32                             ` Alan Cox
  2002-09-29 21:49                             ` steve
  2002-09-29 21:52                           ` Matthias Andree
                                             ` (2 subsequent siblings)
  5 siblings, 2 replies; 206+ messages in thread
From: Russell King @ 2002-09-29 21:16 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Alan Cox, jbradford, Linus Torvalds, jdickens, mingo, jgarzik,
	kessler, linux-kernel, saw, rusty, richardj_moore, andre

On Sun, Sep 29, 2002 at 05:38:17PM +0200, Jens Axboe wrote:
> SCSI drivers can be a real problem. Not the porting of them, most of
> that is _trivial_ and can be done as we enter 3.0-pre and people show up
> running that on hardware that actually needs to be ported. The worst bit
> is error handling, this I view as the only problem.

2.4.19 SCSI error handling leaves a lot to be desired currently.  I have
a growing pile of patches that fix up that mess.  They are/have been having
an airing on linux-scsi.

Unfortunately, Alan seems to be ignoring those which linux-scsi is happy
with for unknown reasons currently, so I haven't sent them to Marcelo
(even the ones linux-scsi have said should go to Marcelo; I'd prefer them
to get an airing and some feedback from elsewhere first.)

-- 
Russell King (rmk@arm.linux.org.uk)                The developer of ARM Linux
             http://www.arm.linux.org.uk/personal/aboutme.html


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 21:16                           ` Russell King
@ 2002-09-29 21:32                             ` Alan Cox
  2002-09-29 21:49                             ` steve
  1 sibling, 0 replies; 206+ messages in thread
From: Alan Cox @ 2002-09-29 21:32 UTC (permalink / raw)
  To: Russell King
  Cc: Jens Axboe, jbradford, Linus Torvalds, jdickens, mingo, jgarzik,
	kessler, linux-kernel, saw, rusty, richardj_moore, andre

On Sun, 2002-09-29 at 22:16, Russell King wrote:
> Unfortunately, Alan seems to be ignoring those which linux-scsi is happy
> with for unknown reasons currently,

Because I've been in Finland 



^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:22                           ` Dave Jones
  2002-09-29 16:26                             ` Jens Axboe
@ 2002-09-29 21:46                             ` Matthias Andree
  2002-09-30  7:05                               ` Michael Clark
  1 sibling, 1 reply; 206+ messages in thread
From: Matthias Andree @ 2002-09-29 21:46 UTC (permalink / raw)
  To: linux-kernel mailing list
  Cc: Dave Jones, Jens Axboe, Dr. David Alan Gilbert, Linus Torvalds

On Sun, 29 Sep 2002, Dave Jones wrote:

> Joe Thornber sent a patch removing LVM1, but LVM2 has yet to
> make an appearance in 2.5.x patchform afair.  LVM is in one of
> those sneaky positions where they could theoretically cheat
> the feature freeze, as whats in the tree right now is fubar,
> and we need /something/ before going 2.6/3.0.

Is not EVMS ready for the show? Is Linux >=2.6 going to have LVM2 and
EVMS? Or just LVM2? I'm not aware of the current status, but I do recall
having seen EVMS stable announcements (but not sure about 2.5 status).

> It'd be nice to get /something/ in before the feature freeze so
> people can bang on this after halloween when we ramp up stability
> testing instead of waiting until the last minute.

Indeed.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 21:16                           ` Russell King
  2002-09-29 21:32                             ` Alan Cox
@ 2002-09-29 21:49                             ` steve
  1 sibling, 0 replies; 206+ messages in thread
From: steve @ 2002-09-29 21:49 UTC (permalink / raw)
  To: linux-kernel



We did catch flak on stability issues on 2.4 for whatever the 
reasons. The way I see it we should not move to 3.0 until it's been 
running stable under at least 2.6. The less technical the person 
the more valuable perception becomes. By only moving to 3.0 when 
2.x is seen as totally stable, more new (corporate) people will 
consider it as the foundation for their infrastructure. Look at the 
views of 2.2...

Besides, stability must be more important than features!

-- 

Steve Szmidt
______________________________________________________


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:38                         ` Jens Axboe
                                             ` (2 preceding siblings ...)
  2002-09-29 21:16                           ` Russell King
@ 2002-09-29 21:52                           ` Matthias Andree
  2002-09-30  7:31                             ` Tomas Szepe
  2002-09-30 15:33                           ` Jan Harkes
  2002-09-30 18:13                           ` Jeff Willis
  5 siblings, 1 reply; 206+ messages in thread
From: Matthias Andree @ 2002-09-29 21:52 UTC (permalink / raw)
  To: linux-kernel

On Sun, 29 Sep 2002, Jens Axboe wrote:

> SCSI drivers can be a real problem. Not the porting of them, most of
> that is _trivial_ and can be done as we enter 3.0-pre and people show up
> running that on hardware that actually needs to be ported. The worst bit
> is error handling, this I view as the only problem.

And a long-standing one. This should have been fixed in 2.2, it has not
been fixed in 2.4, it's much desired for 2.6 -- and people are going to
point away from Linux (and expect Jörg Schilling speaking up again
should 2.6 be released with what he considers broken API -- I cannot
tell if all his items are right, but if a third of what he says is true,
Linux SCSI is not in good shape).


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:24                     ` Alan Cox
@ 2002-09-29 22:00                       ` Matthias Andree
  2002-09-30 19:02                       ` Bill Davidsen
  1 sibling, 0 replies; 206+ messages in thread
From: Matthias Andree @ 2002-09-29 22:00 UTC (permalink / raw)
  To: linux-kernel mailing list

On Sun, 29 Sep 2002, Alan Cox wrote:

> On Sun, 2002-09-29 at 16:26, Matthias Andree wrote:
> > I personally have the feeling that 2.2.x performed better than 2.4.x
> > does, but I cannot go figure because I'm using ReiserFS 3.6 file
> 
> On low end boxes the benchmarks I did show later 2.4-rmap beats 2.2. 2.0
> worked surprisingly well (better than pre-rmap 2.4) and as Stephen
> claimed the best code was about 2.1.100, 2.2 then dropped badly from
> that point.

Granted, but I don't expect any roll-back to happen. If Stephen can dig
up the best version VM-wise, then if somebody could benchmark 2.6pre
against 2.1.BEST, that might be a good competition to 2.6pre -- modulo
different application profile, of course.

My major concern is usability: VM can be so bad it freezes hell or so
good it brings instant world peace: It won't buy me anything if I cannot
get to my data because LVM1 is unusable and neither EVMS nor LVM2 is in.
I'd like to test-drive 2.5, but booting my kernel and mounting a small
root partition from ext3 (non-LVM) and going without /usr and /opt
(because these are in LVM) is not terribly helpful to give it a try.

It's some big things that must be fixed before the tuning (towards
stability, fixes, performance) can take place. You really can't do the
tasting before you've put the meat in.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 17:26                       ` Jochen Friedrich
  2002-09-29 17:35                         ` Jeff Garzik
@ 2002-09-30  0:00                         ` Andi Kleen
  2002-10-01 19:28                         ` IPv6 stability (success story ;) Petr Baudis
  2 siblings, 0 replies; 206+ messages in thread
From: Andi Kleen @ 2002-09-30  0:00 UTC (permalink / raw)
  To: Jochen Friedrich; +Cc: linux-kernel

Jochen Friedrich <jochen@scram.de> writes:

> Hi Andi,
> 
> > Actually current IPv6 is stable and has been for a long time, it's just not
> > completely standards compliant (but still quite usable for a lot of people)
> 
> For end systems (no router) with static IPv6 definitions this seems to be
> true. However, for machines which use autoconfiguration (stateless as
> there isn't a usable IPv6 capable DHCP server AFAIK) or act as routers,
> the current state of the implementation of the default route can best be
> described as buggy. (Autoconfigured machines seem to lose their default
> route after some time, e.g.).

Are you sure this is not related to the routing daemon or rdisc daemon you
use? In the past, when I had problems with lost default routes, such
a daemon was always to blame.

> So IPv6 is returned by the resolver even though IPv6 isn't available in
> the kernel. The default of the resolver options should be dependent
> on the presence or absence of IPv6 in the currently running kernel IMHO.

Sounds more like a glibc issue. I would file a glibc gnats bug on this,
then it may even get fixed. The kernel has nothing to do with this at least.
 
> Finally, IPv6 sockets which also communicate over IPv4 using mapped
> addresses are considered bad nowadays ;-)

Hmm? 

-Andi

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:21                           ` Alan Cox
  2002-09-29 16:17                             ` Jens Axboe
@ 2002-09-30  0:39                             ` Jeff Chua
  1 sibling, 0 replies; 206+ messages in thread
From: Jeff Chua @ 2002-09-30  0:39 UTC (permalink / raw)
  To: Alan Cox
  Cc: Jens Axboe, Dr. David Alan Gilbert, linux-kernel mailing list,
	Linus Torvalds


On 29 Sep 2002, Alan Cox wrote:

> On Sun, 2002-09-29 at 16:42, Jens Axboe wrote:
> > Has anyone actually sent patches to Linus removing LVM completely from
> > 2.5 and adding the LVM2 device mapper? If I used LVM, I would have done
> > exactly that long ago. Linus, what's your opinion on this?
>
> I added LVM2 a while ago for my 2.4-ac tree and haven't looked back, its
> much nicer code and its clean and easy to understand. I wouldnt
> guarantee its bug free but its the kind of code where you can *find* a
> bug if one turns up

I can't even get past "make apply-patches" with device-mapper.0.96.04 on
2.5.39.

Anyone running lvm2 on 2.5.3x ?

Thanks,
Jeff



^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:13                             ` Trever L. Adams
@ 2002-09-30  6:54                               ` Kai Henningsen
  2002-09-30 18:40                                 ` Bill Davidsen
  0 siblings, 1 reply; 206+ messages in thread
From: Kai Henningsen @ 2002-09-30  6:54 UTC (permalink / raw)
  To: linux-kernel

tadams-lists@myrealbox.com (Trever L. Adams)  wrote on 29.09.02 in <1033316012.1326.17.camel@aurora.localdomain>:

> I can play with that doesn't have so much important data on it.  (I hate
> to say it, but I haven't been able to afford, $$ wise, backup for a few
> years... I know... I can't afford not to either).

Tape drive cost?

One idea we've come up (and surely we're not the only ones) is to use  
cheap IDE disks for backup, possibly in a cold-swappable insert. As long  
as you can keep several backups per disk (say using some of those 100GB  
disks), preferably even on a different machine, that's fairly cheap.

If you want to keep daily backups for a week, weekly for a year, and all  
on separate media, of course, that's *not* cheap with this method, and  
even DLT or similar prices become acceptable in comparison. But it  
certainly beats *no* backup!

MfG Kai

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:50                         ` Jens Axboe
@ 2002-09-30  7:01                           ` Kai Henningsen
  0 siblings, 0 replies; 206+ messages in thread
From: Kai Henningsen @ 2002-09-30  7:01 UTC (permalink / raw)
  To: linux-kernel

axboe@suse.de (Jens Axboe)  wrote on 29.09.02 in <20020929155051.GF1014@suse.de>:

> On Sun, Sep 29 2002, Murray J. Root wrote:

> > None of these have been reported because I haven't had time to do all the
> > work involved in making a report that anyone on the team will read.
>
> But you have time to write this email and complain that it doesn't work?
> -> /dev/null, until you send proper reports.

That was precisely the point, no?

For some people, this goes "bake kernel, make sure nobody is doing  
something critical, reboot, hang, curse, reboot to old kernel, apologize  
for delay, stop fiddling with this thing for today" as the machine in  
question needs to do other stuff.

That's certainly the reason why I haven't figured out yet why our damn  
"new" central server doesn't boot bloody 2.4 without hanging - I certainly  
don't *want* to run 2.2 on that thing. Probably config options.

MfG Kai

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 21:46                             ` Matthias Andree
@ 2002-09-30  7:05                               ` Michael Clark
  2002-09-30  7:22                                 ` Andrew Morton
  2002-09-30 13:05                                 ` Kevin Corry
  0 siblings, 2 replies; 206+ messages in thread
From: Michael Clark @ 2002-09-30  7:05 UTC (permalink / raw)
  To: Matthias Andree
  Cc: linux-kernel mailing list, Dave Jones, Jens Axboe,
	Dr. David Alan Gilbert, Linus Torvalds

On 09/30/02 05:46, Matthias Andree wrote:
> On Sun, 29 Sep 2002, Dave Jones wrote:
> 
> 
>>Joe Thornber sent a patch removing LVM1, but LVM2 has yet to
>>make an appearance in 2.5.x patchform afair.  LVM is in one of
>>those sneaky positions where they could theoretically cheat
>>the feature freeze, as whats in the tree right now is fubar,
>>and we need /something/ before going 2.6/3.0.
> 
> 
> Is not EVMS ready for the show? Is Linux >=2.6 going to have LVM2 and
> EVMS? Or just LVM2? I'm not aware of the current status, but I do recall
> having seen EVMS stable announcements (but not sure about 2.5 status).

 From reading the EVMS list, it was working with 2.5.36 a couple weeks
ago but needs some small bio and gendisk changes to work in 2.5.39.

http://sourceforge.net/mailarchive/forum.php?thread_id=1105826&forum_id=2003

CVS version may be up-to-date quite soon from reading the thread.
It seems to be further along in 2.5 support than LVM2 - also including
the fact that EVMS supports LVM1 metadata (which the 2.5 version of LVM2
may not do so quite so soon from mentions on the lvm list).

I haven't tried EVMS but certainly from looking at the feature set,
it looks more comprehensive and modular than LVM (with its support
for multiple metadata personalities).

I too have LVM on quite a few of my machines, including my desktop,
and if I wanted to test 2.5 right now - i'd probably have to do it
using EVMS.

~mc


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  7:05                               ` Michael Clark
@ 2002-09-30  7:22                                 ` Andrew Morton
  2002-09-30 13:08                                   ` Kevin Corry
  2002-09-30 13:05                                 ` Kevin Corry
  1 sibling, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-09-30  7:22 UTC (permalink / raw)
  To: Michael Clark
  Cc: Matthias Andree, linux-kernel mailing list, Dave Jones,
	Jens Axboe, Dr. David Alan Gilbert, Linus Torvalds

Michael Clark wrote:
> 
> On 09/30/02 05:46, Matthias Andree wrote:
> > On Sun, 29 Sep 2002, Dave Jones wrote:
> >
> >
> >>Joe Thornber sent a patch removing LVM1, but LVM2 has yet to
> >>make an appearance in 2.5.x patchform afair.  LVM is in one of
> >>those sneaky positions where they could theoretically cheat
> >>the feature freeze, as whats in the tree right now is fubar,
> >>and we need /something/ before going 2.6/3.0.
> >
> >
> > Is not EVMS ready for the show? Is Linux >=2.6 going to have LVM2 and
> > EVMS? Or just LVM2? I'm not aware of the current status, but I do recall
> > having seen EVMS stable announcements (but not sure about 2.5 status).
> 
>  From reading the EVMS list, it was working with 2.5.36 a couple weeks
> ago but needs some small bio and gendisk changes to work in 2.5.39.
> 

It's going to break bigtime if someone ups and removes all the
kiobuf code.....

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 21:52                           ` Matthias Andree
@ 2002-09-30  7:31                             ` Tomas Szepe
  0 siblings, 0 replies; 206+ messages in thread
From: Tomas Szepe @ 2002-09-30  7:31 UTC (permalink / raw)
  To: linux-kernel

> > SCSI drivers can be a real problem. Not the porting of them, most of
> > that is _trivial_ and can be done as we enter 3.0-pre and people show up
> > running that on hardware that actually needs to be ported. The worst bit
> > is error handling, this I view as the only problem.
> 
> And a long-standing one. This should have been fixed in 2.2, it has not
> been fixed in 2.4, it's much desired for 2.6 -- and people are going to
> point away from Linux (and expect Jörg Schilling speaking up again
> should 2.6 be released with what he considers broken API -- I cannot
> tell if all his items are right, but if a third of what he says is true,
> Linux SCSI is not in good shape).

As long as most of that bloke's argumentation strips down to "you don't do
it like everyone else [solaris/irix/whatever] implies you're bound to suck,"
nobody with a bit of sense is going to take him seriously regardless of how
much blah blah he posts on l-k.

T.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 18:24                       ` Alan Cox
@ 2002-09-30  7:56                         ` Jens Axboe
  2002-09-30  9:53                           ` Andre Hedrick
  2002-09-30 12:58                           ` Alan Cox
  0 siblings, 2 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-30  7:56 UTC (permalink / raw)
  To: Alan Cox
  Cc: Linus Torvalds, james, Ingo Molnar, Jeff Garzik, Larry Kessler,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore

On Sun, Sep 29 2002, Alan Cox wrote:
> On Sun, 2002-09-29 at 18:42, Linus Torvalds wrote:
> > I can say that the IDE code is the same code that is in 2.4.x, so if 
> > you're comfortable with 2.4.x wrt IDE, then you should be comfy with 
> > 2.5.x too.
> 
> *NO*
> 
> The IDE code is the experimental code in 2.4-ac. It is _NOT_ the IDE
> code in 2.4 and its a lot less tested. I don't think it has any
> corruption bugs but it is most definitely not the base 2.4 code and has
> plenty of non corruption bugs (PCMCIA hang, taskfile write hang, irq
> blocking performance problems)

2.5 at least does not have the taskfile hang, because I killed taskfile
io.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  7:56                         ` Jens Axboe
@ 2002-09-30  9:53                           ` Andre Hedrick
  2002-09-30 11:54                             ` Jens Axboe
  2002-09-30 12:58                           ` Alan Cox
  1 sibling, 1 reply; 206+ messages in thread
From: Andre Hedrick @ 2002-09-30  9:53 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Alan Cox, Linus Torvalds, james, Ingo Molnar, Jeff Garzik,
	Larry Kessler, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Richard J Moore

On Mon, 30 Sep 2002, Jens Axboe wrote:

> On Sun, Sep 29 2002, Alan Cox wrote:
> > On Sun, 2002-09-29 at 18:42, Linus Torvalds wrote:
> > > I can say that the IDE code is the same code that is in 2.4.x, so if 
> > > you're comfortable with 2.4.x wrt IDE, then you should be comfy with 
> > > 2.5.x too.
> > 
> > *NO*
> > 
> > The IDE code is the experimental code in 2.4-ac. It is _NOT_ the IDE
> > code in 2.4 and its a lot less tested. I don't think it has any
> > corruption bugs but it is most definitely not the base 2.4 code and has
> > plenty of non corruption bugs (PCMCIA hang, taskfile write hang, irq
> > blocking performance problems)
> 
> 2.5 at least does not have the taskfile hang, because I killed taskfile
> io.

Great :-/  Now that you have restored the "rq->wrq" aka working copy of
the request which in its past life under PIO only updated to block when
the entire request was completed.  So there are no partial completions
possible given the old method in the legacy path.

One of the issues Linus kicked my can over was the "requirement" of partial
completions.  What I need from block is a way to know how much is
completed of the original total request.  So whatever value is the
original rq->nr_sectors assigned to "TF.2/HF.2" or nsector_offset(s),
needs to be carried in block and updated to reflect how much more is
remaining of this CDB task.

I do not care if you call it "rq->dumbass_accounting_for_andre", but
provide this dummy accounting variable in "struct request" and I will be
happy.  This has nothing to do with bio or bh segments from the kernel.
It is everything about device side accounting carried by block; whereas,
the ll_driver can use it to determine what or if there is to be another
interrupt.

Why are we getting lost interrupts?

Because there is a beautiful "data-block completion" v/s "immediate
interrupt assertion" race between the device and the kernel.  So please
provide a counter which can be used to determine where the interrupt
driven partial completion model the driver is wrt the device/request.

Jens, not asking for much.

Otherwise the ADMA/VDMA is not doable period.

Cheers,

Andre Hedrick
LAD Storage Consulting Group


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-30 14:05                   ` Denis Vlasenko
@ 2002-09-30 10:22                     ` Tomas Szepe
  2002-09-30 11:10                       ` jw schultz
                                         ` (2 more replies)
  0 siblings, 3 replies; 206+ messages in thread
From: Tomas Szepe @ 2002-09-30 10:22 UTC (permalink / raw)
  To: Denis Vlasenko; +Cc: jw schultz, linux-kernel mailing list

> Technically correct. Major version jump should be made when there is
> a binary incompatibility. It can be made without, but it is usually
> done for marketing reasons. I hope we'll never have marketing reasons
> for lk. :-) We can be actually _proud_ to have 2.$BIGNUM instead of
> 3.0

... and go Solaris, as in 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 7, 8, 9.  :D

T.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-30 10:22                     ` Tomas Szepe
@ 2002-09-30 11:10                       ` jw schultz
  2002-09-30 11:17                       ` Adrian Bunk
  2002-09-30 19:48                       ` Rik van Riel
  2 siblings, 0 replies; 206+ messages in thread
From: jw schultz @ 2002-09-30 11:10 UTC (permalink / raw)
  To: linux-kernel mailing list

On Mon, Sep 30, 2002 at 12:22:28PM +0200, Tomas Szepe wrote:
> > Technically correct. Major version jump should be made when there is
> > a binary incompatibility. It can be made without, but it is usually
> > done for marketing reasons. I hope we'll never have marketing reasons
> > for lk. :-) We can be actually _proud_ to have 2.$BIGNUM instead of
> > 3.0
> 
> ... and go Solaris, as in 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 7, 8, 9.  :D

I've no problem per-se with 2.6, 2.7, 2.8, 2.9, 2.10...
And i see no reason to compare it with Solaris where numbers
are mostly marketing although their major number referred to
the codebase (bsd vs SVr4).

I can see a number of real reasons to advance to 3.x:
Finishing the block layer and VM rewrite, maybe;
making FS blocksize independent of pagesize, probably;
flexibility with regard to pagesize for archs that support
variable pagesize (if market share and performance gains add
up the CPU designers will give it to us), probably;
initramfs, perhaps;
new module interface, possibly;
hotplug everything (and i mean everything), maybe;
elimination of the 32 bit versions of system-calls
and other deprecated interfaces, absolutely;
new filesystems and device drivers, nope;  
incremental performance improvements, you've got to be kidding;

It is just that right now, from what little i can see, 2.5
is part way through the process of a block layer redesign
and the VM is in a similar state.  Evidently LVM is in limbo
but that has to be at least operational before code freeze.
Driverfs looks promising but the API isn't even set and
documented yet.  BTW what happened to moving away from
major/minor numbers?

The developers are doing a great job and things are moving
along but 2.6 looks more than anything else like a stabilized
snapshot so the improvements become available (trustworthy)
for production.  That is consistent with "release early,
release often".



-- 
________________________________________________________________
	J.W. Schultz            Pegasystems Technologies
	email address:		jw@pegasys.ws

		Remember Cernan and Schmitt

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-30 10:22                     ` Tomas Szepe
  2002-09-30 11:10                       ` jw schultz
@ 2002-09-30 11:17                       ` Adrian Bunk
  2002-09-30 19:48                       ` Rik van Riel
  2 siblings, 0 replies; 206+ messages in thread
From: Adrian Bunk @ 2002-09-30 11:17 UTC (permalink / raw)
  To: Tomas Szepe; +Cc: linux-kernel mailing list

On Mon, 30 Sep 2002, Tomas Szepe wrote:

> > Technically correct. Major version jump should be made when there is
> > a binary incompatibility. It can be made without, but it is usually
> > done for marketing reasons. I hope we'll never have marketing reasons
> > for lk. :-) We can be actually _proud_ to have 2.$BIGNUM instead of
> > 3.0
>
> ... and go Solaris, as in 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 7, 8, 9.  :D

NetBSD still has sane version numbers:  :-)

0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6

> T.


cu
Adrian

-- 

You only think this is a free country. Like the US the UK spends a lot of
time explaining its a free country because its a police state.
								Alan Cox


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  9:53                           ` Andre Hedrick
@ 2002-09-30 11:54                             ` Jens Axboe
  0 siblings, 0 replies; 206+ messages in thread
From: Jens Axboe @ 2002-09-30 11:54 UTC (permalink / raw)
  To: Andre Hedrick
  Cc: Alan Cox, Linus Torvalds, james, Ingo Molnar, Jeff Garzik,
	Larry Kessler, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Richard J Moore

On Mon, Sep 30 2002, Andre Hedrick wrote:
> On Mon, 30 Sep 2002, Jens Axboe wrote:
> 
> > On Sun, Sep 29 2002, Alan Cox wrote:
> > > On Sun, 2002-09-29 at 18:42, Linus Torvalds wrote:
> > > > I can say that the IDE code is the same code that is in 2.4.x, so if 
> > > > you're comfortable with 2.4.x wrt IDE, then you should be comfy with 
> > > > 2.5.x too.
> > > 
> > > *NO*
> > > 
> > > The IDE code is the experimental code in 2.4-ac. It is _NOT_ the IDE
> > > code in 2.4 and its a lot less tested. I don't think it has any
> > > corruption bugs but it is most definitely not the base 2.4 code and has
> > > plenty of non corruption bugs (PCMCIA hang, taskfile write hang, irq
> > > blocking performance problems)
> > 
> > 2.5 at least does not have the taskfile hang, because I killed taskfile
> > io.
> 
> Great :-/  Now that you have restored the "rq->wrq" aka working copy of

Make taskfile io work 2.4-ac, and it will work in 2.5 as well. The only
sensible thing to do right now was to disable it in 2.5, imo, and so I
did.

> the request which in its past life under PIO only updated to block when
> the entire request was completed.  So there are no partial completions
> possible given the old method in the legacy path.

I haven't restored anything. 2.4-ac (your base) uses ->wrq copy, so does
2.5.

> One of the issues Linus kicked my can over was the "requirement" of partial
> completions.  What I need from block is a way to know how much is
> completed of the original total request.  So whatever value is the
> original rq->nr_sectors assigned to "TF.2/HF.2" or nsector_offset(s),
> needs to be carried in block and updated to reflect how much more is
> remaining of this CDB task.

Now that the block layer really can do partial completions properly, I
patched ide-disk to do just that. It's not very well tested, just did it
last week as proof-of-concept.

This breaks the typical offset rules, ie

current_segment_offset = rq->hard_cur_sectors - rq->current_nr_sectors;
total_offset = rq->hard_nr_sectors - rq->nr_sectors;

Haven't thought too much about that yet.

> I do not care if you call it "rq->dumbass_accounting_for_andre", but
> provide this dummy accounting variable in "struct request" and I will be
> happy.  This has nothing to do with bio or bh segments from the kernel.
> It is everything about device side accounting carried by block; whereas,
> the ll_driver can use it to determine what or if there is to be another
> interrupt.

What you ask for is already there, but requires that you massage
current_nr_sectors and nr_sectors like ide has always done.

> Why are we getting lost interrupts?
> 
> Because there is a beautiful "data-block completion" v/s "immediate
> interrupt assertion" race between the device and the kernel.  So please
> provide a counter which can be used to determine where the interrupt
> driven partial completion model the driver is wrt the device/request.
> 
> Jens, not asking for much.

Indeed, you are asking for stuff we've had for years.

===== drivers/ide/ide-disk.c 1.16 vs edited =====
--- 1.16/drivers/ide/ide-disk.c	Sat Sep 21 02:32:22 2002
+++ edited/drivers/ide/ide-disk.c	Mon Sep 23 17:18:48 2002
@@ -139,8 +139,8 @@
  */
 static ide_startstop_t read_intr (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
-	int i = 0, nsect	= 0, msect = drive->mult_count;
+	ide_hwif_t *hwif = HWIF(drive);
+	int nsect = 0, msect = drive->mult_count;
 	struct request *rq;
 	unsigned long flags;
 	u8 stat;
@@ -174,25 +174,24 @@
 		(unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
 #endif
 	ide_unmap_buffer(rq, to, &flags);
-	rq->sector += nsect;
-	rq->errors = 0;
-	i = (rq->nr_sectors -= nsect);
-	if (((long)(rq->current_nr_sectors -= nsect)) <= 0)
-		ide_end_request(drive, 1, rq->hard_cur_sectors);
+
+	/*
+	 * all done
+	 */
+	if (!ide_end_request(drive, 1, nsect))
+		return ide_stopped;
+
 	/*
 	 * Another BH Page walker and DATA INTERGRITY Questioned on ERROR.
 	 * If passed back up on multimode read, BAD DATA could be ACKED
 	 * to FILE SYSTEMS above ...
 	 */
-	if (i > 0) {
-		if (msect)
-			goto read_next;
-		if (HWGROUP(drive)->handler != NULL)
-			BUG();
-		ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
-                return ide_started;
-	}
-        return ide_stopped;
+	if (msect)
+		goto read_next;
+	if (HWGROUP(drive)->handler != NULL)
+		BUG();
+	ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+	return ide_started;
 }
 
 /*
@@ -203,7 +202,6 @@
 	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
 	ide_hwif_t *hwif	= HWIF(drive);
 	struct request *rq	= hwgroup->rq;
-	int i = 0;
 	u8 stat;
 
 	if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),
@@ -217,23 +215,19 @@
 			rq->nr_sectors-1);
 #endif
 		if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
-			rq->sector++;
-			rq->errors = 0;
-			i = --rq->nr_sectors;
-			--rq->current_nr_sectors;
-			if (((long)rq->current_nr_sectors) <= 0)
-				ide_end_request(drive, 1, rq->hard_cur_sectors);
-			if (i > 0) {
-				unsigned long flags;
-				char *to = ide_map_buffer(rq, &flags);
-				taskfile_output_data(drive, to, SECTOR_WORDS);
-				ide_unmap_buffer(rq, to, &flags);
-				if (HWGROUP(drive)->handler != NULL)
-					BUG();
-				ide_set_handler(drive, &write_intr, WAIT_CMD, NULL);
-                                return ide_started;
-			}
-                        return ide_stopped;
+			unsigned long flags;
+			char *to;
+
+			if (!ide_end_request(drive, 1, 1))
+				return ide_stopped;
+
+			to = ide_map_buffer(rq, &flags);
+			taskfile_output_data(drive, to, SECTOR_WORDS);
+			ide_unmap_buffer(rq, to, &flags);
+			if (HWGROUP(drive)->handler != NULL)
+				BUG();
+			ide_set_handler(drive, &write_intr, WAIT_CMD, NULL);
+			return ide_started;
 		}
 		/* the original code did this here (?) */
 		return ide_stopped;
===== drivers/ide/ide-taskfile.c 1.4 vs edited =====
--- 1.4/drivers/ide/ide-taskfile.c	Fri Sep 20 00:13:51 2002
+++ edited/drivers/ide/ide-taskfile.c	Mon Sep 23 17:04:47 2002
@@ -611,9 +611,8 @@
 	 * BH walking or segment can only be updated after we have a good
 	 * hwif->INB(IDE_STATUS_REG); return.
 	 */
-	if (--rq->current_nr_sectors <= 0)
-		if (!DRIVER(drive)->end_request(drive, 1, 0))
-			return ide_stopped;
+	if (!DRIVER(drive)->end_request(drive, 1, 1))
+		return ide_stopped;
 	/*
 	 * ERM, it is techincally legal to leave/exit here but it makes
 	 * a mess of the code ...
@@ -669,7 +668,6 @@
 		taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
 		task_unmap_rq(rq, pBuf, &flags);
 		rq->errors = 0;
-		rq->current_nr_sectors -= nsect;
 		msect -= nsect;
 		/*
 		 * FIXME :: We really can not legally get a new page/bh
@@ -677,10 +675,8 @@
 		 * BH walking or segment can only be updated after we have a
 		 * good hwif->INB(IDE_STATUS_REG); return.
 		 */
-		if (!rq->current_nr_sectors) {
-			if (!DRIVER(drive)->end_request(drive, 1, 0))
-				return ide_stopped;
-		}
+		if (!DRIVER(drive)->end_request(drive, 1, 1))
+			return ide_stopped;
 	} while (msect);
 	if (HWGROUP(drive)->handler == NULL)
 		ide_set_handler(drive, &task_mulin_intr, WAIT_WORSTCASE, NULL);
@@ -740,9 +736,9 @@
 	 * Safe to update request for partial completions.
 	 * We have a good STATUS CHECK!!!
 	 */
-	if (!rq->current_nr_sectors)
-		if (!DRIVER(drive)->end_request(drive, 1, 0))
-			return ide_stopped;
+	if (!DRIVER(drive)->end_request(drive, 1, 1))
+		return ide_stopped;
+
 	if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) {
 		rq = HWGROUP(drive)->rq;
 		pBuf = task_map_rq(rq, &flags);
@@ -802,13 +798,10 @@
 		msect -= nsect;
 		taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
 		task_unmap_rq(rq, pBuf, &flags);
-		rq->current_nr_sectors -= nsect;
-		if (!rq->current_nr_sectors) {
-			if (!DRIVER(drive)->end_request(drive, 1, 0))
-				if (!rq->bio) {
-					stat = hwif->INB(IDE_STATUS_REG);
-					return ide_stopped;
-				}
+		if (!DRIVER(drive)->end_request(drive, 1, 1)) {
+			/* stat for...? */
+			stat = hwif->INB(IDE_STATUS_REG);
+			return ide_stopped;
 		}
 	} while (msect);
 	rq->errors = 0;
@@ -922,18 +915,14 @@
 		msect -= nsect;
 		taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
 		task_unmap_rq(rq, pBuf, &flags);
-		rq->current_nr_sectors -= nsect;
 		/*
 		 * FIXME :: We really can not legally get a new page/bh
 		 * regardless, if this is the end of our segment.
 		 * BH walking or segment can only be updated after we
 		 * have a good  hwif->INB(IDE_STATUS_REG); return.
 		 */
-		if (!rq->current_nr_sectors) {
-			if (!DRIVER(drive)->end_request(drive, 1, 0))
-				if (!rq->bio)
-					return ide_stopped;
-		}
+		if (!DRIVER(drive)->end_request(drive, 1, 1))
+			return ide_stopped;
 	} while (msect);
 	rq->errors = 0;
 	if (HWGROUP(drive)->handler == NULL)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  7:56                         ` Jens Axboe
  2002-09-30  9:53                           ` Andre Hedrick
@ 2002-09-30 12:58                           ` Alan Cox
  2002-09-30 13:05                             ` Jens Axboe
  1 sibling, 1 reply; 206+ messages in thread
From: Alan Cox @ 2002-09-30 12:58 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Linus Torvalds, james, Ingo Molnar, Jeff Garzik, Larry Kessler,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore

On Mon, 2002-09-30 at 08:56, Jens Axboe wrote:
> 2.5 at least does not have the taskfile hang, because I killed taskfile
> io.

Thats not exactly a fix 8). 2.5 certainly has the others. Taskfile I/O
is pretty low on my fix list. The fix isnt trivial because we set the
IRQ handler late - so the IRQ can beat us setting the handler, but
equally if we set it early we get to worry about all the old races in
2.3.x


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 12:58                           ` Alan Cox
@ 2002-09-30 13:05                             ` Jens Axboe
  2002-10-01  2:17                               ` Andre Hedrick
  0 siblings, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-09-30 13:05 UTC (permalink / raw)
  To: Alan Cox
  Cc: Linus Torvalds, james, Ingo Molnar, Jeff Garzik, Larry Kessler,
	linux-kernel mailing list, Andrew V. Savochkin, Rusty Russell,
	Richard J Moore

On Mon, Sep 30 2002, Alan Cox wrote:
> On Mon, 2002-09-30 at 08:56, Jens Axboe wrote:
> > 2.5 at least does not have the taskfile hang, because I killed taskfile
> > io.
> 
> Thats not exactly a fix 8). 2.5 certainly has the others. Taskfile I/O

I didn't claim it was, I just don't want a user setting taskfile io to
'y' because he thinks its cool when we know its broken.

> is pretty low on my fix list. The fix isnt trivial because we set the
> IRQ handler late - so the IRQ can beat us setting the handler, but
> equally if we set it early we get to worry about all the old races in
> 2.3.x

Where exactly is the race?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  7:05                               ` Michael Clark
  2002-09-30  7:22                                 ` Andrew Morton
@ 2002-09-30 13:05                                 ` Kevin Corry
  2002-09-30 13:49                                   ` Michael Clark
  2002-09-30 13:59                                   ` Michael Clark
  1 sibling, 2 replies; 206+ messages in thread
From: Kevin Corry @ 2002-09-30 13:05 UTC (permalink / raw)
  To: Michael Clark, Matthias Andree; +Cc: linux-kernel mailing list

On Monday 30 September 2002 02:05, Michael Clark wrote:
> On 09/30/02 05:46, Matthias Andree wrote:
> >
> > Is not EVMS ready for the show? Is Linux >=2.6 going to have LVM2 and
> > EVMS? Or just LVM2? I'm not aware of the current status, but I do recall
> > having seen EVMS stable announcements (but not sure about 2.5 status).
>
>  From reading the EVMS list, it was working with 2.5.36 a couple weeks
> ago but needs some small bio and gendisk changes to work in 2.5.39.
>
> http://sourceforge.net/mailarchive/forum.php?thread_id=1105826&forum_id=2003
>
> CVS version may be up-to-date quite soon from reading the thread.
> It seems to be further along in 2.5 support than LVM2 - also including
> the fact that EVMS supports LVM1 metadata (which the 2.5 version of LVM2
> may not do so quite so soon from mentions on the lvm list).
>
> I haven't tried EVMS but certainly from looking at the feature set,
> it looks more comprehensive and modular than LVM (with its support
> for multiple metadata personalities).
>
> I too have LVM on quite a few of my machines, including my desktop,
> and if I wanted to test 2.5 right now - i'd probably have to do it
> using EVMS.

EVMS is now up-to-date and running on 2.5.39. You can get the latest kernel 
code from CVS (http://sourceforge.net/cvs/?group_id=25076) or BitKeeper 
(http://evms.bkbits.net/). There will be a new, full release (1.2) coming out 
this week.

Kevin Corry
corryk@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  7:22                                 ` Andrew Morton
@ 2002-09-30 13:08                                   ` Kevin Corry
  0 siblings, 0 replies; 206+ messages in thread
From: Kevin Corry @ 2002-09-30 13:08 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel mailing list

On Monday 30 September 2002 02:22, Andrew Morton wrote:
> Michael Clark wrote:
> >  From reading the EVMS list, it was working with 2.5.36 a couple weeks
> > ago but needs some small bio and gendisk changes to work in 2.5.39.
>
> It's going to break bigtime if someone ups and removes all the
> kiobuf code.....

I don't think that would be the case, since EVMS doesn't use kiobuf's.

Kevin Corry
corryk@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 13:05                                 ` Kevin Corry
@ 2002-09-30 13:49                                   ` Michael Clark
  2002-09-30 14:26                                     ` Kevin Corry
  2002-09-30 13:59                                   ` Michael Clark
  1 sibling, 1 reply; 206+ messages in thread
From: Michael Clark @ 2002-09-30 13:49 UTC (permalink / raw)
  To: Kevin Corry; +Cc: Matthias Andree, linux-kernel mailing list

On 09/30/02 21:05, Kevin Corry wrote:
> On Monday 30 September 2002 02:05, Michael Clark wrote:
> 
>>On 09/30/02 05:46, Matthias Andree wrote:
>>
>>>Is not EVMS ready for the show? Is Linux >=2.6 going to have LVM2 and
>>>EVMS? Or just LVM2? I'm not aware of the current status, but I do recall
>>>having seen EVMS stable announcements (but not sure about 2.5 status).
>>
>> From reading the EVMS list, it was working with 2.5.36 a couple weeks
>>ago but needs some small bio and gendisk changes to work in 2.5.39.
>>
>>http://sourceforge.net/mailarchive/forum.php?thread_id=1105826&forum_id=2003
>>
>>CVS version may be up-to-date quite soon from reading the thread.
>>It seems to be further along in 2.5 support than LVM2 - also including
>>the fact that EVMS supports LVM1 metadata (which the 2.5 version of LVM2
>>may not do so quite so soon from mentions on the lvm list).
>>
>>I haven't tried EVMS but certainly from looking at the feature set,
>>it looks more comprehensive and modular than LVM (with its support
>>for multiple metadata personalities).
>>
>>I too have LVM on quite a few of my machines, including my desktop,
>>and if I wanted to test 2.5 right now - i'd probably have to do it
>>using EVMS.
> 
> 
> EVMS is now up-to-date and running on 2.5.39. You can get the latest kernel 
> code from CVS (http://sourceforge.net/cvs/?group_id=25076) or Bitkeepr 
> (http://evms.bkbits.net/). There will be a new, full release (1.2) coming out 
> this week.

Yes, i just booted up with EVMS CVS on 2.5.39. Detected all my LVM LV's fine.
After cautious tests with them mounted ro, i then proceeded to mount them rw
and continued boot up. Working fine so far. Great work.

All i needed to do was change my vgscan to evms_vgscan and adjust my mount
points to the new style ( /dev/evms/lvm/<vg>/<lv> ).

~mc


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 13:05                                 ` Kevin Corry
  2002-09-30 13:49                                   ` Michael Clark
@ 2002-09-30 13:59                                   ` Michael Clark
  2002-09-30 15:50                                     ` Kevin Corry
  1 sibling, 1 reply; 206+ messages in thread
From: Michael Clark @ 2002-09-30 13:59 UTC (permalink / raw)
  To: Kevin Corry; +Cc: Matthias Andree, linux-kernel mailing list

Hi Kevin,

On 09/30/02 21:05, Kevin Corry wrote:
> EVMS is now up-to-date and running on 2.5.39. You can get the latest kernel 
> code from CVS (http://sourceforge.net/cvs/?group_id=25076) or Bitkeepr 
> (http://evms.bkbits.net/). There will be a new, full release (1.2) coming out 
> this week.

Seems you guys are the furthest ahead for a working logical volume manager
in 2.5. Does the EVMS team plan to send patches for 2.5 before the freeze?

It would be great to have EVMS in 2.5 (assuming the community approves of
EVMS going in). Seems to be very non-invasive touching almost no common code.

How far along are you with the clustering support (distributed locking of
cluster metadata and update notification, etc)? This is what i'm really after.

~mc


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-28  9:16                 ` jw schultz
@ 2002-09-30 14:05                   ` Denis Vlasenko
  2002-09-30 10:22                     ` Tomas Szepe
  0 siblings, 1 reply; 206+ messages in thread
From: Denis Vlasenko @ 2002-09-30 14:05 UTC (permalink / raw)
  To: jw schultz, linux-kernel mailing list

On 28 September 2002 07:16, jw schultz wrote:
> Ingo, I agree with Linus.  My recollection of when we moved
> to 2.0 was that the major number reflected the user<->kernel
> ABI.  I have no problem with a version 2.42 if things stay
> stable that long.   I hope they don't but that is another
> issue.
>
> Version 3.0 implies incompatibility with binaries from 2.x
> The distributions can play around with version numbers
> reflecting the GUI interface, libraries or installers but
> the kernel major version should stay the same until binary
> compatibility is broken.  When we move old syscalls (such as
> 32 bit file ops) from deprecated to unsupported is when we
> increment the major number.
>
> It may be that 2.7 will see the cruft cut out and be the end
> of 2.x but 2.5 isn't that.  So far 2.5 is performance
> enhancement.  Terrific performance enhancement, thanks to you
> and many others.  But it isn't adding major new features nor
> is it removing old interfaces.  In many ways 2.6 looks like
> a sign that the 2.x kernel is getting mature.  2.6 means
> users can expect improvements but don't have to make big changes.
> 2.6 is an upgrade, 3.0 would be a replacement.

Technically correct. Major version jump should be made when there is
a binary incompatibility. It can be made without, but it is usually
done for marketing reasons. I hope we'll never have marketing reasons
for lk. :-) We can be actually _proud_ to have 2.$BIGNUM instead of
3.0
--
vda

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 13:49                                   ` Michael Clark
@ 2002-09-30 14:26                                     ` Kevin Corry
  0 siblings, 0 replies; 206+ messages in thread
From: Kevin Corry @ 2002-09-30 14:26 UTC (permalink / raw)
  To: Michael Clark; +Cc: linux-kernel mailing list

On Monday 30 September 2002 08:49, Michael Clark wrote:
> On 09/30/02 21:05, Kevin Corry wrote:
> > EVMS is now up-to-date and running on 2.5.39. You can get the latest
> > kernel code from CVS (http://sourceforge.net/cvs/?group_id=25076) or
> > Bitkeepr (http://evms.bkbits.net/). There will be a new, full release
> > (1.2) coming out this week.
>
> Yes, i just booted up with EVMS CVS on 2.5.39. Detected all my LVM LV's
> fine. After cautious tests with them mounted ro, i then preceded to mount
> them rw and continued boot up. Working fine so far. Great work.
>
> All i needed to do was change my vgscan to evms_vgscan and adjust my mount
> points to the new style ( /dev/evms/lvm/<vg></<lv> ).

Instead of using "evms_vgscan", you should probably run "evms_rediscover". 
But you really only need that if you've compiled EVMS as modules in your 
kernel.

For volume admin tasks, I would recommend using "evmsgui" if you have X 
available, or "evmsn" if you need text-mode.

The LVM-style commands (like evms_vgscan) were originally written as testing 
tools before we had the fully-functional UIs. They were left around as kind 
of a proof-of-concept that the EVMS engine library API can be used to emulate 
existing volume management tools.

-- 
Kevin Corry
corryk@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:38                         ` Jens Axboe
                                             ` (3 preceding siblings ...)
  2002-09-29 21:52                           ` Matthias Andree
@ 2002-09-30 15:33                           ` Jan Harkes
  2002-09-30 18:13                           ` Jeff Willis
  5 siblings, 0 replies; 206+ messages in thread
From: Jan Harkes @ 2002-09-30 15:33 UTC (permalink / raw)
  To: linux-kernel

On Sun, Sep 29, 2002 at 05:38:17PM +0200, Jens Axboe wrote:
> On Sun, Sep 29 2002, Alan Cox wrote:
> > Most of my boxes won't even run a 2.5 tree yet. I'm sure its hardly
> > unique. Middle of November we may begin to find out how solid the core
> > code actually is, as drivers get fixed up and also in the other
> > direction as we eliminate numerous crashes caused by "fixed in 2.4" bugs
> 
> Well why don't they run with 2.5?
> 
> Alan, I think you are a pessimist painting a much bleaker picture of 2.5
> than it deserves. Sure lots of drivers may be broken still, I would be
> naive if I thought that this is all changed in time for oct 31. Most of
> these will not be fixed until people actually _use_ 2.5 (or 3.0-pre, or
> whatever it will be called), and that will not happen until Linus
> actually releases a -rc or similar. And so the fsck what? Noone expects
> 2.6-pre/3.0-pre to be perfect.

Ok, after losing a disk in the early 2.5 series, and not being able to
compile pretty much any kernel since 2.5.33, I decided to give 2.5.39 a
try last weekend.

Built kernel, rebooted; it almost seems to get stuck during the ide-probing
(a 10 second wait is a conservative estimate), but it came up in single
user. Checked for errors in /proc/kmsg: nothing. Great. Reboot to
multiuser, start X, open a window, and lose all access to my keyboard
completely. Log in remotely with ssh: hmm, kernel errors about unknown scancodes.

Reboot, just don't use X for the moment, maybe I can catch an oops,
lockup during boot while loading the uhci usb driver. Alt-sysrq works,
another fsck later (these seem to take a lot longer, but that could be
subjective). Disable hotplug/usb during startup, reboot, within 2
minutes orinoco_cs driver locks up and starts throwing debugging goo
about transmit timeouts and resetting card. Nice, except for the fact
that interrupts seem to be disabled and this time magic-sysrq doesn't
work.

Pull the battery out to be able to reboot the laptop, and went back to
2.4.20-latest for now. 2.5.33 did work mostly (after fixing up a bunch
of compile fixes and the oss cs4281 driver), but seems to last only
about 1 hour on battery life vs. the solid 3 1/2 hours with a 2.4 kernel.
All of this is on a Thinkpad X20, which doesn't have a serial console.

Using APM, not ACPI. But this is not a bugreport, because I haven't even
got a chance to isolate any single problem in a way that I can create a
useful report.

> I'm not worried.

I am a bit worried, at least as far as Coda is concerned, there is a lot
of unmerged stuff, and as long as I can't do any testing of the changes
it is a bit useless to send them off to Linus. I hope things stabilize
before the feature freeze.

Jan


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 13:59                                   ` Michael Clark
@ 2002-09-30 15:50                                     ` Kevin Corry
  0 siblings, 0 replies; 206+ messages in thread
From: Kevin Corry @ 2002-09-30 15:50 UTC (permalink / raw)
  To: Michael Clark; +Cc: linux-kernel mailing list

On Monday 30 September 2002 08:59, Michael Clark wrote:
> Hi Kevin,
>
> On 09/30/02 21:05, Kevin Corry wrote:
> > EVMS is now up-to-date and running on 2.5.39. You can get the latest
> > kernel code from CVS (http://sourceforge.net/cvs/?group_id=25076) or
> > Bitkeepr (http://evms.bkbits.net/). There will be a new, full release
> > (1.2) coming out this week.
>
> Seems you guys are the furthest ahead for a working logical volume manager
> in 2.5. Does the EVMS team plan to send patches for 2.5 before the freeze?

Yes. We may send something in for review this week.

> It would be great to have EVMS in 2.5 (assuming the community approves of
> EVMS going in). Seems to be very non-invasive touching almost no common
> code.
>
> How far along are you with the clustering support (distributed locking of
> cluster metadata and update notification, etc)? This is what i'm really
> after.

Right now we are talking about ways to use EVMS in a fail-over cluster 
environment. E.g.: You have four nodes in a cluster each attached to a large 
SAN device. EVMS will provide software fencing of the shared storage so each 
node in the cluster will have a private portion of the SAN. EVMS will allow 
reassigning of storage to other nodes in the cluster in the event of a node 
failure. This approach involves the smallest hit to the existing code and 
very little extra kernel code.

More general cluster support, with support for fully-shared storage (and all 
of the necessary distributed locking and such) will come in 2003. This will 
obviously involve more in-depth code changes.

-- 
Kevin Corry
corryk@us.ibm.com
http://evms.sourceforge.net/

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 17:42                     ` Linus Torvalds
  2002-09-29 17:54                       ` Rik van Riel
  2002-09-29 18:24                       ` Alan Cox
@ 2002-09-30 16:39                       ` jbradford
  2 siblings, 0 replies; 206+ messages in thread
From: jbradford @ 2002-09-30 16:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jdickens, mingo, jgarzik, kessler, alan, linux-kernel, saw,
	rusty, richardj_moore

> > How many people are sitting on the sidelines waiting for guarantee that ide is 
> > not going to blow up on our filesystems and take our data with it. Guarantee 
> > that ide is working and not dangerous to our data, then I bet a lot more 
> > people will come back and bang on 2.5. 
> 
> How the hell can I _guarantee_ anything like that?

You don't need to - just post "2.5.x ide is working, and not dangerous to your data", and loads of people will start using it.  That way, we get it tested a decent amount.

Of course when somebody's root fs gets fsck'ed, (pun intended), the list is bound to get a flamewar^Whelpfully worded bug report.

The false rumors that IDE was fubar for a long time in 2.5.x, coupled with the fact that a lot of recent 2.5.x kernels don't compile, seem to have scared people off, which is ridiculous.

John.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  6:14                   ` james
                                       ` (3 preceding siblings ...)
  2002-09-29 17:42                     ` Linus Torvalds
@ 2002-09-30 16:47                     ` Pau Aliagas
  4 siblings, 0 replies; 206+ messages in thread
From: Pau Aliagas @ 2002-09-30 16:47 UTC (permalink / raw)
  To: lkml

On Sun, 29 Sep 2002, james wrote:

> I know this whole ide mess have taken me away from the devolemental series. 
> And I bet a lot of others. 

That is precisely what has kept me out of 2.5. I do not want to risk my 
data due to the IDE problems; otherwise I'd be happy testing 2.5 all 
around in all kind of machines I had available.

Pau


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 15:38                         ` Jens Axboe
                                             ` (4 preceding siblings ...)
  2002-09-30 15:33                           ` Jan Harkes
@ 2002-09-30 18:13                           ` Jeff Willis
  5 siblings, 0 replies; 206+ messages in thread
From: Jeff Willis @ 2002-09-30 18:13 UTC (permalink / raw)
  To: linux-kernel

> > Most of my boxes won't even run a 2.5 tree yet. I'm sure its hardly
> > unique. Middle of November we may begin to find out how solid the core
> > code actually is, as drivers get fixed up and also in the other
> > direction as we eliminate numerous crashes caused by "fixed in 2.4" bugs

You're right, it's not unique.  Will they run 2.4?  I've got about a dozen
boxes that have had over a year uptime with 2.0 or 2.2, but won't boot with
the 2.4 or the recent 2.5 I tried.

> Well why don't they run with 2.5?

Good question.  With the 2.4 kernels I've tried zImages worked fine but
bzImages wouldn't boot.   Unfortunately, with the options I need, the kernel
won't fit in a zImage.  The servers were all originally AMI motherboards,
but after replacing a few due to failures, there's a few Abit, Tyans and
Gigabyte replacements.  The Gigabyte (model GA-8IRXP, I think) will boot
bzImages, but I hate to replace motherboards that have worked fine for years
just to boot the new 2.6/3.0.



^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
                                     ` (3 preceding siblings ...)
  2002-09-29 15:26                   ` Matthias Andree
@ 2002-09-30 18:37                   ` Bill Davidsen
  4 siblings, 0 replies; 206+ messages in thread
From: Bill Davidsen @ 2002-09-30 18:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel mailing list

On Sat, 28 Sep 2002, Linus Torvalds wrote:

> However, I'll believe that when I see it. Usually people don't complain 
> during a development kernel, because they think they shouldn't, and then 
> when it becomes stable (ie when the version number changes) they are 
> surprised that the behabviour didn't magically improve, and _then_ we get 
> tons of complaints about how bad the VM is under their load.

Part of this is because people who complain often get answers which sound
a lot like "what do you expect, it's a test kernel," or "you have the
source, go fix it," or even "if you don't like it, go run Windows." This list
is FAR more cordial than newsgroups, but I have seen people who suggested
an improvement get invited to submit a patch.

The other reason is the "it must be me" effect, if something doesn't work
for the user there is a general reaction that something must be configured
wrong.

Anyway that's my impression of why the complaints come as you say, I think
it's going to happen regardless of the version number. 

For what it's worth the changes feel more like 2.2 to 2.4 than 1.2.13 to
2.0, but as long as you don't call it Windows I don't really care;-) 

-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30  6:54                               ` Kai Henningsen
@ 2002-09-30 18:40                                 ` Bill Davidsen
  2002-10-01 12:38                                   ` Matthias Andree
  0 siblings, 1 reply; 206+ messages in thread
From: Bill Davidsen @ 2002-09-30 18:40 UTC (permalink / raw)
  To: Kai Henningsen; +Cc: linux-kernel

On 30 Sep 2002, Kai Henningsen wrote:

> One idea we've come up (and surely we're not the only ones) is to use  
> cheap IDE disks for backup, possibly in a cold-swappable insert. As long  
> as you can keep several backups per disk (say using some of those 100GB  
> disks), preferrably even on a different machine, that's fairly cheap.
> 
> If you want to keep daily backups for a week, weekly for a year, and all  
> on separate media, of course, that's *not* cheap with this method, and  
> even DLT or similar prices become acceptable in comparision. But it  
> certainly beats *no* backup!

I do that, but it doesn't make for a storage medium I can easily use on
another system. The cost of DVD writers is coming down, and non-magnetic
media may have some advantages as well. Still, they're small compared to
disk sizes.

-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29 16:24                     ` Alan Cox
  2002-09-29 22:00                       ` Matthias Andree
@ 2002-09-30 19:02                       ` Bill Davidsen
  1 sibling, 0 replies; 206+ messages in thread
From: Bill Davidsen @ 2002-09-30 19:02 UTC (permalink / raw)
  To: Alan Cox; +Cc: Matthias Andree, linux-kernel mailing list

On 29 Sep 2002, Alan Cox wrote:

> On Sun, 2002-09-29 at 16:26, Matthias Andree wrote:
> > I personally have the feeling that 2.2.x performed better than 2.4.x
> > does, but I cannot go figure because I'm using ReiserFS 3.6 file
> 
> On low end boxes the benchmarks I did show later 2.4-rmap beats 2.2. 2.0
> worked suprisingly well (better than pre-rmap 2.4) and as Stephen
> claimed the best code was about 2.1.100, 2.2 then dropped badly from
> that point.

I might have said 2.1.106 (I'm still running that on one box), but that's
the general sweet spot.
 
> Low memory is of course where rmap does best, so the 2.4-rmap v 2.4
> parts of such testing are not actually that useful

In the 2.4-ac vs. 2.4-aa tests I did in the spring, rmap was better on
small memory, -aa was better with large memory and heavy write load. I
expect ioscheduling to address this, and when I get a totally expendable
large machine I'll try 2.5 again.

-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-29  9:12                     ` Jens Axboe
  2002-09-29 11:19                       ` Murray J. Root
  2002-09-29 14:56                       ` Alan Cox
@ 2002-09-30 19:32                       ` Bill Davidsen
  2002-10-01  6:26                         ` Jens Axboe
  2 siblings, 1 reply; 206+ messages in thread
From: Bill Davidsen @ 2002-09-30 19:32 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux-Kernel Mailing List

On Sun, 29 Sep 2002, Jens Axboe wrote:

> On Sun, Sep 29 2002, jbradford@dial.pipex.com wrote:
> > > Anyway, people who are having VM trouble with the current 2.5.x series, 
> > > please _complain_, and tell what your workload is. Don't sit silent and 
> > > make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> > > thing.
> > 
> > I think the broken IDE in 2.5.x has meant that it got seriously less
> > testing overall than previous development trees :-(.  Maybe after
> > halloween when it stabilises a bit more we'll get more reports in.
> 
> 2.5 is definitely desktop stable, so please test it if you can. Until
> recently there was a personal show stopper for me, the tasklist
> deadline. Now 2.5 is happily running on my desktop as well.

2.5.38-mm2 has been stable for me on uni, what is the status of SMP? I had
what looked like logical to physical mapping problems on a BP6 and Abit
dual P5C-166, resulting in syslog data on every drive including those with
no Linux partition. That was somewhere around 2.5.22 to 2.5.26.
 
> 2.5 IDE stability should be just as good as 2.4-ac.

A laudable goal.

-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-30 10:22                     ` Tomas Szepe
  2002-09-30 11:10                       ` jw schultz
  2002-09-30 11:17                       ` Adrian Bunk
@ 2002-09-30 19:48                       ` Rik van Riel
  2002-09-30 20:30                         ` Christoph Hellwig
  2 siblings, 1 reply; 206+ messages in thread
From: Rik van Riel @ 2002-09-30 19:48 UTC (permalink / raw)
  To: Tomas Szepe; +Cc: Denis Vlasenko, jw schultz, linux-kernel mailing list

On Mon, 30 Sep 2002, Tomas Szepe wrote:

> > for lk. :-) We can be actually _proud_ to have 2.$BIGNUM instead of
> > 3.0
>
> ... and go Solaris, as in 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 7, 8, 9.  :D

I wonder what SunOS 6.0 is going to be called ;)

Rik
-- 
Bravely reimplemented by the knights who say "NIH".

http://www.surriel.com/		http://distro.conectiva.com/

Spamtraps of the month:  september@surriel.com trac@trac.org


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver
  2002-09-30 19:48                       ` Rik van Riel
@ 2002-09-30 20:30                         ` Christoph Hellwig
  0 siblings, 0 replies; 206+ messages in thread
From: Christoph Hellwig @ 2002-09-30 20:30 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Tomas Szepe, Denis Vlasenko, jw schultz, linux-kernel mailing list

On Mon, Sep 30, 2002 at 04:48:00PM -0300, Rik van Riel wrote:
> On Mon, 30 Sep 2002, Tomas Szepe wrote:
> 
> > > for lk. :-) We can be actually _proud_ to have 2.$BIGNUM instead of
> > > 3.0
> >
> > ... and go Solaris, as in 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 7, 8, 9.  :D
> 
> I wonder what SunOS 6.0 is going to be called ;)

Solaris .COM

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 13:05                             ` Jens Axboe
@ 2002-10-01  2:17                               ` Andre Hedrick
  0 siblings, 0 replies; 206+ messages in thread
From: Andre Hedrick @ 2002-10-01  2:17 UTC (permalink / raw)
  To: Jens Axboe, Russell King
  Cc: Alan Cox, Linus Torvalds, james, Ingo Molnar, Jeff Garzik,
	Larry Kessler, linux-kernel mailing list, Andrew V. Savochkin,
	Rusty Russell, Richard J Moore


First an apology to Russell for bringing him into this thread.

On Mon, 30 Sep 2002, Jens Axboe wrote:

> On Mon, Sep 30 2002, Alan Cox wrote:
> > On Mon, 2002-09-30 at 08:56, Jens Axboe wrote:
> > > 2.5 at least does not have the taskfile hang, because I killed taskfile
> > > io.
> > 
> > Thats not exactly a fix 8). 2.5 certainly has the others. Taskfile I/O
> 
> I didn't claim it was, I just don't want a user setting taskfile io to
> 'y' because he thinks its cool when we know its broken.
> 
> > is pretty low on my fix list. The fix isnt trivial because we set the
> > IRQ handler late - so the IRQ can beat us setting the handler, but
> > equally if we set it early we get to worry about all the old races in
> > 2.3.x
> 
> Where exactly is the race?

As soon as you complete reading or writing the final byte in a pio state
diagram, the device can interrupt instantly!  I do mean instantly.


ide_startstop_t task_out_intr (ide_drive_t *drive)
{
        ide_hwif_t *hwif        = HWIF(drive);
        struct request *rq      = HWGROUP(drive)->rq;
        char *pBuf              = NULL;
        unsigned long flags;
        u8 stat;

        if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),
                     DRIVE_READY, drive->bad_wstat)) {
                DTF("%s: WRITE attempting to recover last " \
                        "sector counter status=0x%02x\n",
                        drive->name, stat);
                rq->current_nr_sectors++;
                return DRIVER(drive)->error(drive, "task_out_intr", stat);
        }
        /*
         * Safe to update request for partial completions.
         * We have a good STATUS CHECK!!!
         */
        if (!rq->current_nr_sectors)
                if (!DRIVER(drive)->end_request(drive, 1))
                        return ide_stopped;
        if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) {
                 rq = HWGROUP(drive)->rq;
                pBuf = task_map_rq(rq, &flags);
                DTF("write: %p, rq->current_nr_sectors: %d\n",
                        pBuf, (int) rq->current_nr_sectors);
                taskfile_output_data(drive, pBuf, SECTOR_WORDS);
KABOOM! The RACE is on! (The handler start point)
                task_unmap_rq(rq, pBuf, &flags);
                rq->errors = 0;
                rq->current_nr_sectors--;
        }
        if (HWGROUP(drive)->handler == NULL)
                ide_set_handler(drive, &task_out_intr, WAIT_WORSTCASE, NULL);
Driver WINS!
        return ide_started;
}

If the device issues an interrupt to the host controller before we can arm
the handler we are dead.

void taskfile_output_data (ide_drive_t *drive, void *buffer, u32 wcount)
{
        if (drive->bswap) {
                ata_bswap_data(buffer, wcount);
                HWIF(drive)->ata_output_data(drive, buffer, wcount);
KABOOM! The RACE is on! (The Second fake start point)
                ata_bswap_data(buffer, wcount);
        } else {
                HWIF(drive)->ata_output_data(drive, buffer, wcount);
KABOOM! The RACE is on! (The Second fake start point)
        }
}

void ata_output_data (ide_drive_t *drive, void *buffer, u32 wcount)
{
        ide_hwif_t *hwif        = HWIF(drive);
        u8 io_32bit             = drive->io_32bit;

        if (io_32bit) {
                if (io_32bit & 2) {
                        unsigned long flags;
                        local_irq_save(flags);
                        ata_vlb_sync(drive, IDE_NSECTOR_REG);
                        hwif->OUTSL(IDE_DATA_REG, buffer, wcount);
                        local_irq_restore(flags);
                } else
                        hwif->OUTSL(IDE_DATA_REG, buffer, wcount);
        } else {
                hwif->OUTSW(IDE_DATA_REG, buffer, wcount<<1);
        }
KABOOM! The RACE is on! (The Real start point)
}


If we are having to lollygag in the kernel for a byteswap or a bounce
buffer (aka memcpy/free) we can/will lose the interrupt.  The old code
would push the handler early resulting in timeouts and double handlers
added.

Now the question is how to address the race.

At this point we have two paths each with bugs.
The old legacy path can allow for the wrong handler to be executed for a
given interrupt.  With the above bug, the old path can potentially crap
data.  Specifically, wrong handler execution.

The new path can miss setting the handler in time.

It can be fixed and maybe the account process stuff is already present,
and we are at another communication delay but it shall be worked through
calmly, not like the past where nothing gets done and people just become
offended.

Cheers,


Andre Hedrick
LAD Storage Consulting Group




^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 19:32                       ` Bill Davidsen
@ 2002-10-01  6:26                         ` Jens Axboe
  2002-10-01  7:54                           ` Mikael Pettersson
  0 siblings, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-10-01  6:26 UTC (permalink / raw)
  To: Bill Davidsen; +Cc: Linux-Kernel Mailing List

On Mon, Sep 30 2002, Bill Davidsen wrote:
> On Sun, 29 Sep 2002, Jens Axboe wrote:
> 
> > On Sun, Sep 29 2002, jbradford@dial.pipex.com wrote:
> > > > Anyway, people who are having VM trouble with the current 2.5.x series, 
> > > > please _complain_, and tell what your workload is. Don't sit silent and 
> > > > make us think we're good to go.. And if Ingo is right, I'll do the 3.0.x 
> > > > thing.
> > > 
> > > I think the broken IDE in 2.5.x has meant that it got seriously less
> > > testing overall than previous development trees :-(.  Maybe after
> > > halloween when it stabilises a bit more we'll get more reports in.
> > 
> > 2.5 is definitely desktop stable, so please test it if you can. Until
> > recently there was a personal show stopper for me, the tasklist
> > deadline. Now 2.5 is happily running on my desktop as well.
> 
> 2.5.38-mm2 has been stable for me on uni, what is the status of SMP? I had
> what looked like logical to physical mapping problems on a BP6 and Abit
> dual P5C-166, resulting in syslog data on every drive including those with
> no Linux partition. That was somewhere around 2.5.22 to 2.5.26.

Well I do all my 2.5 testing on SMP, I don't even remember when I last
compiled a UP 2.5 kernel. Well works for me as I wrote earlier, I don't
keep the desktop up more than a few days at a time though. Then I boot
a newer 2.5 on it.

> > 2.5 IDE stability should be just as good as 2.4-ac.
> 
> A laudable goal.

If you know of any points where this is currently not true, I'd like to
hear about it. I'm considering this goal reached. Whether 2.4-ac is at
the level we want is a different story.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-10-01  6:26                         ` Jens Axboe
@ 2002-10-01  7:54                           ` Mikael Pettersson
  2002-10-01  8:27                             ` Jens Axboe
  2002-10-01 11:31                             ` Alan Cox
  0 siblings, 2 replies; 206+ messages in thread
From: Mikael Pettersson @ 2002-10-01  7:54 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linux-Kernel Mailing List

Jens Axboe writes:
 > On Mon, Sep 30 2002, Bill Davidsen wrote:
 > > On Sun, 29 Sep 2002, Jens Axboe wrote:
 > > > 2.5 IDE stability should be just as good as 2.4-ac.
 > > 
 > > A laudable goal.
 > 
 > If you know of any points where this is currently not true, I'd like to
 > hear about it. I'm considering this goal reached. Whether 2.4-ac is at
 > the level we want is a different story.

2.5.39 IDE is nowhere near as stable as 2.4.20-pre8:

- I have several boxes with decent PCI chipsets (BX, HX) but old disks.
  With 2.5.39, they tend to spew a couple of ..._intr errors on boot.
  (Sorry, can't be more specific right now. I won't be near those
  boxes until Saturday.)

- Same ..._intr errors on my 486 with a qd6580 VLB controller.
  It also has, in post-2.5.36 kernels, an instant-reboot problem which
  occurs whenever I pass the ide0=qd65xx kernel option required to
  activate its chipset support. (I _believe_ this is because the code
  does something, like a kmalloc, which is illegal at the early
  point IDE's __setup runs.) With 2.5.3x kernels, this box also sees
  a steady stream of spurious interrupts while doing a kernel recompile,
  something it doesn't see in older kernels.

- My Intel AL440LX box (440LX chipset, 20G Quantum Fireball) worked
  brilliantly up to 2.5.36, but hangs *hard* with 2.5.39 as soon
  as I tar zxf the kernel source tarball.
  (May or may not be IDE. I'll try a minimal 2.5.39 tonight.)

All of these work perfectly with 2.4.20-pre8, indeed all previous 2.4
standard kernels, 2.2 + Andre's ide-patch, and with the exception of
the ..._intr errors, 2.5.36.

OTOH, I have three boxes which do appear to work fine with 2.5.39.

/Mikael

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-10-01  7:54                           ` Mikael Pettersson
@ 2002-10-01  8:27                             ` Jens Axboe
  2002-10-01  8:44                               ` jbradford
  2002-10-01 11:31                             ` Alan Cox
  1 sibling, 1 reply; 206+ messages in thread
From: Jens Axboe @ 2002-10-01  8:27 UTC (permalink / raw)
  To: Mikael Pettersson; +Cc: Linux-Kernel Mailing List

On Tue, Oct 01 2002, Mikael Pettersson wrote:
> Jens Axboe writes:
>  > On Mon, Sep 30 2002, Bill Davidsen wrote:
>  > > On Sun, 29 Sep 2002, Jens Axboe wrote:
>  > > > 2.5 IDE stability should be just as good as 2.4-ac.
>  > > 
>  > > A laudable goal.
>  > 
>  > If you know of any points where this is currently not true, I'd like to
>  > hear about it. I'm considering this goal reached. Whether 2.4-ac is at
>  > the level we want is a different story.
> 
> 2.5.39 IDE is nowhere near as stable as 2.4.20-pre8:

Common misconception. I wrote 2.4-ac, not 2.4 vanilla tree. 2.4-ac is in
flux, 2.5 is too. There are some quirks, mostly of the 'doesn't work'
nature and not the 'corrupting data' kind.

> - I have several boxes with decent PCI chipsets (BX, HX) but old disks.
>   With 2.5.39, they tend to spew a couple of ..._intr errors on boot.
>   (Sorry, can't be more specific right now. I won't be near those
>   boxes until Saturday.)

But they come up?

> - Same ..._intr errors on my 486 with a qd6580 VLB controller.
>   It also has, in post-2.5.36 kernels, an instant-reboot problem which
>   occurs whenever I pass the ide0=qd65xx kernel option required to
>   activate its chipset support. (I _believe_ this is because the code
>   does something, like a kmalloc, which is illegal at the early
>   point IDE's __setup runs.) With 2.5.3x kernels, this box also sees
>   a steady stream of spurious interrupts while doing a kernel recompile,
>   something it doesn't see in older kernels.

Ok this is a new one, at least to me

> - My Intel AL440LX box (440LX chipset, 20G Quantum Fireball) worked
>   brilliantly up to 2.5.36, but hangs *hard* with 2.5.39 as soon
>   as I tar zxf the kernel source tarball.
>   (May or may not be IDE. I'll try a minimal 2.5.39 tonight.)

Probably not ide, no important changes in there in between 2.6.36 and
present.

> All of these work perfectly with 2.4.20-pre8, indeed all previous 2.4
> standard kernels, 2.2 + Andre's ide-patch, and with the exception of
> the ..._intr errors, 2.5.36.

If you (or anyone else for that matter) come across ide oddities in 2.5,
please try 2.4.20-pre-ac kernels and see if you can reproduce.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-10-01  8:27                             ` Jens Axboe
@ 2002-10-01  8:44                               ` jbradford
  0 siblings, 0 replies; 206+ messages in thread
From: jbradford @ 2002-10-01  8:44 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-kernel

> > - My Intel AL440LX box (440LX chipset, 20G Quantum Fireball) worked
> >   brilliantly up to 2.5.36, but hangs *hard* with 2.5.39 as soon
> >   as I tar zxf the kernel source tarball.
> >   (May or may not be IDE. I'll try a minimal 2.5.39 tonight.)
> 
> Probably not ide, no important changes in there in between 2.6.36 and
> present.

Where can I get the 2.6.x tree, then?  :-)

John.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-10-01 11:31                             ` Alan Cox
@ 2002-10-01 11:25                               ` Jens Axboe
  0 siblings, 0 replies; 206+ messages in thread
From: Jens Axboe @ 2002-10-01 11:25 UTC (permalink / raw)
  To: Alan Cox; +Cc: Mikael Pettersson, Linux-Kernel Mailing List

On Tue, Oct 01 2002, Alan Cox wrote:
> > - Same ..._intr errors on my 486 with a qd6580 VLB controller.
> >   It also has, in post-2.5.36 kernels, an instant-reboot problem which
> >   occurs whenever I pass the ide0=qd65xx kernel option required to
> 
> Seems to be specific to the 2.5.x version of the new ide so I guess its
> a port error (or just bad luck it now breaks and was iffy before)

ok, I'll try it in 2.5 then

> > - My Intel AL440LX box (440LX chipset, 20G Quantum Fireball) worked
> >   brilliantly up to 2.5.36, but hangs *hard* with 2.5.39 as soon
> >   as I tar zxf the kernel source tarball.
> >   (May or may not be IDE. I'll try a minimal 2.5.39 tonight.)
> 
> Thats PIIX, which should be the most boringly stable configuration of
> the lot 8(

There's no evidence that this is an ide error yet. I'd like to see some
serial console or similar on that beast. I have no LX board here, but
piix is rock solid.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-10-01  7:54                           ` Mikael Pettersson
  2002-10-01  8:27                             ` Jens Axboe
@ 2002-10-01 11:31                             ` Alan Cox
  2002-10-01 11:25                               ` Jens Axboe
  1 sibling, 1 reply; 206+ messages in thread
From: Alan Cox @ 2002-10-01 11:31 UTC (permalink / raw)
  To: Mikael Pettersson; +Cc: Jens Axboe, Linux-Kernel Mailing List

On Tue, 2002-10-01 at 08:54, Mikael Pettersson wrote:
> - I have several boxes with decent PCI chipsets (BX, HX) but old disks.
>   With 2.5.39, they tend to spew a couple of ..._intr errors on boot.
>   (Sorry, can't be more specific right now. I won't be near those
>   boxes until Saturday.)

That's fine. It's issuing commands the drives reject. Right now we don't do
it quietly, that is all.

> - Same ..._intr errors on my 486 with a qd6580 VLB controller.
>   It also has, in post-2.5.36 kernels, an instant-reboot problem which
>   occurs whenever I pass the ide0=qd65xx kernel option required to

Seems to be specific to the 2.5.x version of the new ide so I guess its
a port error (or just bad luck it now breaks and was iffy before)

> - My Intel AL440LX box (440LX chipset, 20G Quantum Fireball) worked
>   brilliantly up to 2.5.36, but hangs *hard* with 2.5.39 as soon
>   as I tar zxf the kernel source tarball.
>   (May or may not be IDE. I'll try a minimal 2.5.39 tonight.)

Thats PIIX, which should be the most boringly stable configuration of
the lot 8(


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-09-30 18:40                                 ` Bill Davidsen
@ 2002-10-01 12:38                                   ` Matthias Andree
  2002-10-04 19:58                                     ` Bill Davidsen
  0 siblings, 1 reply; 206+ messages in thread
From: Matthias Andree @ 2002-10-01 12:38 UTC (permalink / raw)
  To: linux-kernel

On Mon, 30 Sep 2002, Bill Davidsen wrote:

> I do that, but it doesn't make for a storage medium I can easily use on
> another system. The cost of DVD writers is coming down, and non-magnetic
> media may have some advantages as well. Still, thay're small compared to
> disk sizes.

There are big drives available if you really want one (and can afford
one, which is the bigger problem usually).

Tandberg has some big SLR drives (50 GB native data, maybe even more,
didn't check for some months), many companies have DLT and SuperDLT that
store several dozen GB each, then there's Ultrium, and if you're after
cheap stuff, there's also ADR (but there are some that require the osst
driver, which is not helpful if you need to support other OSs beyond
Windows and Linux). This list is not complete, and it deliberately omits
helical scan technologies such as DDS.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* IPv6 stability (success story ;)
  2002-09-29 17:26                       ` Jochen Friedrich
  2002-09-29 17:35                         ` Jeff Garzik
  2002-09-30  0:00                         ` Andi Kleen
@ 2002-10-01 19:28                         ` Petr Baudis
  2 siblings, 0 replies; 206+ messages in thread
From: Petr Baudis @ 2002-10-01 19:28 UTC (permalink / raw)
  To: Jochen Friedrich; +Cc: Andi Kleen, jbradford, linux-kernel, debian-ipv6

Dear diary, on Sun, Sep 29, 2002 at 07:26:37PM CEST, I got a letter,
where Jochen Friedrich <jochen@scram.de> told me, that...
> Hi Andi,
> 
> > Actually current IPv6 is stable and has been for a long time, it's just not
> > completely standards compliant (but still quite usable for a lot of people)
> 
> For end systems (no router) with static IPv6 definitions this seems to be
> true. However, for machines which use autoconfiguration (stateless as
> there isn't a usable IPv6 capable DHCP server AFAIK) or act as routers,
> the current state of the implementation of the default route can best be
> described as buggy. (Autoconfigured machines seem to loose their default
> route after some time, e.g.).

Well, I maintain Point of Presence for XS26 at Prague running on linux
(2.4.19), and it works with almost no problems routing about 20 kilobytes per
second through about 520 interfaces (tunnels) and with routing table consisting
of cca 2100 entries (there's zebra, ospf6d and bgpd running there ;). The only
one real problem we had was neighbour discovery bug up to 2.4.18 which was
fixed along the way to 2.4.19. There are no crashes, no routing instabilities,
we are absolutely happy with linux there ;-) (in fact, we have frequently much
more problems with the *BSDs running at some other PoPs).

Oh, of course, I must thank Alexey a lot for providing excellent support for us
:).

-- 
 
				Petr "Pasky" Baudis
 
* ELinks maintainer                * IPv6 guy (XS26 co-coordinator)
* IRCnet operator                  * FreeCiv AI occassional hacker
.
<Beeth> Girls are like internet domain names, the ones I like are already taken.
<honx> Well, you can still get one from a strange country :-P
.
Public PGP key && geekcode && homepage: http://pasky.ji.cz/~pasky/

^ permalink raw reply	[flat|nested] 206+ messages in thread

* [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-09-27  4:45             ` Linus Torvalds
  2002-09-28  7:46               ` Ingo Molnar
@ 2002-10-03 15:51               ` jbradford
  2002-10-03 15:57                 ` Linus Torvalds
  1 sibling, 1 reply; 206+ messages in thread
From: jbradford @ 2002-10-03 15:51 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jgarzik, kessler, alan, linux-kernel, saw, rusty, richardj_moore

> > Tangent question, is it definitely to be named 2.6?
> 
> I see no real reason to call it 3.0.
> 
> The order-of-magnitude threading improvements might just come closest to
> being a "new thing", but yeah, I still consider it 2.6.x. We don't have 
> new architectures or other really fundamental stuff. In many ways the jump 
> from 2.2 -> 2.4 was bigger than the 2.4 -> 2.6 thing will be, I suspect.

I think we should stick to incrementing the major number when binary compatibility is broken.

> But hey, it's just a number.  I don't feel that strongly either way. I 
> think version number inflation (can anybody say "distribution makers"?) is 
> a bit silly, and the way the kernel numbering works there is no reason to 
> bump the major number for regular releases.

Psychologically and subconsciously, this kind of thing _does_ make people stand up and take notice.

For example, SNK made the NeoGeo arcade games print things like:

NEO GEO
MAX 330 MEGA
PRO GEAR SPEC

on start up and in attract mode.

As far as I know, the 330 MEGA means absolutely nothing, and pro gear spec is just an arbitrary name for the addressing system used.

John.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 15:51               ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) jbradford
@ 2002-10-03 15:57                 ` Linus Torvalds
  2002-10-03 16:16                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem jbradford
                                     ` (4 more replies)
  0 siblings, 5 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-10-03 15:57 UTC (permalink / raw)
  To: jbradford
  Cc: jgarzik, kessler, alan, linux-kernel, saw, rusty, richardj_moore


On Thu, 3 Oct 2002 jbradford@dial.pipex.com wrote:
> 
> I think we should stick to incrementing the major number when binary
> compatibility is broken.

"Stick to"? We've never had that as any criteria for major numbers in the
kernel. Binary compatibility has _never_ been broken as a release policy,
only as a "that code is old, and we've given people 5 years to migrate to
the new system calls, the old ones are TOAST".

The only policy for major numbers has always been "major capability
changes". 1.0 was "networking is stable and generally usable" (by the
standards of that time), while 2.0 was "SMP and true multi-architecture
support". My planned point for 3.0 was NuMA support, but while we actually
have some of that, the hardware just isn't relevant enough to matter.

The memory management issues would qualify for 3.0, but my argument there 
is really that I doubt everybody really is happy yet. Which was why I 
asked for people to test it and complain about VM behaviour - and we've 
had some complaints ("too swap-happy") although they haven't sounded like 
really horrible problems.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem
  2002-10-03 15:57                 ` Linus Torvalds
@ 2002-10-03 16:16                   ` jbradford
  2002-10-03 22:30                     ` Greg KH
  2002-10-03 16:37                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) Alan Cox
                                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 206+ messages in thread
From: jbradford @ 2002-10-03 16:16 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: kessler, alan, linux-kernel, saw, rusty, richardj_moore

> On Thu, 3 Oct 2002 jbradford@dial.pipex.com wrote:
> > 
> > I think we should stick to incrementing the major number when binary
> > compatibility is broken.
> 
> "Stick to"? We've never had that as any criteria for major numbers in the
> kernel. Binary compatibility has _never_ been broken as a release policy,
> only as a "that code is old, and we've given people 5 years to migrate to
> the new system calls, the old ones are TOAST".

Ah, I was getting confused, I thought that the move to 2.0 was when we moved from a.out to elf.  I didn't really follow kernel development very closely at all back then, to be truthful.

> The only policy for major numbers has always been "major capability
> changes".

Then it definitely shouldn't be 3.0 yet then.

> 1.0 was "networking is stable and generally usable" (by the
> standards of that time), while 2.0 was "SMP and true multi-architecture
> support". My planned point for 3.0 was NuMA support, but while we actually
> have some of that, the hardware just isn't relevant enough to matter.

Hmmm, then for 3.0 I'd vote for fully working and proven stable:

* High memory support, 
* IPV6
* IDE-SCSI
* Bluetooth
* USB (2)
* IEEE 1394

> The memory management issues would qualify for 3.0, but my argument there 
> is really that I doubt everybody really is happy yet. Which was why I 
> asked for people to test it and complain about VM behaviour - and we've 
> had some ccomplaints ("too swap-happy") although they haven't sounded like 
> really horrible problems.

To be completely honest, I don't see any improvement in 2.5.x over 2.4.x on my boxes that are running both :-(.

John.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 15:57                 ` Linus Torvalds
  2002-10-03 16:16                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem jbradford
@ 2002-10-03 16:37                   ` Alan Cox
  2002-10-03 16:56                     ` Linus Torvalds
  2002-10-03 16:51                   ` Dave Jones
                                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 206+ messages in thread
From: Alan Cox @ 2002-10-03 16:37 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jbradford, jgarzik, kessler, Linux Kernel Mailing List, saw,
	rusty, richardj_moore

On Thu, 2002-10-03 at 16:57, Linus Torvalds wrote:
> 
> On Thu, 3 Oct 2002 jbradford@dial.pipex.com wrote:
> > 
> > I think we should stick to incrementing the major number when binary
> > compatibility is broken.
> 
> "Stick to"? We've never had that as any criteria for major numbers in the
> kernel. Binary compatibility has _never_ been broken as a release policy,
> only as a "that code is old, and we've given people 5 years to migrate to
> the new system calls, the old ones are TOAST".

We've generally done better than that. Libc 2.2.2 still works


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 15:57                 ` Linus Torvalds
  2002-10-03 16:16                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem jbradford
  2002-10-03 16:37                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) Alan Cox
@ 2002-10-03 16:51                   ` Dave Jones
  2002-10-03 17:04                     ` Alan Cox
  2002-10-03 20:43                     ` Andrew Morton
  2002-10-03 19:51                   ` Rik van Riel
  2002-10-04 22:26                   ` [OT] 2.6 not 3.0 - (NUMA) Martin J. Bligh
  4 siblings, 2 replies; 206+ messages in thread
From: Dave Jones @ 2002-10-03 16:51 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: alan, linux-kernel, akpm

On Thu, Oct 03, 2002 at 08:57:13AM -0700, Linus Torvalds wrote:

 > The memory management issues would qualify for 3.0, but my argument there 
 > is really that I doubt everybody really is happy yet. Which was why I 
 > asked for people to test it and complain about VM behaviour - and we've 
 > had some ccomplaints ("too swap-happy") although they haven't sounded like 
 > really horrible problems.

We still need some work for low memory boxes (where low isn't
necessarily all that low). On my 128MB laptop I can lock up the box
for a minute or two at a time by doing two things at the same time,
like a bk pull, and switching desktops.

I dread to think how a 16 or 32MB box performs these days..

		Dave				

-- 
| Dave Jones.        http://www.codemonkey.org.uk

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 16:37                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) Alan Cox
@ 2002-10-03 16:56                     ` Linus Torvalds
  2002-10-03 17:40                       ` Alan Cox
  2002-10-03 19:55                       ` jlnance
  0 siblings, 2 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-10-03 16:56 UTC (permalink / raw)
  To: Alan Cox
  Cc: jbradford, jgarzik, kessler, Linux Kernel Mailing List, saw,
	rusty, richardj_moore


On 3 Oct 2002, Alan Cox wrote:
> > 
> > "Stick to"? We've never had that as any criteria for major numbers in the
> > kernel. Binary compatibility has _never_ been broken as a release policy,
> > only as a "that code is old, and we've given people 5 years to migrate to
> > the new system calls, the old ones are TOAST".
> 
> We've generally done better than that. Libc 2.2.2 stil works

We have removed _some_ stuff, and we've definitely broken some of the more
esoteric configuration stuff (ie things like "top" and "ps" and "ifconfig"
have broken multiple times over the last 11 years).

And that "old_stat()" thing really ought to go some day.. It's not much of
a support burden, and yeah, we can point people to "that old a.out binary
from 1993 still works fine", so I guess we'll keep it another ten years,
but at this point that has less to do with technical judgement than with
sentimentality, I think ;^)

But yeah, I think on the whole we've done pretty well on being binary 
compatible.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 16:51                   ` Dave Jones
@ 2002-10-03 17:04                     ` Alan Cox
  2002-10-03 20:43                     ` Andrew Morton
  1 sibling, 0 replies; 206+ messages in thread
From: Alan Cox @ 2002-10-03 17:04 UTC (permalink / raw)
  To: Dave Jones; +Cc: Linus Torvalds, Linux Kernel Mailing List, akpm

> We still need some work for low memory boxes (where low isn't
> necessarily all that low). On my 128MB laptop I can lock up the box
> for a minute or two at a time by doing two things at the same time,
> like a bk pull, and switching desktops.
> 
> I dread to think how a 16 or 32MB box performs these days..

On 2.4.1x with rmap, better than 2.2. A 32Mb box with rmap vm on 2.4,
running the xfce/rox desktop and sylpheed is very snappy indeed


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 16:56                     ` Linus Torvalds
@ 2002-10-03 17:40                       ` Alan Cox
  2002-10-03 19:55                       ` jlnance
  1 sibling, 0 replies; 206+ messages in thread
From: Alan Cox @ 2002-10-03 17:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jbradford, jgarzik, kessler, Linux Kernel Mailing List, saw,
	rusty, richardj_moore

On Thu, 2002-10-03 at 17:56, Linus Torvalds wrote:
> And that "old_stat()" thing really ought to go some day.. It's not much of
> a support burden, and yeah, we can point people to "that old a.out binary
> from 1993 still works fine", so I guess we'll keep it another ten years,
> but at this point that has less to do with technical judgement than with
> sentimentality, I think ;^)
> 
> But yeah, I think on the whole we've done pretty well on being binary 
> compatible.

I'm not sure we want to throw those things out. However all the stuff
that went out before libc5 could go into a legacy.c file that is only
linked if a.out loaders are present


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 15:57                 ` Linus Torvalds
                                     ` (2 preceding siblings ...)
  2002-10-03 16:51                   ` Dave Jones
@ 2002-10-03 19:51                   ` Rik van Riel
  2002-10-04 22:26                   ` [OT] 2.6 not 3.0 - (NUMA) Martin J. Bligh
  4 siblings, 0 replies; 206+ messages in thread
From: Rik van Riel @ 2002-10-03 19:51 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: jbradford, jgarzik, kessler, alan, linux-kernel, saw, rusty,
	richardj_moore

On Thu, 3 Oct 2002, Linus Torvalds wrote:

> The memory management issues would qualify for 3.0, but my argument
> there is really that I doubt everybody really is happy yet.

I'm absolutely convinced some people won't be happy, simply
because of the fundamental limitations of global page replacement.
However, Andrew Morton has done a great job and the 2.5 VM seems
to be looking as good as anything we've had before.

For me 3.0 arguments would be Ingo's threading stuff, not anything
else.

regards,

Rik
-- 
A: No.
Q: Should I include quotations after my reply?

http://www.surriel.com/		http://distro.conectiva.com/


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 16:56                     ` Linus Torvalds
  2002-10-03 17:40                       ` Alan Cox
@ 2002-10-03 19:55                       ` jlnance
  1 sibling, 0 replies; 206+ messages in thread
From: jlnance @ 2002-10-03 19:55 UTC (permalink / raw)
  To: linux-kernel

On Thu, Oct 03, 2002 at 09:56:42AM -0700, Linus Torvalds wrote:

> And that "old_stat()" thing really ought to go some day.. It's not much of
> a support burden, and yeah, we can point people to "that old a.out binary
> from 1993 still works fine", so I guess we'll keep it another ten years,
> but at this point that has less to do with technical judgement than with
> sentimentality, I think ;^)
> 
> But yeah, I think on the whole we've done pretty well on being binary 
> compatible.

My wife still uses Applix, which I purchased when Red Hat first
started selling it.  The kernel runs it just fine.  Interestingly
enough, Red Hat no longer ships the shared libs that it uses,
but installing the necessary rpms from Red Hat 6.0 makes it work.
I looked at the dates on the binaries and they are from 1996,
but I am pretty sure they are substantially older than that.
I do appreciate you putting effort into binary compatibility.
My wife would be quite upset with me if Applix quit working :-)

Jim

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging  macros, SCSI RAIDdevice)
  2002-10-03 16:51                   ` Dave Jones
  2002-10-03 17:04                     ` Alan Cox
@ 2002-10-03 20:43                     ` Andrew Morton
  2002-10-03 22:05                       ` Dave Jones
  1 sibling, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-03 20:43 UTC (permalink / raw)
  To: Dave Jones; +Cc: Linus Torvalds, alan, linux-kernel

Dave Jones wrote:
> 
> On Thu, Oct 03, 2002 at 08:57:13AM -0700, Linus Torvalds wrote:
> 
>  > The memory management issues would qualify for 3.0, but my argument there
>  > is really that I doubt everybody really is happy yet. Which was why I
>  > asked for people to test it and complain about VM behaviour - and we've
>  > had some ccomplaints ("too swap-happy") although they haven't sounded like
>  > really horrible problems.
> 
> We still need some work for low memory boxes (where low isn't
> necessarily all that low). On my 128MB laptop I can lock up the box
> for a minute or two at a time by doing two things at the same time,
> like a bk pull, and switching desktops.

Specific version info and all the usual how-to-reproduce info
would help here.  Things have changed a _lot_ in the past
week or two.

Comparisons with 2.4 are useful.  Simple "here's how to
reproduce" instructions are 100% golden ;)

> I dread to think how a 16 or 32MB box performs these days..

Well last I looked, a 2.5 kernel with NR_CPUS=8 had 22MB
of unreclaimable memory by the time it reached the console
login prompt.

Yet John Bradford says that in swapless 8MB, 2.5.40 is "springier"
than 2.4.x, so weird.

Jens did some aggressive scaling work against the BIO pools
recently which saved a ton of memory, and 2.5.40 now consumes
slightly less than 2.4.x to get started.

But the major thing we can do for the tiny boxes is to scale back
much harder on the big caches, the mempools, etc.  I hope to
be able to remove the radix-tree and pte_chain mempools altogether,
which will free up a quarter meg or so.


Apart from that, I'm reasonably happy with where the VM stands at
present.  It's very simple, very fast to identify which pages to
replace, and pretty accurate and efficient at doing that.

It should be immune to our traditional catastrophic failure
scenarios, and that's something which we want to keep.  There are
some ten- or twenty-percent regressions in some areas, but at this
time that's a reasonable price to pay for not locking up, not having
five-minute comas, not exhibiting massive stalls when there's a
lot of disk writeout, etc.  I think history teaches us to value
simplicity, predictability and robustness over performance-in-corner-cases.

There are some OOM problems on really big highmem machines which
still need investigation.  I expect they can be largely cleaned
up by making the throttling be per-zone rather than global.  Which
would complete the migration of the VM to being a per-zone thing.
Zone fallbacks then become known only to the page allocator and
the VM proper only cares for individual zones.

The reverse map was a huge conceptual cleanup.  It trumped a
whole class of nasty, fallible when-to-unmap decision making
logic.

Yeah, it swaps a lot.  It's the use-it-or-lose-it VM, and it's
mean.  People (damn them) don't like that.

Right now, I am rather disinclined to fix this via algorithmic changes,
by twiddling with aging-of-mapped-memory versus aging-of-pagecache,
or anything like that.  Because any such algorithmic change tends to
unbalance things, and to cause incorrect latency under sudden load
changes which could cause false OOM failures, or excessive CPU burn.  

What I'm more inclined to do is to leave things conceptually unchanged,
and to bolt a really obvious, bloody great ugly knob on the side;
maybe something as simple as:

	if (mapped_memory / total_memory < sysctl_the_user_is_a_wimp)
		only_reclaim_pagecache()

We shall see...

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 20:43                     ` Andrew Morton
@ 2002-10-03 22:05                       ` Dave Jones
  2002-10-04  3:46                         ` Andreas Boman
  2002-10-04  7:44                         ` jbradford
  0 siblings, 2 replies; 206+ messages in thread
From: Dave Jones @ 2002-10-03 22:05 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, alan, linux-kernel

On Thu, Oct 03, 2002 at 01:43:33PM -0700, Andrew Morton wrote:

 > >  > The memory management issues would qualify for 3.0, but my argument there
 > >  > is really that I doubt everybody really is happy yet. Which was why I
 > >  > asked for people to test it and complain about VM behaviour - and we've
 > >  > had some ccomplaints ("too swap-happy") although they haven't sounded like
 > >  > really horrible problems.
 > > 
 > > We still need some work for low memory boxes (where low isn't
 > > necessarily all that low). On my 128MB laptop I can lock up the box
 > > for a minute or two at a time by doing two things at the same time,
 > > like a bk pull, and switching desktops.
 > 
 > Specific version info and all the usual how-to-reproduce info
 > would help here.  Things have changed a _lot_ in the past
 > week or two.

That was 2.5.39 + bk from just before .40 hit the streets.
I'll pull something current in a tick, and give that a shot.

 > Comparisons with 2.4 are useful.  Simple "here's how to
 > reproduce" instructions are 100% golden ;)

There's usually not too much going on on the laptop.
It runs enlightenment + gnome 1.4. A few gnome-terminals,
and that's about it. After bitkeeper had sucked down a few
changesets and started its "lets grind the disk for a while"
consistency thing, interactive feel is approaching nil.
Trying to focus a different window takes about 5 seconds minimum.
Switching desktops takes 30 seconds minimum.

My completely unscientific guess here is that bitkeeper is
whoring all the I/O bandwidth, and we're trying to swap at
the same time, which is getting starved.
I'll try and reproduce after some sleep with vmstat running
if this will be of use.

 > > I dread to think how a 16 or 32MB box performs these days..
 > Well last I looked, a 2.5 kernel with NR_CPUS=8 had 22MB
 > of unreclaimable memory by the time it reached the console
 > login prompt.

Ouch.

 > Yet John Bradford says that in swapless 8MB, 2.5.40 is "springier"
 > than 2.4.x, so weird.

Depends on what the tests are, I suppose. "springier" doesn't
really say too much. We do minimise memory usage in a few
places if mem<16M though iirc which could be helping this case.
 
 > It should be immune to our traditional catastrophic failure
 > scenarios, and that's something which we want to keep.  There are
 > some ten- or twenty-percent regressions in some areas, but at this
 > time that's a reasonable price to pay for not locking up, not having
 > five-minute comas, not exhibiting massive stalls when there's a
 > lot of disk writeout, etc.  I think history teaches us to value
 > simplicity, predictability and robustness over performance-in-corner-cases.

Hmm, my case seems to be everything you say should not be happening
any more. Sorry 8-)
I *can't* be the only one seeing this though.
The laptop disk is no speed demon, but it's quite nippy at ~12MB/s.
For obvious reasons, having swap and / on the same disk is making
a considerable impact here.

		Dave

-- 
| Dave Jones.        http://www.codemonkey.org.uk

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem
  2002-10-03 16:16                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem jbradford
@ 2002-10-03 22:30                     ` Greg KH
  2002-10-04  6:33                       ` jbradford
  0 siblings, 1 reply; 206+ messages in thread
From: Greg KH @ 2002-10-03 22:30 UTC (permalink / raw)
  To: jbradford; +Cc: linux-kernel

On Thu, Oct 03, 2002 at 05:16:10PM +0100, jbradford@dial.pipex.com wrote:
> 
> Hmmm, then for 3.0 I'd vote for fully working and proven stable:

Hm, how do you "prove" any of these are stable :)

> * Bluetooth

Been there since 2.4

> * USB (2)

Present in 2.5 (and 2.4 now too)

> * IEEE 1394

Been there since 2.4.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 22:05                       ` Dave Jones
@ 2002-10-04  3:46                         ` Andreas Boman
  2002-10-04  7:44                         ` jbradford
  1 sibling, 0 replies; 206+ messages in thread
From: Andreas Boman @ 2002-10-04  3:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, torvalds, alan


[-- Attachment #1.1: Type: text/plain, Size: 3952 bytes --]

On Thu, 2002-10-03 at 17:05, Dave Jones wrote:
<SNIP>
>  > > We still need some work for low memory boxes (where low isn't
>  > > necessarily all that low). On my 128MB laptop I can lock up the box
>  > > for a minute or two at a time by doing two things at the same time,
>  > > like a bk pull, and switching desktops.
>  > 
>  > Specific version info and all the usual how-to-reproduce info
>  > would help here.  Things have changed a _lot_ in the past
>  > week or two.
> That was 2.5.39 + bk from just before .40 hit the streets.
> I'll pull something current in a tick, and give that a shot.
> 
>  > Comparisons with 2.4 are useful.  Simple "here's how to
>  > reproduce" instructions are 100% golden ;)
Usually it's difficult with these 'feeling' issues though...

> theres usually not too much going on on the laptop.
> It runs enlightenment + gnome 1.4. A few gnome-terminals,
> and thats about it. After bitkeeper had sucked down a few
> changesets and started its "lets grind the disk for a while"
> consistency thing, interactive feel is approaching nil.
> Trying to focus a different window takes about 5 seconds minimum.
> Switching desktops takes 30 seconds minimum.
> 
> My completely unscientific guess here is that bitkeeper is
> whoring all the I/O bandwidth, and we're trying to swap at
> the same time, which is getting starved.
> I'll try and reproduce after some sleep with vmstat running
> if this will be of use.
> 
<SNIP>
>  
>  > It should be immune to our traditional catastrophic failure
>  > scenarios, and that's something which we want to keep.  There are
>  > some ten- or twenty-percent regressions in some areas, but at this
>  > time that's a reasonable price to pay for not locking up, not having
>  > five-minute comas, not exhibiting massive stalls when there's a
>  > lot of disk writeout, etc.  I think history teaches us to value
>  > simplicity, predictability and robustness over performance-in-corner-cases.
> 
> Hmm, my case seems to be everything you say should not be happening
> any more. Sorry 8-)
> I *can't* be the only one seeing this though.
You aren't ;)

> The laptop disk is no speed demon, but its quite nippy at ~12MB/s
> For obvious reasons, having swap and / on the same disk is making
> a considerable impact here.
> 		Dave

I'm seeing similar behavior, though not to the extent you describe. 512M
ram, ~600 or so swap on a U160 scsi disk (only one disk in the box
-definitely need one more). 

rpm -ba mozilla.spec and while it's untarring/gunzipping I keep switching
desktops ("edge flip" in E) between one with a few Eterms and misc
stuff, and one with mozilla. At first it behaves fine, but eventually
the mouse pointer will start jerking around and it'll be slightly slower
to switch; a little later the swapping starts and xmms will skip
(sometimes just once, other times repeatedly). Once the untarring is done
and the build starts the box becomes responsive again.

Doing the same thing on 2.4.20-pre5aa2 xmms never skipped; starting a
build of mozilla and evolution at the same time, still no skipping. Drop
xmms and play a music video in MPlayer - still no skipping. I could even
move the MPlayer output window back and forth between the desktops
repeatedly (although I didn't move it around as fast as when I was just
switching desktops; without sound skips, video playback did freeze up a
bit and left funky trails across the mozilla page at times), but sound
didn't skip. Sound started skipping when I had mozilla and evolution both
untarring/gunzipping and I moved the window around madly between heads and
desktops.

The attached vmstat 1 from 2.5.40 is taken from when the build has just
started until a little after I killed it (when it had untarred and
started ./configure). A little time goes by after I kill it, I see a
little more IO, and then the box just idles again.

-- 
Andreas Boman <aboman@nerdfest.org>

[-- Attachment #1.2: vmstat-2.5 --]
[-- Type: text/x-troff-man, Size: 13760 bytes --]

   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 0  1  0   3396  14400  20252 281056    0    0     7    31 1049   966  2  1 97
 2  1  0   3396  15276  20252 281032    0    0     0  9070 1435  2781  4 17 79
 2  1  1   3396  24692  20252 272660    0    0     0  9356 1391  2359  3 20 77
 1  1  0   3396  34404  20252 263960    0    0   108  8720 1361  2307  3 19 78
 2  1  1   3396  53132  20284 251936    0    0    36  9439 1459  2731  4 24 72
 1  1  0   3396  63500  20296 242480    0    0    12  7476 1378  3008 33 19 49
 4  0  0   3396  79712  20316 227232    0    0    20  9491 1586  3942 17 17 66
 2  1  0   3396  90452  20316 217404    0    0     0  6977 1338  4500 40 19 41
 3  0  0   3396 100776  20348 208072    0    0    32  8500 1462  3455 37 20 43
 2  1  0   3396 104468  20376 199008    0    0    28  7257 1371  4254 42 20 38
 1  1  0   3396 131784  20376 183716    0    0     0  9865 1431  4052 15 21 64
 3  1  0   3396 143788  20376 172712    0    0     0  8677 1391  4010 39 20 42
 1  1  1   3396 153436  20376 163860    0    0     0  7420 1331  2924 36 17 47
 9  0  0   3396 165512  20380 152860    0    0     4 10658 1481  4092 15 19 65
 1  1  0   3396 174536  20380 148644    0    0     0  8932 1514  4917 38 22 40
 1  1  0   3396 189456  20384 134752    0    0     4  8789 1418  4624 40 26 34
 2  1  0   3396 197220  20384 127816    0    0     0  8775 1448  3069 34 26 40
 1  1  0   3396 210400  20384 115596    0    0     0  7912 1406  4613 42 23 35
 4  0  0   3396 208716  20388 107332    0    0     4  7208 1383  4494 38 21 41
 3  0  0   3396 228860  20388  93812    0    0     0  8451 1447  4674 34 23 44
 2  1  0   3396 247848  20388  85264    0    0     0  8784 1444  4362 31 22 47
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 1  1  0   3396 236480  20532  96428    0    0   572  6653 1376 10165 53 31 16
 3  0  1   3396 207996  20680 112528    0    0  1332  4628 1448 12121 60 29 11
 4  1  0   3396 183432  20868 134940    0    0   808  5856 1393 14025 55 36  9
 2  1  1   3396 183628  20892 141036    0    0     0  8423 1311  7492 42 14 45
 2  0  1   3396 182692  20956 147336    0    0   256  6869 1314  6438 30 18 52
 1  1  1   3396 177700  21028 151484    0    0     0  7266 1368  6928 47 19 34
 4  0  0   3396 172476  21108 156040    0    0   256  9156 1331  4612 10 14 76
 1  1  1   3396 166332  21144 161548    0    0    76  5618 1221  6092 45 13 42
 2  1  1   3396 166288  21144 161548    0    0     0 14624 1231  3201 17  7 76
 4  0  0   3396 148396  21196 167636    0    0     0  3817 1362  5651 39 18 44
 1  1  1   3396 145884  21348 178680    0    0    44  6402 1338 10058 23 31 45
 3  1  0   3396 144648  21356 179820    0    0   136  9280 1232  4117 44  9 48
 1  1  1   3396 133908  21496 188956    0    0   152  4673 1214  6416 44 22 34
 1  1  1   3396 133836  21500 188988    0    0     0 12320 1172  2957 13  6 81
 3  0  0   3396 118440  21644 202500    0    0   644  4637 1264  9888 47 28 26
 4  0  0   3396 107768  21668 203516    0    0     0  8848 1245  4542 29 10 61
 1  1  1   3396 109644  21748 209964    0    0   388  7884 1273  6321 34 22 45
 5  0  0   3396 100180  21844 218488    0    0   444  8121 1252  7027 41 20 40
 3  1  1   3396  97868  21852 220556    0    0    76  8588 1370  4439 22 14 64
 1  1  1   3396  91240  21904 226416    0    0     0  5969 1267  7393 42 18 40
 3  0  0   3396  85724  21920 231624    0    0   424  7400 1217  4708 37 14 48
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 1  1  2   3396  80444  21932 236348    0    0     0 10248 1267  5512 19 15 66
 1  1  1   3396  73232  22000 242852    0    0   148  8248 1236  6901 48 19 34
 5  0  0   3396  55848  22088 248628    0    0   244  6668 1217  5089 37 15 49
 1  1  1   3396  60656  22212 253772    0    0   348  8276 1304  6179 22 14 64
 2  1  0   3396  53080  22392 260172    0    0   136  5636 1326  7550 52 23 25
 1  1  1   3396  49040  22492 263620    0    0   188  7792 1379  3531  7 11 82
 4  0  0   3396  44024  22620 267796    0    0   116  8148 1222  4894 37 14 49
 2  1  1   3396  36344  22808 274128    0    0   428  7889 1298  6797 19 22 59
 1  1  0   3396  33844  22868 276328    0    0    32  9904 1230  5689 48 12 40
 1  1  0   3396  24416  23072 284420    0    0   216  3138 1313  6549 44 19 37
 1  1  1   3396  17684  23196 290352    0    0    72  8516 1242  6248 23 14 63
 3  0  0   3396  14308  23280 293168    0    0   160  7528 1287  3761 41 11 48
 1  2  1   3396   1512  21128 302688    0    0   280  5103 1346  8892 29 24 47
 1  2  1   5216   3348  19120 299492    0 1828     0  7631 1423  1560  4  5 91
 1  2  1   5216   2916  19128 299732    0    0     8  9833 1346  1953  2  5 93
 1  1  1  12552  13856  19120 298196    0 7336   136 14580 1253  4235 36  9 55
 3  0  0  12552   3216  19140 299408    0    0   136  5540 1279  3163 20  7 73
 2  1  1  20460   2780  19468 308672    0 7908   484 11125 1282  9327 39 26 35
 1  2  1  23540   1528  19436 303452    0 3080     0  8232 1265  2440 14  5 81
 1  1  0  25572  11904  19424 299704    0 2032    24  8980 1288  2168 24  9 67
 2  1  1  27128   2492  19604 308280    0 1556  1456  6205 1356  7760 23 19 58
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 1  3  2  27868   1472  19584 307736    0  740     0 11104 1305  2033 10  9 81
 0  5  1  30432   1576  19544 306240    0 2564     8  8257 1394  1206  1  5 94
 0  5  2  30688   1548  19544 306236    0  256     0  9483 1269   631  0  6 94
 0  4  1  34788   1632  19516 303624    0 4336     0 11993 1285  1351  3  5 92
 2  1  1  37736  12796  19520 298740    0 3328     4 12364 1237  4040 35  8 57
 2  1  2  40640   2560  19640 297620    0 2928   540  6092 1320  4530 31 14 55
 3  0  0  40640   5576  19760 304528    0    0  1160  5912 1332  6053 22 16 62
 3  1  1  41448   2568  19776 306176    0  832   664 10389 1300  5790 20 13 67
 5  0  0  41740   8124  19728 300808    0  292   256  5973 1378  6105 40 21 40
 3  1  2  41764   2240  19644 300156    0  280   640  7426 1403  4245 17 18 65
 2  1  0  41764  12140  19580 295992    0    0   128  8229 1399  3599 31 12 57
 1  2  1  41772   1516  19732 301572    0    8  1416  5621 1392 10771 33 31 37
 3  0  0  41772   9276  19784 296776    0    0   640  7432 1342  4156 31 14 55
 1  1  0  41772   2460  20024 301428    0    0   896  5322 1412  7754 22 30 48
 1  2  1  41808   1512  19936 294832    0   36   896  7349 1342  5280 19 16 65
 3  1  1  41808   4932  20104 296940   28    0   812  5002 1402  8322 42 26 33
 1  2  1  41816   1556  20200 294032    0    8   768  7435 1378  5621 22 24 54
 0  1  0  41820   5240  20316 294572    0    4   928  4532 1343  7414 40 19 40
 1  2  1  41832   1504  20388 291856    0   12   792  7414 1380  5893 22 17 61
 3  0  0  41840   2608  20692 294840    0    8  1048  4801 1433  7643 31 35 35
 4  0  1  41840   2596  20888 293356    0    0     8  6648 1359  5357 18 25 57
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 4  0  0  41840   6004  20756 284540   40    4    40  5057 1348  2733 26 30 44
 1  1  0  41820  12672  21180 283368    4   24   844  2281 1387  3338 28 28 44
 1  1  0  41820  12412  21432 283368    0    0   268  2947 1339  3808 18 14 68
 1  1  0  41816  13396  21640 282212    0    0   232  1633 1378  3240 40 12 49
 1  1  0  41816  13216  21648 282212    0    0     8  5642 1429  3585 16 22 62
 2  0  0  41816  13196  21832 282212    0    0   184  4016 1336  2220  7 39 54
 1  1  0  41816  12876  22016 282212    0    0   184  4112 1363  2347  9 22 69
 2  1  0  41816  11968  22192 282924    0    0   768  3088 1476  2351  9 22 69
 2  1  0  41816   9140  22200 284200    0    0  1272    32 1217  3178 45 36 19
 4  0  0  41820  13712  22188 280732    0  100  1544   260 1305  3531 66 21 13
 3  0  0  41820  10560  22252 281876    0    0  1188   391 1222  4079 56 30 14
 2  0  1  41820   9348  22252 282160    0    0   284   512 1102  2343 58 38  4
 3  0  0  41820  10616  22340 282552    0    0   360   352 1301  2887 50 35 15
 2  0  0  41820  10432  22412 283064    0    0   584   448 1172  2411 44 38 17
 2  0  0  41820   9508  22452 283332    0    0   300   480 1155  2551 54 33 13
 2  0  0  41820   9336  22492 283484    0    0   188   739 1162  2359 55 31 14
 1  1  0  41820   9496  22492 283780    0    0   236   352 1074  1878 73 21  6
 1  0  0  41820  11308  22540 283928    0    0   264    96 1083  1619  8  6 86
 2  0  0  41820  11300  22540 283928    0    0     0     0 1044  1511  4  1 95
 2  0  0  41820  11304  22540 283928    0    0     0     0 1108  1894  4  1 95
 2  0  1  41820  11136  22540 284056    0    0     0  6400 1385  2240  4 23 73
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 1  0  0  41820  11196  22540 284056    0    0     0  1680 1091  1597  3  2 95
 1  0  0  41820  11188  22540 284056    0    0     0     0 1099  1809  3  1 96
 1  0  0  41820  11160  22540 284056    0    0     0    34 1077  1674  5  1 94
 1  0  0  41820  11164  22540 284056    0    0     0     0 1101  1751  2  1 97
 2  0  1  41820  11108  22540 284056    0    0     0  8363 1149  1689  3  6 91
 1  0  1  41820  11148  22540 284056    0    0     0  7648 1233  1798  1  6 93
 1  0  1  41820  11200  22540 284056    0    0     0 10272 1204  1782  3  6 91
 1  0  0  41820  11088  22540 284184    0    0     0  2020 1198  1587  1  2 97
 1  0  0  41820  11088  22540 284184    0    0     0     0 1044  1525  2  1 97
 2  0  1  41820  11036  22540 284184    0    0     0  8414 1149  1548  3  7 90
 2  0  1  41820  11044  22540 284184    0    0     0 10152 1232  1906  3  8 89
 1  0  0  41820  11068  22540 284184    0    0     0  4160 1224  1954  3  8 89
 1  0  0  41820  11052  22540 284184    0    0     0     0 1044  1503  3  1 96
 1  0  0  41820  11056  22540 284184    0    0     0     0 1048  1458  3  1 96
 1  0  0  41820  11036  22540 284184    0    0     0  1106 1066  1470  4  7 89
 1  0  0  41820  11040  22540 284184    0    0     0     8 1067  1589  3  1 96
 1  0  0  41820  10896  22540 284312    0    0     0     0 1264  2202  3  1 96
 1  0  0  41820  10904  22540 284312    0    0     0     0 1098  1797  4  1 95
 1  0  0  41820  10964  22540 284312    0    0     0     0 1105  1802  4  1 95
 1  0  0  41820  10968  22540 284312    0    0     0    46 1047  1444  3  3 94
 1  0  0  41820  11056  22540 284312    0    0     0     0 1058  1561  4  1 95
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 1  0  0  41820  11060  22540 284312    0    0     0     0 1133  1925  3  1 96
 0  0  0  41788  11288  22540 284312    0    0     0     0 1085  1414  5  1 94
 0  0  0  41788  11088  22540 284312  172    0   172     0 1053   712  3  0 97
 1  0  0  41788  11108  22540 284312    0    0     0   175 1081   779  2  2 96
 0  0  0  41788  11104  22540 284312    0    0     0     0 1032   562  1  1 98
 1  0  0  41788  11108  22540 284312    0    0     0     0 1024   469  1  0 99
 0  0  0  41788  11100  22540 284312    0    0     0     0 1022   501  2  1 97
 1  0  0  41788  11104  22540 284312    0    0     0     0 1027   504  1  0 99
 0  0  0  41788  11176  22540 284312    0    0     0  1112 1090   646  1  1 98
 0  0  0  41788  10476  22540 284312  660    0   660     0 1144  1500  1  0 99
 0  0  0  41788  10468  22540 284312    0    0     0     0 1057   870  2  0 98
 0  0  0  41788  10464  22540 284312    0    0     0     0 1026   521  2  1 97
 2  0  1  41788  10456  22540 284312    0    0     0     0 1069  1050  1  0 99
 1  0  0  41788  10448  22540 284312    0    0     0    85 1056   721  2  0 98
 1  0  0  41788  10456  22540 284312    0    0     0     9 1030   469  1  0 99
 0  0  0  41788  10448  22540 284312    0    0     0     0 1022   481  2  1 97
 0  0  0  41788  10488  22544 284320    0    0     4     0 1036   471  1  0 99
 0  0  0  41788  10480  22544 284320    0    0     0     0 1030   556  1  0 99
 0  0  0  41788  10500  22544 284320    0    0     0    69 1088   879  1  1 98
 0  0  0  41788  10492  22544 284320    0    0     0     0 1092   995  2  0 98
 0  0  0  41788  10496  22544 284320    0    0     0     0 1090  1328  1  0 99
   procs                      memory      swap          io     system      cpu
 r  b  w   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id
 0  0  0  41788  10460  22544 284328   12    0    20     0 1034   552  2  1 97
 0  0  0  41788  10448  22544 284328    0    0     0     0 1030   471  2  0 98
 0  0  0  41788  10440  22544 284328   32    0    32     0 1027   536  1  1 98
 0  0  0  41788  10432  22544 284328    0    0     0     0 1024   488  2  1 97
 1  0  0  41788  10432  22544 284328    0    0     0     0 1098   991  2  6 92
 1  0  0  41788  10424  22544 284328    0    0     0     0 1030   515  2  0 98
 0  0  0  41788  10428  22544 284328    0    0     0     0 1055   702  2  3 95
 0  0  0  41788  10416  22544 284328    0    0     0     8 1124  1021  7  4 89
 0  0  0  41788  10416  22544 284328    0    0     0     0 1103   934  4  2 94

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 232 bytes --]

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem
  2002-10-03 22:30                     ` Greg KH
@ 2002-10-04  6:33                       ` jbradford
  2002-10-04  6:37                         ` Greg KH
  0 siblings, 1 reply; 206+ messages in thread
From: jbradford @ 2002-10-04  6:33 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel

> > Hmmm, then for 3.0 I'd vote for fully working and proven stable:
> 
> Hm, how do you "prove" any of these are stable :)

Hmm, yeah, I see what you mean, but for me, proved stable is a couple of years of being in a major distribution, with people actually using it.

Now that major distributions no longer ship development kernels, (Slackware used to - I have slackware CDs with 1.3.x trees on them, for example), this is a less valid point.

> > * Bluetooth
> 
> Been there since 2.4

..and I'm sure the three people actually using it haven't found any bugs yet ;-)

> > * USB (2)
> 
> Present in 2.5 (and 2.4 now too)

..and yet there are still complaints that it doesn't work every day on the list.

> > * IEEE 1394
> 
> Been there since 2.4.

Still marked as experimental, though.  Not stable yet.

John.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem
  2002-10-04  6:33                       ` jbradford
@ 2002-10-04  6:37                         ` Greg KH
  2002-10-04  7:17                           ` jbradford
  0 siblings, 1 reply; 206+ messages in thread
From: Greg KH @ 2002-10-04  6:37 UTC (permalink / raw)
  To: jbradford; +Cc: linux-kernel

On Fri, Oct 04, 2002 at 07:33:58AM +0100, jbradford@dial.pipex.com wrote:
> > > Hmmm, then for 3.0 I'd vote for fully working and proven stable:
> > 
> > Hm, how do you "prove" any of these are stable :)
> 
> Hmm, yeah, I see what you mean, but for me, proved stable is a couple
> of years of being in a major distribution, with people actually using
> it.

Ah, so no one actually uses those things in your list.  So glad to hear
that...

> > > * USB (2)
> > 
> > Present in 2.5 (and 2.4 now too)
> 
> ..and yet there are still complaints that it doesn't work every day on the list.

Hm, must have missed those.  I haven't seen any USB 2.0 complaints in
quite some time.  The majority of USB "issues" are crappy usb storage
devices that don't match the USB storage spec, or PCI IRQ routing
problems.

But hey, no one cares about USB, I'm used to it :)

greg k-h

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem
  2002-10-04  6:37                         ` Greg KH
@ 2002-10-04  7:17                           ` jbradford
  2002-10-04  7:30                             ` Greg KH
  0 siblings, 1 reply; 206+ messages in thread
From: jbradford @ 2002-10-04  7:17 UTC (permalink / raw)
  To: Greg KH; +Cc: linux-kernel

> > > > Hmmm, then for 3.0 I'd vote for fully working and proven stable:
> > > 
> > > Hm, how do you "prove" any of these are stable :)
> > 
> > Hmm, yeah, I see what you mean, but for me, proved stable is a couple
> > of years of being in a major distribution, with people actually using
> > it.
> 
> Ah, so no one actually uses those things in your list.  So glad to hear
> that...

Whatever.  I wouldn't call them 3.0 material yet - would you?

> > > > * USB (2)
> > > 
> > > Present in 2.5 (and 2.4 now too)
> > 
> > ..and yet there are still complaints that it doesn't work every day on the list.
> 
> Hm, must have missed those.  I haven't seen any USB 2.0 complaints in
> quite some time.  The majority of USB "issues" are crappy usb storage
> devices that don't match the USB storage spec, or PCI IRQ routing
> problems.

We have to code for the devices that are out there.  Big deal if we follow the spec to the letter - if Mr Average plugs in his USB device and it doesn't work, well, it doesn't work.  It's no good lecturing him on the spec.  I don't usually take that view, but when there are a large number of broken devices, what are the other options?

> But hey, no one cares about USB, I'm used to it :)

I certainly don't care about USB, I don't even have a USB port on my main box, but if you're saying that the current support is 3.0 material, then I totally disagree.

I started this thread because I'd originally thought that 1.x.x -> 2.x.x happened due to moving from a.out to elf as the standard binary format.  Linus corrected me on that one, and pointed out that it was major feature enhancements that dictate the major version number change.  Given that, I am not in any hurry to see it move to 3.0.0  :-).

John.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem
  2002-10-04  7:17                           ` jbradford
@ 2002-10-04  7:30                             ` Greg KH
  0 siblings, 0 replies; 206+ messages in thread
From: Greg KH @ 2002-10-04  7:30 UTC (permalink / raw)
  To: jbradford; +Cc: linux-kernel

On Fri, Oct 04, 2002 at 08:17:58AM +0100, jbradford@dial.pipex.com wrote:
> > Hm, must have missed those.  I haven't seen any USB 2.0 complaints in
> > quite some time.  The majority of USB "issues" are crappy usb storage
> > devices that don't match the USB storage spec, or PCI IRQ routing
> > problems.
> 
> We have to code for the devices that are out there.  Big deal if we
> follow the spec to the letter - if Mr Average plugs in his USB device
> and it doesn't work, well, it doesn't work.  It's no good lecturing
> him on the spec.  I don't usually take that view, but when there are a
> large number of broken devices, what are the other options?

I agree, we must make these devices work.  But when you're dealing with
odd devices, that violate the spec in random ways, and you don't have
documentation on how these devices are broken, and you aren't getting
paid to provide support for these devices, development can be a bit slow
at times.  And because of these factors, we will almost always lag
behind the OSes that manufacturers directly support.

> > But hey, no one cares about USB, I'm used to it :)
> 
> I certainly don't care about USB, I don't even have a USB port on my
> main box, but if you're saying that the current support is 3.0
> material, then I totally disagree.

I didn't say that it was "3.0 material", you did.

What is pretty major is the core device model.  Lots of driver api
changes and cleanups have happened in 2.5.  It's almost starting to look
sane in places :)

greg k-h

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice)
  2002-10-03 22:05                       ` Dave Jones
  2002-10-04  3:46                         ` Andreas Boman
@ 2002-10-04  7:44                         ` jbradford
  1 sibling, 0 replies; 206+ messages in thread
From: jbradford @ 2002-10-04  7:44 UTC (permalink / raw)
  To: Dave Jones; +Cc: akpm, torvalds, alan, linux-kernel

>  > Yet John Bradford says that in swapless 8MB, 2.5.40 is "springier"
>  > than 2.4.x, so weird.
> 
> Depends on what tests are I suppose. "springier" doesn't
> really say too much. We do minimise memory usage in a few
> places if mem<16M though iirc which could be helping this case.

Well, I've got the following:

486, SX-25 laptop, with 8 MB Ram, no swap, running 2.5.40 and also 2.4.19.
486, SX-20 laptop, with 4 MB Ram, 20 MB swap, running 2.2.21, and 2.2.13.

Both are capable of running the latest Apache, with PHP support, and Lynx at a usable speed, (I use the 8 MB Ram machine for debugging small bits of PHP while I'm on the tube going up to London :-) ).

I know "feels springier" isn't very helpful, but what benchmarks do you expect me to run on machines with 120 Meg HDs?  :-)  Suggest something, and I'll give it a go.  It's not really faster, just more responsive, (E.G. doing a updatedb, and using jed at the same time is better in 2.5.x).

By the way, I've got X11 running on the 4 meg one, and it's quite usable.  I have even demoed a graphical browser accessing the local Apache, serving PHP content.

If anybody doesn't believe me, come along to Linux Expo UK next week, and see for yourselves :-).

John.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: v2.6 vs v3.0
  2002-10-01 12:38                                   ` Matthias Andree
@ 2002-10-04 19:58                                     ` Bill Davidsen
  0 siblings, 0 replies; 206+ messages in thread
From: Bill Davidsen @ 2002-10-04 19:58 UTC (permalink / raw)
  To: Matthias Andree; +Cc: linux-kernel

On Tue, 1 Oct 2002, Matthias Andree wrote:

> On Mon, 30 Sep 2002, Bill Davidsen wrote:
> 
> > I do that, but it doesn't make for a storage medium I can easily use on
> > another system. The cost of DVD writers is coming down, and non-magnetic
> > media may have some advantages as well. Still, they're small compared to
> > disk sizes.
> 
> There are big drives available if you really want one (and can afford
> one, which is the bigger problem usually).

The real problem is that the media is expensive. DVD media is <$10 and
encourages taking backups fairly often. In the long run that's most
important, not the initial cost. Trying to get a client to take an
incremental and store it off-site daily is easier at $5-8 than $50+.

-- 
bill davidsen <davidsen@tmr.com>
  CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (NUMA)
  2002-10-03 15:57                 ` Linus Torvalds
                                     ` (3 preceding siblings ...)
  2002-10-03 19:51                   ` Rik van Riel
@ 2002-10-04 22:26                   ` Martin J. Bligh
  2002-10-04 23:13                     ` Linus Torvalds
  4 siblings, 1 reply; 206+ messages in thread
From: Martin J. Bligh @ 2002-10-04 22:26 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel

> The only policy for major numbers has always been "major capability
> changes". 1.0 was "networking is stable and generally usable" (by the
> standards of that time), while 2.0 was "SMP and true multi-architecture
> support". My planned point for 3.0 was NuMA support, but while we actually
> have some of that, the hardware just isn't relevant enough to matter.

When you say we have "some of" that (NuMA support) ... what else would you 
like to see? The main things on the planned list as far as I'm concerned are:

1. NUMA aware scheduler.
2. multipath IO with NUMA support
3. per-node slabcache.
4. NUMA aware multidrop networking.

The first 3 of these four are floating around as patches, and I'm still hoping to get 
them merged before 2.5 (none are quite ready for merge yet, but should be in time).
I'll admit that people weren't desperately keen on doing multipath IO in the SCSI 
layer, but it seems like the only feasible way short term ....

I'd be most curious as to what else you think should be done (short or long term)
in this area, and any comments on the above 4 items?

Thanks,

Martin.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (NUMA)
  2002-10-04 22:26                   ` [OT] 2.6 not 3.0 - (NUMA) Martin J. Bligh
@ 2002-10-04 23:13                     ` Linus Torvalds
  2002-10-05  0:21                       ` Martin J. Bligh
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
  0 siblings, 2 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-10-04 23:13 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: linux-kernel


On Fri, 4 Oct 2002, Martin J. Bligh wrote:
> 
> When you say we have "some of" that (NuMA support) ... what else would you 
> like to see?

The main thing that I think is lacking is any relevance to any significant 
user base, thanks to lack of interesting hardware. So even if Linux itself 
was doing everything perfectly, as long as there is no wide hw base and 
users, it's all pretty much academic, the same way SMP was during the 
early 1.x days.

And I'm not trying to put you or any of the Linux NuMA work down here, I'm 
just saying that what makes it not important as a "3.0 feature" is just 
that deployment doesn't merit it yet.

			Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (NUMA)
  2002-10-04 23:13                     ` Linus Torvalds
@ 2002-10-05  0:21                       ` Martin J. Bligh
  2002-10-05  0:36                         ` Linus Torvalds
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
  1 sibling, 1 reply; 206+ messages in thread
From: Martin J. Bligh @ 2002-10-05  0:21 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel

> The main thing that I think is lacking is any relevance to any significant 
> user base, thanks to lack of interesting hardware. So even if Linux itself 
> was doing everything perfectly, as long as there is no wide hw base and 
> users, it's all pretty much academic, the same way SMP was during the 
> early 1.x days.
> 
> And I'm not trying to put you or any of the Linux NuMA work down here, I'm 
> just saying that what makes it not important as a "3.0 feature" is just 
> that deployment doesn't merit it yet.

Fair enough, I appreciate it's not a wide market segment right now.
It's not a quick and easy project though, so there's a long-ish ramp up time.
It would be nice to have it all working and in place by the time Hammer arrives 
and makes this much more widespread ;-) 

Just an order of magnitude figure for you ... number of seconds spent in kernel
space across all CPUs during a kernel compile on a 16-way NUMA-Q ... 

2.4 with every patch I had (including O(1) sched + NUMA mods) ... 120s. 
On 2.5.40-mm1 with one small NUMA scheduler patch ... 38s. 

Personally, I think that's pretty impressive - lots of very good things have been
happening, from Andrew in particular, the NUMA people, and VM people in general.
IMHO, the NUMA code is also much more readable and less buggy ;-)

M.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (NUMA)
  2002-10-05  0:21                       ` Martin J. Bligh
@ 2002-10-05  0:36                         ` Linus Torvalds
  2002-10-05  1:25                           ` Michael Hohnbaum
  0 siblings, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-05  0:36 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: linux-kernel


On Fri, 4 Oct 2002, Martin J. Bligh wrote:
> 
> It would be nice to have it all working and in place by the time Hammer arrives 
> and makes this much more widespread ;-) 

I agree, the Hammer is going to be interesting. But one of the most
interesting things to do will be to see if using it as a per-CPU memory
NUMA machine is slower or faster than using it with the memory interleaved
across CPU's (in which case it won't look NUMA at all).

My personal guess (assuming hypertransport works well) is that you'd
actually end up interleaving at least for dual setups, and quite possibly
for quads as well. The per-node non-interleaved setup probably makes for
best _aggregate_ memory throughput if you have a load that has very
NUMA-friendly behaviour, but interleaving should make for best sustained
throughput for not-very-balanced-loads.

> Just an order of magnitude figure for you ... number of seconds spent in kernel
> space across all CPUs during a kernel compile on a 16-way NUMA-Q ... 
> 
> 2.4 with every patch I had (including O(1) sched + NUMA mods) ... 120s. 
> On 2.5.40-mm1 with one small NUMA scheduler patch ... 38s. 

Yeah, looking good..

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] 2.6 not 3.0 - (NUMA)
  2002-10-05  0:36                         ` Linus Torvalds
@ 2002-10-05  1:25                           ` Michael Hohnbaum
  0 siblings, 0 replies; 206+ messages in thread
From: Michael Hohnbaum @ 2002-10-05  1:25 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Martin J. Bligh, linux-kernel



On Fri, 2002-10-04 at 17:36, Linus Torvalds wrote:

> > Just an order of magnitude figure for you ... number of seconds spent in kernel
> > space across all CPUs during a kernel compile on a 16-way NUMA-Q ... 
> > 
> > 2.4 with every patch I had (including O(1) sched + NUMA mods) ... 120s. 
> > On 2.5.40-mm1 with one small NUMA scheduler patch ... 38s. 
> 
> Yeah, looking good..
> 
Now if we could get the "one small NUMA scheduler patch" into the
kernel...

> 		Linus
> 
> 
-- 
Michael Hohnbaum            503-578-5486
hohnbaum@us.ibm.com         T/L 775-5486


^ permalink raw reply	[flat|nested] 206+ messages in thread

* The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-04 23:13                     ` Linus Torvalds
  2002-10-05  0:21                       ` Martin J. Bligh
@ 2002-10-05 20:30                       ` Rob Landley
  2002-10-06  2:15                         ` Andrew Morton
                                           ` (5 more replies)
  1 sibling, 6 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-05 20:30 UTC (permalink / raw)
  To: Linus Torvalds, Martin J. Bligh; +Cc: linux-kernel

On Friday 04 October 2002 07:13 pm, Linus Torvalds wrote:
> On Fri, 4 Oct 2002, Martin J. Bligh wrote:
> > When you say we have "some of" that (NuMA support) ... what else would
> > you like to see?
>
> The main thing that I think is lacking is any relevance to any significant
> user base, thanks to lack of interesting hardware. So even if Linux itself
> was doing everything perfectly, as long as there is no wide hw base and
> users, it's all pretty much academic, the same way SMP was during the
> early 1.x days.
>
> And I'm not trying to put you or any of the Linux NuMA work down here, I'm
> just saying that what makes it not important as a "3.0 feature" is just
> that deployment doesn't merit it yet.

Linux isn't going to get  a new order of magnitude surge from the server 
space, because there isn't an order of magnitude left.  The figures I've seen 
from several sources broadly agree that Linux currently has somewhere between 
a fifth and a third of the server market, has been doing quite well on that 
score for some time, and continues to make steady incremental advances 
(taking about equal amounts of market share away from proprietary unixen and 
NT boxen).  2.4 is already pretty darn good on a server (assuming you never 
hit swap. :).  Even 2.2 wasn't at all bad at it.

The new uncharted territory for Linux, and the next major order-of-magnitude 
jump in the installed base, is the desktop.  A kernel that could make a 
credible stab at the desktop  would certainly be 3.0 material.  And the work 
that matters for the desktop  is LATENCY work.  Not SMP, not throughput, not 
more memory.  Latency.  O(1), deadline I/O scheduler, rmap, preempt, shorter 
clock ticks, 

Yeah, a lot of the necessary work is user space stuff.  But not all.  We've 
focused on the "MP3 skipping/cd burner underrun" type stuff, which is 
important, but in reality an awful lot of the windows "look and feel" issues 
boil down to the simple fact that enough of their windowing system is welded 
into the kernel that their mouse pointer keeps updating smoothly no matter 
how  heavily loaded the system is, and when you click on a window its Z-order 
gets  promoted snappily under just about all circumstances.  That's it.  
That's the  big secret.  The mouse pointer doesn't stall, and the windows 
respond immediately when you click on 'em.

This may not be a USEFUL response, but it's an immediate one.  The inside of 
the window may not redraw for 30 seconds, and the pulldown menus and buttons 
will just ignore you for a while after that, but what the user EXPERIENCES is 
snappy response to commands and smooth interactive feel.  Just from those two 
things.  The system is listening.  It may not do anything but drool in 
response, but you can see that it's LISTENING.  And it's not just a cosmetic  
thing: try using a touchpad or nipple mouse on a laptop when the pointer 
stalls: you have to wait for it to start up again or you overshoot your 
target.  It's not a question of "queue up the next three clicks and wait for 
it to get around to them", you need interactive feedback to get your mouse in 
the right place.  Having it stall is really annoying in that case.  The 
instant an app blocks on a swapped out page, or any other read, and then I/O 
starvation occurs with reads blocked by a ton of writes...  BANG.  User 
twiddles thumb while their mouse pointer ignores them.  (Speculatively 
swapping out a page or two of the X server because it's easy to swap them 
back in doesn't help if reads are blocked behind three seconds worth of 
writes and your mouse pointer stalls at the edge of the window because of 
this.)

Now to fake this in Linux, you theoretically just need to run your X server 
and  your window manager at a priority of -10 (and somebody needs to club the 
distributions on the head until they start DOING this).  But in the past, 
that wouldn't guarantee your mouse cursor didn't do a half-second pause at a 
window boundary when the swap file went nuts.  There was NOTHING you could do 
under the first dozen 2.4 kernels to make sure your mouse pointer wouldn't 
stall at a window boundary, or go into la-la land for five minutes for that 
matter.  (It improved noticeably after that, but by then most people's 
opinions of 2.4's desktop suitability were already formed.  And it's STILL 
not fully fixed in 2.4: the instant an app blocks on a swapped out page and 
then I/O starvation happens with reads blocked by writes...  BANG.  User 
twiddles thumb while their mouse pointer ignores them.  Solution?  Never do 
anything disk intensive in the background unless you want interactive feel to 
go into the toilet.)

The new deadline I/O scheduler directly addresses this, and the ability to 
get "nice" to affect I/O priority is going to be a big win as well.  Andrea 
and Rik's VM work help here: rmap adds a lot of future tuning potential, such 
as the ability to make SWAP care about niceness (swap out pages from the 
nice+20 process before the nice-20 process).  The O(1) scheduler helps here 
by making niceness levels more meaningful in general.  All of these help X11 
at nice level -10 to not stall.  The faster clock tick helps here too, the 
low  latency work at the start of 2.5 helps here, and preempt helps here.  
There has been a LOT of work on general latency improvement and interactive 
feel.

Even the new threading work can potentially help X spin off a dedicated 
high-priority "update the mouse position, and manipulate window borders and z 
order, and never swap this thread out" thread.  (I remember the way OS/2 used 
to cheat and give extra time slices to anything that got a Presentation 
Manager window event, so you could literally speed up your program on a 
loaded system by "scrubbing" the mouse across it repeatedly.  The resulting 
perception was a snappy desktop, whatever the reality was.)

Sure there's a psychological "third time's the charm" thing that MS has 
conditioned the unwashed masses into believing, and a 3.0 kernel would make a 
bigger marketing splash than a 2.6 kernel.  And for that reason we should NOT 
go to 3.0 until we ARE ready for a horde of desktop users to give Linux a try 
(and potentially get burned and run away and hide and never look at us 
again).  But 2.5 DOES contain some significant attempts at addressing the 
needs of desktop (and laptop) users.  And THAT is what makes it 3.0 material. 
 To me, anyway. :)

Rob

(P.S.  The fact Apple's conditioning the market to take unix seriously on the 
desktop with OS X is just a case of convenient timing.  And now that floppies 
have gone the way of the dodo, the conceptual incompatibility between "mount" 
and removable media is largely a question of CDs, which are software 
ejected...)

(P.P.S.  There was some argument way back that 2.4 should have been 3.0 due 
to the amount of new stuff in it.  Old hat now, but the residue is a tendency 
to compare 2.5 and 2.3 and say "if we didn't do it then, why do it now".  But 
looking at it the other way, doesn't that just make the jump between 2.0 and 
2.5 even bigger, and INCREASE the rationale for calling the new release 3.0?)

(P.P.P.S.  I'll stop now. :)


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
@ 2002-10-06  2:15                         ` Andrew Morton
  2002-10-06  9:42                           ` Russell King
  2002-10-06 13:44                           ` Oliver Neukum
  2002-10-06  6:33                         ` Martin J. Bligh
                                           ` (4 subsequent siblings)
  5 siblings, 2 replies; 206+ messages in thread
From: Andrew Morton @ 2002-10-06  2:15 UTC (permalink / raw)
  To: Rob Landley; +Cc: Linus Torvalds, Martin J. Bligh, linux-kernel

Rob Landley wrote:
> 
> And the work that matters for the desktop is LATENCY work.

100% true.

You should resist any confusion between IO latency and CPU
scheduling latency.  They really are worlds apart.

In a stock 2.4 kernel it is hugely rare for the kernel to stall
a ready-to-run task for longer than a monitor refresh interval,
so I continue to disbelieve any claims that the low-latency
and preemptivity patches make any difference in desktop use.

(And 2.5 improves on this a _lot_.  The now-departed buffer LRU
and truncate list walks were the main culprits)

Any attempt to link IO priority with nice is probably doomed
to confused failure.  It should be a clearly separated concept.
There are priority inversions everywhere, too.

I disagree with you on the new CPU scheduler.  In my experience
it is significantly worse than the old one - a `make -j3' is
still sending interactive applications on extended lunch breaks.
Not that I have tried to tune this away.

Deadline scheduler is critical.  As is a correct setting for
/proc/sys/vm/dirty_async_ratio and the soon-to-be-born
/proc/sys/vm/swappiness.  These will boot up with sane values,
as much as is humanly possible.

It's not all kernel though.  Application (KDE) startup is *slow*,
even when zero I/O is performed.  Presumably because of the vtable
dynamic linking thing.  I'm not sure how the prelinking work is
getting along, but the initial figures I saw on that indicated
that the benefit may not be sufficient.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
  2002-10-06  2:15                         ` Andrew Morton
@ 2002-10-06  6:33                         ` Martin J. Bligh
  2002-10-07  5:28                         ` John Alvord
                                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 206+ messages in thread
From: Martin J. Bligh @ 2002-10-06  6:33 UTC (permalink / raw)
  To: Rob Landley, Linus Torvalds; +Cc: linux-kernel

> Linux isn't going to get  a new order of magnitude surge from the 
> server space, because there isn't an order of magnitude left.  

Depends on how you define the word "server". If you mean a PC being
a webserver, I'd agree. If you mean "large database server", I wouldn't. 
The term is so broad as to be useless in this context.

> The new uncharted territory for Linux, and the next major 
> order-of-magnitude jump in the installed base, is the desktop.  
> A kernel that could make a credible stab at the desktop  would 
> certainly be 3.0 material. And the work that matters for the 
> desktop  is LATENCY work.  Not SMP, not throughput, not more memory.
> Latency.  O(1), deadline I/O scheduler, rmap, preempt, shorter 
> clock ticks, 

I'd agree there are definitely some improvements to be made in this
space. My laptop skipping on xmms whilst I compile the kernel pisses
me off. But that's not why Linux is not successful on the desktop 
...

> Yeah, a lot of the necessary work is user space stuff.  

... that is. Userspace sucks. X-windows is a pig, and a monumental 
pain in the ass to configure - I've been doing it for 10 years, and
I still hate it every single damned time I have to do it. After  
years of utter crap I finally have a browser that more or less 
works (Galeon, yay), though it still has some stupid annoying bugs. 
Fonts are still a pain. Laptops are a minefield of turd-covered 
banana skins.

Yeah, I may play with the kernel all day, can debug stuff if I have 
to, and can figure out how to set things up by staring at documentation
or source code for ages if it's really necessary. But I don't want to.
I want things that are easy to use for the basic stuff, and just 
frigging work out of the box. I don't want to be asked bunches of 
questions that really don't matter that much periodically throughout
a Debian install. Spending all day playing with desktop nonsense isn't
fun, I just want to get on with real work.

It's getting better. But the reason Linux is not a desktop hit has
very little to do with interactive scheduler response, or other kernel
niceties. The kernel blows the competition out of the water, even if 
it does have a few problems here and there. It's to do with applications,
proprietary file formats, and commercial support.

> important, but in reality an awful lot of the windows "look and feel" 
> issues boil down to the simple fact that enough of their windowing 
> system is welded into the kernel that their mouse pointer keeps 
> updating smoothly no matter how  heavily loaded the system is, 
> and when you click on a window its Z-order gets  promoted snappily 
> under just about all circumstances.  That's it. 

Pft. What OS are you talking about here? Surely not Microsoft?
Send me your copy, it's obviously very different from mine.
 
> (P.S.  The fact Apple's conditioning the market to take unix 
> seriously on the desktop with OS X is just a case of convenient 
> timing.

You really think the market gives a damn that there's UNIX underneath
the hood of Apple machines? I beg to differ. They like the fact that
it actually works, and can really multitask, maybe ... which is an
indirect effect. But they don't care (on the whole) about the fact 
that it's UNIX.

M.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-06  2:15                         ` Andrew Morton
@ 2002-10-06  9:42                           ` Russell King
  2002-10-06 17:06                             ` Alan Cox
  2002-10-06 13:44                           ` Oliver Neukum
  1 sibling, 1 reply; 206+ messages in thread
From: Russell King @ 2002-10-06  9:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rob Landley, Martin J. Bligh, linux-kernel

On Sat, Oct 05, 2002 at 07:15:49PM -0700, Andrew Morton wrote:
> It's not all kernel though.  Application (KDE) startup is *slow*,
> even when zero I/O is performed.  Presumably because of the vtable
> dynamic linking thing.  I'm not sure how the prelinking work is
> getting along, but the initial figures I saw on that indicated
> that the benefit may not be sufficient.

As a mad guy who runs gnome on an ARM box virtually every day, and
compared the speed of gnome during startup and in operation with
traditional X applications, gnome is severely lacking in speed
and "snappiness".

Eg, a pure X setup starts up in less than 5 seconds.  With gnome,
you're looking at around 30.  Gnome 1.2 was slower than Gnome 1.4.
I haven't tested Gnome 2 yet.

Flipping around between workspaces is something I do regularly (6 of
them.)  With a fairly old (1997) fvwm + fvwmpager + gnome 1.2 it's
adequately fast - less than 1 second.  With sawfish + gnome 1.4,
even the refresh of other applications is noticeably slower, and with
fvwm + gnome 1.4 it's unbearable (because the gnome panel is obtaining
a complete list of windows and clients with the X server grabbed, and
querying various properties - because fvwm isn't gnome-compliant, the
panel can't ask the wm.)

The start up of rxvt - less than 1 second.  The start up of
gnome-terminal - around 15-20 seconds.

What I'm not saying here is that any one thing sucks (except maybe
ARM on a desktop box running Gnome.)  The point I'm trying to make is
that you can give the kernel as much "interactive" feel as you like, but
until user space gets It Right (tm), the kernel isn't really going to
make one blind bit of difference to the "feel" the user experiences.

I just wish someone would take away all the gnome developers high
performance machines and give them slow old 486's.  8)

(PS, before the "use the source" mob start running about, I'm a full time
kernel hacker.  To get up to speed on gnome to fix this would require me
to leave the kernel for a considerable amount of time.  This isn't going
to happen any time soon; there is only a certain number of hours in a day.)

-- 
Russell King (rmk@arm.linux.org.uk)                The developer of ARM Linux
             http://www.arm.linux.org.uk/personal/aboutme.html


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-06  2:15                         ` Andrew Morton
  2002-10-06  9:42                           ` Russell King
@ 2002-10-06 13:44                           ` Oliver Neukum
  2002-10-06 15:19                             ` Martin J. Bligh
  1 sibling, 1 reply; 206+ messages in thread
From: Oliver Neukum @ 2002-10-06 13:44 UTC (permalink / raw)
  To: Andrew Morton, Rob Landley; +Cc: Linus Torvalds, Martin J. Bligh, linux-kernel

On Sunday 06 October 2002 04:15, Andrew Morton wrote:
> Rob Landley wrote:
> > And the work that matters for the desktop is LATENCY work.
>
> 100% true.

Not 100%.
First of all desktop work is driver work. Desktop users tend to get pissed
if their shiny new webcam or DSL does not work.
And things like the hotplugging subsystems are essential.
Handling of removable media still is less than optimal.

Then there's the issue of application startup. There's not enough
read ahead. This is especially sad, as the order of page faults is at
least partially predictable.

Another thing that sucks in desktop environments is displaying directories.
Asynchronous IO will somewhat help, but you can't do an asynchronous stat.
Now do this while a compiler is running. Deadline IO scheduling will help
but a real helper would be read ahead on directory, inode and multi file 
level.

	Regards
		Oliver

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-06 15:19                             ` Martin J. Bligh
@ 2002-10-06 15:14                               ` Oliver Neukum
  2002-10-07  8:08                               ` Helge Hafting
  2002-10-07 17:43                               ` Daniel Phillips
  2 siblings, 0 replies; 206+ messages in thread
From: Oliver Neukum @ 2002-10-06 15:14 UTC (permalink / raw)
  To: Martin J. Bligh, Andrew Morton, Rob Landley; +Cc: Linus Torvalds, linux-kernel

On Sunday 06 October 2002 17:19, Martin J. Bligh wrote:
> > Then there's the issue of application startup. There's not enough
> > read ahead. This is especially sad, as the order of page faults is
> > at least partially predictable.
>
> Is the problem really, fundamentally a lack of readahead in the
> kernel? Or is it that your application is huge bloated pig?
> With admittedly no evidence whatsoever, I suspect the latter is
> really the root cause of this type of problem.

Of course, but that's not an excuse for sucking more than necessary.
Application startup is a problem.

	Regards
		Oliver

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-06 13:44                           ` Oliver Neukum
@ 2002-10-06 15:19                             ` Martin J. Bligh
  2002-10-06 15:14                               ` Oliver Neukum
                                                 ` (2 more replies)
  0 siblings, 3 replies; 206+ messages in thread
From: Martin J. Bligh @ 2002-10-06 15:19 UTC (permalink / raw)
  To: Oliver Neukum, Andrew Morton, Rob Landley; +Cc: Linus Torvalds, linux-kernel

> Then there's the issue of application startup. There's not enough
> read ahead. This is especially sad, as the order of page faults is 
> at least partially predictable.

Is the problem really, fundamentally a lack of readahead in the
kernel? Or is it that your application is huge bloated pig? 
With admittedly no evidence whatsoever, I suspect the latter is 
really the root cause of this type of problem. 

Ditto for the "takes me years to switch between desktops" ... 
maybe it's just that RAM is full of utter garbage due to mindless 
feature-bloat, so everything gets swapped out. If you're running
something like Netscape / Mozilla ... ;-)

I still think userspace is 90% of the problem here ...

M.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-06  9:42                           ` Russell King
@ 2002-10-06 17:06                             ` Alan Cox
  0 siblings, 0 replies; 206+ messages in thread
From: Alan Cox @ 2002-10-06 17:06 UTC (permalink / raw)
  To: Russell King
  Cc: Andrew Morton, Rob Landley, Martin J. Bligh, Linux Kernel Mailing List

On Sun, 2002-10-06 at 10:42, Russell King wrote:
> What I'm not saying here is that anything one thing sucks (except maybe
> ARM on a desktop box running Gnome.)  The point I'm trying to make is
> that you can give the kernel as much "interactive" feel as you like, but
> until user space gets It Right (tm), the kernel isn't really going to
> make one blind bit of difference to the "feel" the user experiences.
> 
> I just wish someone would take away all the gnome developers high
> performance machines and give them slow old 486's.  8)

The GNOME stuff is mostly userspace problems not kernel space, and some
of it is tool problems (lack of tools to lay binaries out so they stream
from disk, lack of tools to put all the fixups in the same few pages).
Gnome noticeably improved when prelinking in gnu tools began to work.

To do a meaningful kernel comparison you need to look at 2.2/2.4/2.5
with the same user space setup. 

As to the 486's. There is optimisation work for gnome and especially
startup going on. Seems its a bit slow on those old legacy sparc64
contraptions ;)


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
  2002-10-06  2:15                         ` Andrew Morton
  2002-10-06  6:33                         ` Martin J. Bligh
@ 2002-10-07  5:28                         ` John Alvord
  2002-10-07  8:39                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 n Giuliano Pochini
                                           ` (2 subsequent siblings)
  5 siblings, 0 replies; 206+ messages in thread
From: John Alvord @ 2002-10-07  5:28 UTC (permalink / raw)
  To: Rob Landley; +Cc: Linus Torvalds, Martin J. Bligh, linux-kernel

On Sat, 5 Oct 2002 16:30:32 -0400, Rob Landley <landley@trommello.org>
wrote:

>On Friday 04 October 2002 07:13 pm, Linus Torvalds wrote:
>> On Fri, 4 Oct 2002, Martin J. Bligh wrote:
>> > When you say we have "some of" that (NuMA support) ... what else would
>> > you like to see?
>>
>> The main thing that I think is lacking is any relevance to any significant
>> user base, thanks to lack of interesting hardware. So even if Linux itself
>> was doing everything perfectly, as long as there is no wide hw base and
>> users, it's all pretty much academic, the same way SMP was during the
>> early 1.x days.
>>
>> And I'm not trying to put you or any of the Linux NuMA work down here, I'm
>> just saying that what makes it not important as a "3.0 feature" is just
>> that deployment doesn't merit it yet.
>
>Linux isn't going to get  a new order of magnitude surge from the server 
>space, because there isn't an order of magnitude left.  The figures I've seen 
>from several sources broadly agree that Linux currently has somewhere between 
>a fifth and a third of the server market, has been doing quite well on that 
>score for some time, and continues to make steady incremental advances 
>(taking about equal amounts of market share away from proprietary unixen and 
>NT boxen).  2.4 is already pretty darn good on a server (assuming you never 
>hit swap. :).  Even 2.2 wasn't at all bad at it.
>
>The new uncharted territory for Linux, and the next major order-of-magnitude 
>jump in the installed base, is the desktop.  A kernel that could make a 
>credible stab at the desktop  would certainly be 3.0 material.  And the work 
>that matters for the desktop  is LATENCY work.  Not SMP, not throughput, not 
>more memory.  Latency.  O(1), deadline I/O scheduler, rmap, preempt, shorter 
>clock ticks, 

The big drag on making progress on the desktop is the inertia of
existing applications. Speed/Performance is rarely a problem... just
wait a few months for more power or lower price. PCs are already
overpowered for the typical desktop workload.

Progress in that area is always possible but it will be very slow and
marginal.

john alvord

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-06 15:19                             ` Martin J. Bligh
  2002-10-06 15:14                               ` Oliver Neukum
@ 2002-10-07  8:08                               ` Helge Hafting
  2002-10-07  9:18                                 ` Oliver Neukum
  2002-10-07 17:43                               ` Daniel Phillips
  2 siblings, 1 reply; 206+ messages in thread
From: Helge Hafting @ 2002-10-07  8:08 UTC (permalink / raw)
  To: Martin J. Bligh, linux-kernel

"Martin J. Bligh" wrote:
> 
> > Then there's the issue of application startup. There's not enough
> > read ahead. This is especially sad, as the order of page faults is
> > at least partially predictable.
> 
> Is the problem really, fundamentally a lack of readahead in the
> kernel? Or is it that your application is huge bloated pig?

Often the latter.  People getting interested in linux
seem to believe that openoffice is the msoffice replacement,
and that _is_ a huge bloated pig.  It needs 50M to start
the text editor - and lots of _cpu_.  It takes a long time
to start on a 266MHz machine even when the disk io
is avoided by the pagecache.

A snappy desktop is trivial with 2.5, even with a slow machine.
Just stay away from gnome and kde, use a ugly fast
window manager like icewm or twm (and possibly lots
of others I haven't even heard about.)  
X itself is snappy enough, particularly with increased
priority.   
Take some care when selecting apps (yes - there is choice!)
and the desktop is just fine.  Openoffice is a nice
package of programs, but there are replacements for most
of them if speed is an issue.  If the machine is powerful
enough to run ms software snappy then speed probably
isn't such a big issue though.

> With admittedly no evidence whatsoever, I suspect the latter is
> really the root cause of this type of problem.
> 
> Ditto for the "takes me years to switch between desktops" ...
> maybe it's just that RAM is full of utter garbage due to mindless
> feature-bloat, so everything gets swapped out. If you're running
> something like Netscape / Mozilla ... ;-)

My guess is a bloated window manager.  Switching desktops
is fast for me, even with netscape running and swap in use.
Or are you talking 64M machines?
 
> I still think userspace is 90% of the problem here ...

Yes.

Helge Hafting

^ permalink raw reply	[flat|nested] 206+ messages in thread

* RE: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 n
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
                                           ` (2 preceding siblings ...)
  2002-10-07  5:28                         ` John Alvord
@ 2002-10-07  8:39                         ` Giuliano Pochini
  2002-10-07 13:56                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Jesse Pollard
  2002-10-11 23:53                         ` Hans Reiser
  5 siblings, 0 replies; 206+ messages in thread
From: Giuliano Pochini @ 2002-10-07  8:39 UTC (permalink / raw)
  To: Rob Landley; +Cc: linux-kernel, Martin J. Bligh, Linus Torvalds


> important, but in reality an awful lot of the windows "look and feel" issues 
> boil down to the simple fact that enough of their windowing system is welded 
> into the kernel that their mouse pointer keeps updating smoothly no matter 
> how  heavily loaded the system is, and when you click on a window its Z-order 
> gets  promoted snappily under just about all circumstances.  That's it.

I feel linux more responsive than M$ windos. But AmigaOS was better. In
AmigaOS the GUI was handled in a different way. UI, widgets, windows, etc.
run in a separate process, so even if the application is busy you can press
buttons, and the events are queued. GTK, QT, etc.. have a different behaviour
and you can't interact with the UI while the application is busy. It is
possible, but it requires a lot of extra work for the developer and almost
nobody does it. To get more GUI responsiveness, the right way is to change
UI toolkits. The kernel works just fine now.

And about sound skipping, I found that libtool is the worst offender. I
don't know why (it's a shell script...), but it is. It causes a short
pause of everything. I use a ppc, perhaps on other archs it's harmless.


Bye.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07  8:08                               ` Helge Hafting
@ 2002-10-07  9:18                                 ` Oliver Neukum
  2002-10-07 14:11                                   ` Jan Hudec
                                                     ` (2 more replies)
  0 siblings, 3 replies; 206+ messages in thread
From: Oliver Neukum @ 2002-10-07  9:18 UTC (permalink / raw)
  To: Helge Hafting, Martin J. Bligh, linux-kernel

On Monday 07 October 2002 10:08, Helge Hafting wrote:
> "Martin J. Bligh" wrote:
> > > Then there's the issue of application startup. There's not enough
> > > read ahead. This is especially sad, as the order of page faults is
> > > at least partially predictable.
> >
> > Is the problem really, fundamentally a lack of readahead in the
> > kernel? Or is it that your application is huge bloated pig?
>
> Often the latter.  People getting interested in linux
> seems to believe that openoffice is the msoffice replacement,
> and that _is_ a huge bloated pig.  It needs 50M to start
> the text editor - and lots of _cpu_.  It takes a long time
> to start on a 266MHz machine even when the disk io
> is avoided by the pagecahce.

OpenOffice _is_ an important application, whether we like it or not.

How does one measure and profile application startup other than with
a stopwatch ? I'd like to gather some objective data on this.

> A snappy desktop is trivial with 2.5, even with a slow machine.
> Just stay away from gnome and kde, use a ugly fast

A desktop machine needs to run a desktop environment. Only a window manager is 
not enough.

> window manager like icewm or twm (and possibly lots
> of others I haven't even heard about.)
> X itself is snappy enough, particularly with increased
> priority.
> Take some care when selecting apps (yes - there is choice!)
> and the desktop is just fine.  Openoffice is a nice
> package of programs, but there are replacements for most
> of them if speed is an issue.  If the machine is powerful
> enough to run ms software snappy then speed probably
> isn't such a big issue though.

KDE and friends _are_ not quite optimised for speed. That however doesn't 
mean that the kernel should not make an effort to allow them to run as fast 
as they can.

	Regards
		Oliver

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
                                           ` (3 preceding siblings ...)
  2002-10-07  8:39                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 n Giuliano Pochini
@ 2002-10-07 13:56                         ` Jesse Pollard
  2002-10-07 14:03                           ` Rob Landley
  2002-10-07 18:22                           ` Daniel Phillips
  2002-10-11 23:53                         ` Hans Reiser
  5 siblings, 2 replies; 206+ messages in thread
From: Jesse Pollard @ 2002-10-07 13:56 UTC (permalink / raw)
  To: Rob Landley, Linus Torvalds, Martin J. Bligh; +Cc: linux-kernel

On Saturday 05 October 2002 03:30 pm, Rob Landley wrote:
> On Friday 04 October 2002 07:13 pm, Linus Torvalds wrote:
[snip]
> Now to fake this in Linux, you theoretically just need to run your X server
> and  your window manager at a priority of -10 (and somebody needs to club
> the distributions on the head until they start DOING this).  But in the
> past, that wouldn't guarantee your mouse cursor didn't do a half-second
> pause at a window boundary when the swap file went nuts.  There was NOTHING
> you could do under the first dozen 2.4 kernels to make sure your mouse
> pointer wouldn't stall at a window boundary, or go into la-la land for five
> minutes for that matter.  (It improved noticeably after that, but by then
> most people's opinions of 2.4's desktop suitability were already formed. 
> And it's STILL not fully fixed in 2.4: the instant an app blocks on a
> swapped out page and then I/O starvation happens with reads blocked by
> writes...  BANG.  User twiddles thumb while their mouse pointer ignores
> them.  Solution?  Never do anything disk intensive in the background unless
> you want interactive feel to go into the toilet.)

In other words... don't swap. If an application has to be swapped out, all
bets are off on response time. There are X events that REQUIRE the
application to be in memory if they are going to be handled. (example:
focus follows mouse, auto raise window on focus, app must redraw exposed
area... or worse: app grabs mouse to put it in the workspace on entry to a
status display. Guess what can happen to the mouse.)

> The new deadline I/O scheduler directly addresses this, and the ability to
> get "nice" to affect I/O priority is going to be a big win as well.  Andrea
> and Rik's VM work help here: rmap adds a lot of future tuning potential,
> such as the ability to make SWAP care about niceness (swap out pages from
> the nice+20 process before the nice-20 process).  The O(1) scheduler helps
> here by making niceness levels more meaningful in general.  All of these
> help X11 at nice level -10 to not stall.  The faster clock tick helps here
> too, the low  latency work at the start of 2.5 helps here, and preempt
> helps here. There has been a LOT of work on general latency improvement and
> interactive feel.

It will still stall every time the mouse crosses the window border IF the
application has specified "enter/leave" event notification. This requires the
application to be swapped in to receive the event. The only fix is locking
the application/X libraries into memory.

> Even the new threading work can potentially help X spin off a dedicated
> high-priority "update the mouse position, and manipulate window borders and
> z order, and never swap this thread out" thread.  (I remember the way OS/2
> used to cheat and give extra time slices to anything that got a
> Presentation Manager window event, so you could literally speed up your
> program on a loaded system by "scrubbing" the mouse across it repeatedly. 
> The resulting perception was a snappy desktop, whatever the reality was.)

Not really - the application may want the mouse pointer changed, update data
based on where the mouse is located (see what happens to a rule bar on
image/word processors). There is also the possibility that multiple processes
are watching the mouse.

The only "fix" that would help this out is to lock the X shared libraries and
X server into memory, and to use a multi-threaded X server, OR have
enough memory available to not swap.

The major difference between M$ window handling and X is that X gives the
users app control over what happens to the mouse. M$ has already defined
what the actions are, it is NOT up to the application. X does not implement
application policy. That is up to the application.

Even M$ Windows will lockup when it swaps out the application. The mouse
might move... but then the entire system hangs (at least under ME).

-- 
-------------------------------------------------------------------------
Jesse I Pollard, II
Email: pollard@navo.hpc.mil

Any opinions expressed are solely my own.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 13:56                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Jesse Pollard
@ 2002-10-07 14:03                           ` Rob Landley
  2002-10-08 22:14                             ` Jesse Pollard
  2002-10-09  8:17                             ` Alexander Kellett
  2002-10-07 18:22                           ` Daniel Phillips
  1 sibling, 2 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-07 14:03 UTC (permalink / raw)
  To: Jesse Pollard; +Cc: linux-kernel

On Monday 07 October 2002 09:56 am, Jesse Pollard wrote:

> In other words... don't swap.

"Don't swap this bit", anyway.

> If an application has to be swapped out, all
> bets are off on response time.

Alright, breaking the problem down into specific, bite-sized chunks, seeing 
what's easily measurable, and then picking the lowest hanging fruit:

The frequency of mouse pointer stalls, and the worst case response time, is 
probably something an automated benchmark could measure.  (Z-order's a 
trickier problem because the window manager's involved, but mouse stalls are 
EASY to cause.)

On my laptop (with 256 megs ram and 256 megs swap).  Open up 30 or 40 
konqueror windows of a "this page looks interesting, I'll read it offline" 
variety until memory's full and you're about 2/3 of the way into swap.  
(KTimeMon makes this easy to see.)  then do something swap-happy in the 
background (including downloading a huge file, which causes disk cache to 
grow and evict stuff, or of course running a big compile).

No matter how much ram the system has, with six desktops full of open windows 
I can usually drive it DEEP into swap, without even picking an easy target 
like star/openoffice.  (Yeah, KDE sucketh.  And X should be able to figure 
out that windows not currently being displayed at all (completely behind 
other windows, on another desktop, etc) can be swapped out.  But it's just 
not designed that way...)

> > Even the new threading work can potentially help X spin off a dedicated
> > high-priority "update the mouse position, and manipulate window borders
> > and z order, and never swap this thread out" thread.  (I remember the way
> > OS/2 used to cheat and give extra time slices to anything that got a
> > Presentation Manager window event, so you could literally speed up your
> > program on a loaded system by "scrubbing" the mouse across it repeatedly.
> > The resulting perception was a snappy desktop, whatever the reality was.)
>
> Not really - the application may want the mouse pointer changed, update
> data based on where the mouse is located (see what happens to a rule bar on
> image/word processors). There is also the possibility that multiple
> processes are watching the mouse.

You may notice that in mozilla when your rat moves over a link, the mouse 
pointer turns into a hand anywhere up to several seconds later on a 
pathologically loaded system.  This usually doesn't stop the pointer from 
moving if you just want to wander past the link and continue on.  "Tooltips" 
take two or three seconds to pop up, and this is a GOOD thing...

If the mouse movement stalls, you can't navigate with a nipple mouse or 
touchpad (which is all you get on a laptop), 'cause you'll overshoot. Having 
the button under the mouse highlight is secondary to being able to get the 
mouse over the button.

When the system isn't loaded anymore (went away while a compile finished or a 
file downloaded), you get one or two small (1/4 second) stalls as stuff swaps 
back in and then life is good.  It's when you swap stuff in and then it swaps 
back out after 3 seconds of inactivity that it gets to be a real pain 
(something the deadline I/O scheduler is supposed to help)...

Maybe the correct thing here is a user space fix, with X throwing certain 
event handlers into an mlocked shared library, just so your mouse pointer 
always updates smoothly.  But I do know a lot of work has gone into making 
more intelligent swapping decisions (fundamentally, that's all VM work really 
is), and it's certainly a heck of a lot better than the 2.4.6 days where you 
had to go get a beverage when it went swap-happy and it could be 30 seconds 
between pointer updates.

> Even M$ Windows will lockup when it swaps out the application. The mouse
> might move... but then the entire system hangs (at least under ME).

The amazing number of things windows manages to screw up should not be used 
to prevent discussion about the small number of things they successfully 
copied from the macintosh.  :)

Rob

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07  9:18                                 ` Oliver Neukum
@ 2002-10-07 14:11                                   ` Jan Hudec
  2002-10-07 15:01                                     ` Jesse Pollard
  2002-10-07 15:15                                   ` Martin J. Bligh
  2002-10-08 13:49                                   ` Helge Hafting
  2 siblings, 1 reply; 206+ messages in thread
From: Jan Hudec @ 2002-10-07 14:11 UTC (permalink / raw)
  To: Oliver Neukum; +Cc: Helge Hafting, Martin J. Bligh, linux-kernel

On Mon, Oct 07, 2002 at 11:18:44AM +0200, Oliver Neukum wrote:
> On Monday 07 October 2002 10:08, Helge Hafting wrote:
> > "Martin J. Bligh" wrote:
> > > > Then there's the issue of application startup. There's not enough
> > > > read ahead. This is especially sad, as the order of page faults is
> > > > at least partially predictable.
> > >
> > > Is the problem really, fundamentally a lack of readahead in the
> > > kernel? Or is it that your application is huge bloated pig?
> >
> > Often the latter.  People getting interested in linux
> > seems to believe that openoffice is the msoffice replacement,
> > and that _is_ a huge bloated pig.  It needs 50M to start
> > the text editor - and lots of _cpu_.  It takes a long time
> > to start on a 266MHz machine even when the disk io
> > is avoided by the pagecahce.
> 
> OpenOffice _is_ an important application, whether we like it or not.
> 
> How does one measure and profile application startup other than with
> a stopwatch ? I'd like to gather some objective data on this.

Add some debugging output to the program (mainly at the very beginning of
main) and then launch it with simple program that will note time right
before it forks and then wait for the application to output something
(which should be the debugging write at the start of main) and note time
it returned from select.

> > A snappy desktop is trivial with 2.5, even with a slow machine.
> > Just stay away from gnome and kde, use a ugly fast
> 
> A desktop machine needs to run a desktop enviroment. Only a window manager is 
> not enough.

Please, could someone explain to me, what is desktop environment in
addition to window manager and horde of libraries for UI and IPC.

(No, panel is not important thing and even if it were, it's a simple
fast application, providing it's implemented sanely (I mean, gnome panel
is currently buggy))

> > window manager like icewm or twm (and possibly lots
> > of others I haven't even heard about.)
> > X itself is snappy enough, particularly with increased
> > priority.
> > Take some care when selecting apps (yes - there is choice!)
> > and the desktop is just fine.  Openoffice is a nice
> > package of programs, but there are replacements for most
> > of them if speed is an issue.  If the machine is powerful
> > enough to run ms software snappy then speed probably
> > isn't such a big issue though.
> 
> KDE and friends _are_ not quite optimised for speed. That however doesn't 
> mean that the kernel should not make an effort to allow them to run as fast 
> as they can.

No, it does not.

-------------------------------------------------------------------------------
						 Jan 'Bulb' Hudec <bulb@ucw.cz>

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 14:11                                   ` Jan Hudec
@ 2002-10-07 15:01                                     ` Jesse Pollard
  2002-10-07 15:34                                       ` Jan Hudec
  0 siblings, 1 reply; 206+ messages in thread
From: Jesse Pollard @ 2002-10-07 15:01 UTC (permalink / raw)
  To: Jan Hudec, Oliver Neukum; +Cc: Helge Hafting, Martin J. Bligh, linux-kernel

On Monday 07 October 2002 09:11 am, Jan Hudec wrote:
> On Mon, Oct 07, 2002 at 11:18:44AM +0200, Oliver Neukum wrote:
> > On Monday 07 October 2002 10:08, Helge Hafting wrote:
[snip]
> >
> > How does one measure and profile application startup other than with
> > a stopwatch ? I'd like to gather some objective data on this.
>
> Add some debuging output to the program (mainly at the very begining of
> main) and then launch it with simple program that will note time right
> before it forks and then wait for the application to output something
> (which should be the debuging write at the start od main) and note time
> it returned from select.

nope... It has to be after input parameters have been evaluated, after
X window initialization has been done, and possibly after the application
windows are created. For a benchmark, it would likely be good to have
them at ALL such locations. Even on exit (how long does it take to
cleanup?).

> > > A snappy desktop is trivial with 2.5, even with a slow machine.
> > > Just stay away from gnome and kde, use a ugly fast
> >
> > A desktop machine needs to run a desktop enviroment. Only a window
> > manager is not enough.
>
> Please, could someone explain to me, what is desktop enviroment in
> addition to window manager and horde of libraries for UI and IPC.
>
> (No, panel is not important thing and even if it were, it's a simple
> fast application, providing it's implemented sanely (I mean, gnome panel
> is currently buggy))

The applications that USE that horde of libraries that must be running.
Otherwise, a blank screen would have been considered sufficient. Some
of these applications are: tool chest (sometimes part of a WM), multiple
desktop support (usually part of the WM, but not necessarily), WP or
other applications activated - depending on what the user wants.

What you end up having to do is define what the base desktop is
required to have to be considered "functional", and the amount of
time available for the desktop to be ready for use. I've even seen
M$ windows with 50-75 icons already present. Until they are initialized
the user didn't consider the system "usable". And that took several minutes
on an 800 MHZ system. During some of that setup the mouse was just
unusable (frozen) or it would jump around trying to catch up with the
users activity.

The other part of "usable" is how long it takes for an application to
"start". A simple fork/exec is quite fast. But that isn't a "started" 
application. A responsive system means that the time between
the selection of the application to the time the user can enter data
(ie. make a menu selection/start typing) is as short as possible. The
users desire is about 1/4th of a second. With a large number of applications, 
this activity requires a LOT of swap in code. Not something done fast.

One way some systems used to do this is to guarantee a MINIMUM of
50-100K of the application to be loaded BEFORE a context switch
to the application is done. Of course, this assumes that all of the 
initialization code can actually FIT in the first 100K. Usually it doesn't
because a lot of that initialization is for general runtime support and X
library initialization. Hopefully, this is already loaded and resident by a
pre-existing application (the window manager). Unfortunately, the WM
initialization may have already been swapped out. and some of the X
libraries too.

The only solution for this is to not swap out at all, and have enough
memory for everything. Which is also the first recommendation to
improve M$ Windows performance. (got that one when a laptop
was already maxed out "... not enough resources, why don't you
get some more memory...")

-- 
-------------------------------------------------------------------------
Jesse I Pollard, II
Email: pollard@navo.hpc.mil

Any opinions expressed are solely my own.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07  9:18                                 ` Oliver Neukum
  2002-10-07 14:11                                   ` Jan Hudec
@ 2002-10-07 15:15                                   ` Martin J. Bligh
  2002-10-08 13:49                                   ` Helge Hafting
  2 siblings, 0 replies; 206+ messages in thread
From: Martin J. Bligh @ 2002-10-07 15:15 UTC (permalink / raw)
  To: Oliver Neukum, Helge Hafting, linux-kernel

> OpenOffice _is_ an important application, whether we like it or not.
> 
> How does one measure and profile application startup other than with
> a stopwatch ? I'd like to gather some objective data on this.

I suggest a slightly (not a lot) more sophisticated stopwatch ...

Use -mm kernels, that's where the latest vm stuff is
http://www.zipworld.com.au/~akpm/linux/patches/2.5/2.5.40/2.5.40-mm2/
and Andrew is normally wonderfully responsive to clear data from 
profiles (see oprofile below)

Then either use strace with the time option on it (-t?), or:

1. use oprofile (grab from akpm's site:
http://www.zipworld.com.au/~akpm/linux/patches/2.5/2.5.40/2.5.40-mm2/experimental/), and boot with idle=poll

2. in one window type the command to stop the oprofile stuff, but
don't press return (something like "op_stop > /dev/null")

3. In another window do:

rm -rf /var/lib/oprofile

op_start --vmlinux=/boot/vmlinux --map-file=/boot/System.map --ctr0-event=CPU_CLK_UNHALTED --ctr0-count=300000 > /dev/null

my_application

4. When your app finishes starting, hit return in that first window.

5. oprofpp -dl -i /boot/vmlinux  > data_dumpy_place.

Examine output.
Or something along those lines. Not very sophisticated, but that's
what I'd do I guess (what does that say? ;-))

M.

PS. Actually the combination of an strace and profile might be most
meaningful (though you might want to do them separately ... make
sure the cache is either cold or warm both times, not one of each).


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 15:01                                     ` Jesse Pollard
@ 2002-10-07 15:34                                       ` Jan Hudec
  2002-10-08  3:12                                         ` [OT] " Scott Mcdermott
  0 siblings, 1 reply; 206+ messages in thread
From: Jan Hudec @ 2002-10-07 15:34 UTC (permalink / raw)
  To: Jesse Pollard; +Cc: Oliver Neukum, Helge Hafting, Martin J. Bligh, linux-kernel

On Mon, Oct 07, 2002 at 10:01:22AM -0500, Jesse Pollard wrote:
> On Monday 07 October 2002 09:11 am, Jan Hudec wrote:
> > On Mon, Oct 07, 2002 at 11:18:44AM +0200, Oliver Neukum wrote:
> > > On Monday 07 October 2002 10:08, Helge Hafting wrote:
> [snip]
> > >
> > > How does one measure and profile application startup other than with
> > > a stopwatch ? I'd like to gather some objective data on this.
> >
> > Add some debugging output to the program (mainly at the very beginning of
> > main) and then launch it with a simple program that will note the time right
> > before it forks and then wait for the application to output something
> > (which should be the debugging write at the start of main) and note the time
> > it returned from select.
> 
> nope... It has to be after input parameters have been evaluated, after
> X window initialization has been done, and possibly after the application
> windows are created. For a benchmark, it would likely be good to have
> them at ALL such locations. Even on exit (how long does it take to
> cleanup?).

Well, depends on what we want to measure. If it's at the beginning of
main, it measures library loading time. Then argument parsing, library
initialization, X initialization etc. can be measured. All those parts
should be timed so we can see where most time is spent and which can be
sped up.

> > > > A snappy desktop is trivial with 2.5, even with a slow machine.
> > > > Just stay away from gnome and kde, use a ugly fast
> > >
> > > A desktop machine needs to run a desktop environment. Only a window
> > > manager is not enough.
> >
> > Please, could someone explain to me, what is a desktop environment in
> > addition to window manager and horde of libraries for UI and IPC.
> >
> > (No, panel is not important thing and even if it were, it's a simple
> > fast application, providing it's implemented sanely (I mean, gnome panel
> > is currently buggy))
> 
> The applications that USE that horde of libraries that must be running.
> Otherwise, a blank screen would have been considered sufficient. Some
> of these applications are: tool chest (sometimes part of a WM), multiple
> desktop support (usually part of the WM, but not necessarily), WP or
> other applications activated - depending on what the user wants.

Tool chest definitely does not need most of the horde of libraries. And
it's part of most window managers (except sawfish and icewm(?))
Multiple desktop support is _the_ windowmanager. I asked what in
	addition to window manager.
An application is an application using the desktop environment.

Thus we come back to that a desktop environment is only a window manager
(which either provides toolchest or uses separate process to do it, but
that process does not have to be that much complicated) and a horde of
libraries for applications to cooperate together well. Some basic
application must of course be there, like a file manager.

> What you end up having to do is define what the base desktop is
> required to have to be considered "functional", and the amount of
> time available for the desktop to be ready for use. I've even seen
> M$ windows with 50-75 icons already present. Until they are initialized
> the user didn't consider the system "usable". And that took several minutes
> on an 800 MHZ system. During some of that setup the mouse was just
> unusable (frozen) or it would jump around trying to catch up with the
> users activity.

And each of them was redrawn three times during the setup...
unfortunately gnome is not far from there too.

> The other part of "usable" is how long it takes for an application to
> "start". A simple fork/exec is quite fast. But that isn't a "started" 
> application. A responsive system means that the time between
> the selection of the application to the time the user can enter data
> (ie. make a menu selection/start typing) is as short as possible. The
> users desire is about 1/4th of a second. With a large number of applications, 
> this activity requires a LOT of swap in code. Not something done fast.

Here the larger the horde of libraries used is and the larger
individual libraries in it are, the worse.

> One way some systems used to do this is to guarantee a MINIMUM of
> 50-100K of the application to be loaded BEFORE a context switch
> to the application is done. Of course, this assumes that all of the 
> initialization code can actually FIT in the first 100K. Usually it doesn't
> because a lot of that initialization is for general runtime support and X
> library initialization. Hopefully, this is already loaded and resident by a
> pre-existing application (the window manager). Unfortunately, the WM
> initialization may have already been swapped out, and some of the X
> libraries too.
> 
> The only solution for this is to not swap out at all, and have enough
> memory for everything. Which is also the first recommendation to
> improve M$ Windows performance. (got that one when a laptop
> was already maxed out "... not enough resources, why don't you
> get some more memory...")

Well, one of worst part is loading that horde of libraries in memory.
When you take a typical gnome application, the dynamic linker has quite
hard time there, because it must at least locate all of them and mmap
them. And must do that recursively for all the dependencies (fortunately
it can use the ld.cache cache, where dependencies are listed). With
many gnome applications, many of these libraries will never be used or
will be used for just one or two functions, only once ... but they are
all mmaped, which means opened, which means looked up.

So what could help quite a lot would be to try hard to make as many
things as possible lazy (both in dynamic linker and in initialization of
all those libraries).

-------------------------------------------------------------------------------
						 Jan 'Bulb' Hudec <bulb@ucw.cz>

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-06 15:19                             ` Martin J. Bligh
  2002-10-06 15:14                               ` Oliver Neukum
  2002-10-07  8:08                               ` Helge Hafting
@ 2002-10-07 17:43                               ` Daniel Phillips
  2002-10-07 18:31                                 ` Andrew Morton
  2 siblings, 1 reply; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 17:43 UTC (permalink / raw)
  To: Martin J. Bligh, Oliver Neukum, Andrew Morton, Rob Landley
  Cc: Linus Torvalds, linux-kernel

On Sunday 06 October 2002 17:19, Martin J. Bligh wrote:
> > Then there's the issue of application startup. There's not enough
> > read ahead. This is especially sad, as the order of page faults is 
> > at least partially predictable.
> 
> Is the problem really, fundamentally a lack of readahead in the
> kernel? Or is it that your application is huge bloated pig? 

Readahead isn't the only problem, but it is a huge problem.  The current 
readahead model is per-inode, which is very little help with lots of small 
files, especially if they are fragmented or out of order.  There are various 
ways to fix this; they are all difficult[1].  Fortunately, we can call this 
"tuning work" so it can be done during the stable series.

[1] We could teach each filesystem how to read ahead across directories, or 
we could teach the vfs how to do physical readahead.  Choose your poison.

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 13:56                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Jesse Pollard
  2002-10-07 14:03                           ` Rob Landley
@ 2002-10-07 18:22                           ` Daniel Phillips
  2002-10-08  8:19                             ` Jan Hudec
  1 sibling, 1 reply; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 18:22 UTC (permalink / raw)
  To: Jesse Pollard, Rob Landley, Linus Torvalds, Martin J. Bligh; +Cc: linux-kernel

On Monday 07 October 2002 15:56, Jesse Pollard wrote:
> [the mouse] will still stall everytime the mouse crosses the window border IF the
> application has specified "enter/leave" event notification. This requires the
> application to be swapped in to receive the event. The only fix is locking
> the application/X libraries into memory.

That one could be punted with an hourglass cursor, until the events start flowing.
Well.  Not sure how much this has to do with the kernel...

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 17:43                               ` Daniel Phillips
@ 2002-10-07 18:31                                 ` Andrew Morton
  2002-10-07 18:51                                   ` Linus Torvalds
                                                     ` (3 more replies)
  0 siblings, 4 replies; 206+ messages in thread
From: Andrew Morton @ 2002-10-07 18:31 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Martin J. Bligh, Oliver Neukum, Rob Landley, Linus Torvalds,
	linux-kernel

Daniel Phillips wrote:
> 
> On Sunday 06 October 2002 17:19, Martin J. Bligh wrote:
> > > Then there's the issue of application startup. There's not enough
> > > read ahead. This is especially sad, as the order of page faults is
> > > at least partially predictable.
> >
> > Is the problem really, fundamentally a lack of readahead in the
> > kernel? Or is it that your application is huge bloated pig?
> 
> Readahead isn't the only problem, but it is a huge problem.  The current
> readahead model is per-inode, which is very little help with lots of small
> files, especially if they are fragmented or out of order.  There are various
> ways to fix this; they are all difficult[1].  Fortunately, we can call this
> "tuning work" so it can be done during the stable series.
> 
> [1] We could teach each filesystem how to read ahead across directories, or
> we could teach the vfs how to do physical readahead.  Choose your poison.

Devices do physical readahead, and it works nicely.

Go into ext2_new_inode, replace the call to find_group_dir with
find_group_other.  Then untar a kernel tree, unmount the fs,
remount it and see how long it takes to do a

	`find . -type f | xargs cat > /dev/null'

on that tree.  If your disk is like my disk, you will achieve
full disk bandwidth.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 18:31                                 ` Andrew Morton
@ 2002-10-07 18:51                                   ` Linus Torvalds
  2002-10-07 20:14                                     ` Alan Cox
  2002-10-07 18:58                                   ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Chris Friesen
                                                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 18:51 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Daniel Phillips, Martin J. Bligh, Oliver Neukum, Rob Landley,
	linux-kernel


On Mon, 7 Oct 2002, Andrew Morton wrote:
> 
> Devices do physical readahead, and it works nicely.

Indeed. There isn't any reasonable device where this isn't the case: the
_device_ (and sometimes the driver - floppy.c) does a lot better at
readahead than higher layers can do anyway.

> Go into ext2_new_inode, replace the call to find_group_dir with
> find_group_other.

I hate that thing. Hate hate hate. Maybe we should just do this, and hope 
that somebody will do a proper off-line cleanup tool.

In the meantime, it might just be possible to take a look at the uid, and 
if the uid matches use find_group_other, but for non-matching uids use 
find_group_dir. That gives a "compact for same users, distribute for 
different users" heuristic, which might be acceptable for normal use (and 
the theoretical cleanup tool could fix it up).

Add some other heuristics ("if the difference between free group sizes is 
bigger than a factor of two"), and maybe it would be useful.

The current approach sucks for everybody, and makes it impossible to get 
good throughput on a disk on many very common loads.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 18:31                                 ` Andrew Morton
  2002-10-07 18:51                                   ` Linus Torvalds
@ 2002-10-07 18:58                                   ` Chris Friesen
  2002-10-07 19:21                                     ` Daniel Phillips
  2002-10-07 19:36                                     ` Andrew Morton
  2002-10-07 19:05                                   ` Daniel Phillips
  2002-10-30 18:26                                   ` Lee Leahu
  3 siblings, 2 replies; 206+ messages in thread
From: Chris Friesen @ 2002-10-07 18:58 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Daniel Phillips, Martin J. Bligh, Oliver Neukum, Rob Landley,
	Linus Torvalds, linux-kernel

Andrew Morton wrote:

> Go into ext2_new_inode, replace the call to find_group_dir with
> find_group_other.  Then untar a kernel tree, unmount the fs,
> remount it and see how long it takes to do a
> 
> 	`find . -type f | xargs cat > /dev/null'
> 
> on that tree.  If your disk is like my disk, you will achieve
> full disk bandwidth.

Pardon my ignorance, but what's the difference between find_group_dir 
and find_group_other, and why aren't we using find_group_other already 
if its so much faster?

Chris


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 18:31                                 ` Andrew Morton
  2002-10-07 18:51                                   ` Linus Torvalds
  2002-10-07 18:58                                   ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Chris Friesen
@ 2002-10-07 19:05                                   ` Daniel Phillips
  2002-10-07 19:24                                     ` Linus Torvalds
  2002-10-30 18:26                                   ` Lee Leahu
  3 siblings, 1 reply; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 19:05 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Martin J. Bligh, Oliver Neukum, Rob Landley, Linus Torvalds,
	linux-kernel

On Monday 07 October 2002 20:31, Andrew Morton wrote:
> Daniel Phillips wrote:
> > [1] We could teach each filesystem how to read ahead across directories, or
> > we could teach the vfs how to do physical readahead.  Choose your poison.
> 
> Devices do physical readahead, and it works nicely.

Devices have a few MB of readahead cache, the kernel can have thousands of
times as much.

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 18:58                                   ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Chris Friesen
@ 2002-10-07 19:21                                     ` Daniel Phillips
  2002-10-07 19:35                                       ` Linus Torvalds
  2002-10-07 19:36                                     ` Andrew Morton
  1 sibling, 1 reply; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 19:21 UTC (permalink / raw)
  To: Chris Friesen, Andrew Morton
  Cc: Martin J. Bligh, Oliver Neukum, Rob Landley, Linus Torvalds,
	linux-kernel

On Monday 07 October 2002 20:58, Chris Friesen wrote:
> Andrew Morton wrote:
> 
> > Go into ext2_new_inode, replace the call to find_group_dir with
> > find_group_other.  Then untar a kernel tree, unmount the fs,
> > remount it and see how long it takes to do a
> > 
> > 	`find . -type f | xargs cat > /dev/null'
> > 
> > on that tree.  If your disk is like my disk, you will achieve
> > full disk bandwidth.
> 
> Pardon my ignorance, but what's the difference between find_group_dir 
> and find_group_other, and why aren't we using find_group_other already 
> if its so much faster?

These are the heuristics that determine where in the volume directory
inodes are allocated:

   http://lxr.linux.no/source/fs/ext2/ialloc.c#L221

Ext2 likes to spread directory inodes around the volume so that there is
room to keep the associated file blocks nearby.  This interacts rather
poorly with readahead.

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 19:05                                   ` Daniel Phillips
@ 2002-10-07 19:24                                     ` Linus Torvalds
  2002-10-07 20:02                                       ` Daniel Phillips
  0 siblings, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 19:24 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Andrew Morton, Martin J. Bligh, Oliver Neukum, Rob Landley, linux-kernel


On Mon, 7 Oct 2002, Daniel Phillips wrote:
> 
> Devices have a few MB of readahead cache, the kernel can have thousands of
> times as much.

I don't think that is in the least realistic.

There's _no_ way that the kernel could do physical readahead for more than
a few tens or hundreds of kB - the latency impact would just be too much
to handle, and the VM impact is not likely insignificant either.

So the device readahead is _not_ noticeably smaller than what the kernel
can reasonably do, and it does a better job of it (ie disks can fill track
buffers optimally, depending on where the head hits etc).

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 19:21                                     ` Daniel Phillips
@ 2002-10-07 19:35                                       ` Linus Torvalds
  2002-10-08  0:39                                         ` Theodore Ts'o
  0 siblings, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 19:35 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Chris Friesen, Andrew Morton, Martin J. Bligh, Oliver Neukum,
	Rob Landley, linux-kernel


On Mon, 7 Oct 2002, Daniel Phillips wrote:
> 
> Ext2 likes to spread directory inodes around the volume so that there is
> room to keep the associated file blocks nearby.  This interacts rather
> poorly with readahead.

Not a read-ahead problem. It interacts rather poorly _full_stop_.

It means that the inode tables are spread all out, the bitmaps are
fragmented etc, so the disk head has to move all over the disk even when
only working with one directory tree like the kernel sources.

Kernel-level read-ahead doesn't much help, because the FS tries to keep
the data blocks for individual files together - which is the case the
kernel _can_ try to optimize a bit. Physical read-ahead doesn't work
either, since the parts that can be physically read ahead are the ones
that the regular in-file read-ahead already mostly takes care of it.

So the problem with spreading stuff out doesn't have anything to do with 
read-ahead, and has everything to do with the basic issue of BAD LOCALITY. 
Locality is _good_, independently of read-ahead and independently of 
medium. 

Locality helps regardless of any read-ahead, although it is clearly true 
that bad locality makes readahead more futile.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-07 18:58                                   ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Chris Friesen
  2002-10-07 19:21                                     ` Daniel Phillips
@ 2002-10-07 19:36                                     ` Andrew Morton
  2002-10-08  2:36                                       ` Simon Kirby
  2002-10-08 13:54                                       ` Helge Hafting
  1 sibling, 2 replies; 206+ messages in thread
From: Andrew Morton @ 2002-10-07 19:36 UTC (permalink / raw)
  To: Chris Friesen
  Cc: Daniel Phillips, Martin J. Bligh, Oliver Neukum, Rob Landley,
	Linus Torvalds, linux-kernel

Chris Friesen wrote:
> 
> Andrew Morton wrote:
> 
> > Go into ext2_new_inode, replace the call to find_group_dir with
> > find_group_other.  Then untar a kernel tree, unmount the fs,
> > remount it and see how long it takes to do a
> >
> >       `find . -type f | xargs cat > /dev/null'
> >
> > on that tree.  If your disk is like my disk, you will achieve
> > full disk bandwidth.
> 
> Pardon my ignorance, but what's the difference between find_group_dir
> and find_group_other, and why aren't we using find_group_other already
> if its so much faster?
> 

ext2 and ext3 filesystems are carved up into "block groups", aka
"cylinder groups".  Each one is 4096*8 blocks - typically 128 MB.
So you can easily have hundreds of blockgroups on a single partition.

The inode allocator is designed to arrange that files which are within the
same directory fall in the same blockgroup, for locality of reference.

But new directories are placed "far away", in block groups which have
plenty of free space.  (find_group_dir -> find a blockgroup for a
directory).

The thinking here is that files in the same directory are related,
and files in different directories are unrelated.  So we can take 
advantage of that heuristic - go and use a new blockgroup each time
a new directory is created.  This is a levelling algorithm which
tries to keep all blockgroups at a similar occupancy level.
That's a good thing, because high occupancy levels lead to fragmentation.

find_group_other() is basically first-fit-from-start-of-disk, and
if we use that for directories as well as files, your untar-onto-a-clean-disk
simply lays everything out in a contiguous chunk.

Part of the problem here is that it has got worse over time.  The
size of a blockgroup is hardwired to blocksize*bits-in-a-byte*blocksize.
But disks keep on getting bigger.  Five years ago (when, presumably, this
algorithm was designed), a typical partition had, what?  Maybe four
blockgroups?  Now it has hundreds, and so the "levelling" is levelling
across hundreds of blockgroups and not just a handful.

I did a lot of work on this back in November 2001, mainly testing
with a trace-based workload from Keith Smith.  See
http://www.eecs.harvard.edu/~keith/usenix.1995.html

Al Viro wrote a modified allocator (which nobody understood ;))
based on Orlov's algorithm.

I ended up concluding that the current (sucky) code is indeed
best for minimising long-term fragmentation under slow-growth
scenarios.  And worst for fast-growth.

Orlov was in between on both.

Simply nuking find_group_dir() was best for fast-growth, worst
for slow-growth.

Block allocators are fertile grounds for academic papers.  It's
complex.  There is a risk that you can do something which is
cool in testing, but ends up exploding horridly after a year's
use.  By which time we have ten million deployed systems running like
dogs, damn all we can do about it.

The best solution is to use first-fit and online defrag to fix the
long-term fragmentation.  It really is.  There has been no appreciable
progress on this.

A *practical* solution is to keep a spare partition empty and do
a `cp -a' from one partition onto another once per week and
swizzle the mountpoints.  Because the big copy will unfragment
everything.

ho-hum.  I shall forward-port Orlov, and again attempt to understand
it ;)

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 19:24                                     ` Linus Torvalds
@ 2002-10-07 20:02                                       ` Daniel Phillips
  2002-10-07 20:14                                         ` Andrew Morton
  2002-10-07 20:28                                         ` Linus Torvalds
  0 siblings, 2 replies; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 20:02 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Martin J. Bligh, Oliver Neukum, Rob Landley, linux-kernel

On Monday 07 October 2002 21:24, Linus Torvalds wrote:
> On Mon, 7 Oct 2002, Daniel Phillips wrote:
> > 
> > Devices have a few MB of readahead cache, the kernel can have thousands of
> > times as much.
> 
> I don't think that is in the least realistic.
> 
> There's _no_ way that the kernel could do physical readahead for more than
> a few tens or hundreds of kB

If that's a bet, I'll take you up on it.

> - the latency impact would just be too much
> to handle, and the VM impact is not likely insignificant either.

I did say difficult.  It really is, but there are big gains to be had.

This is easy to verify: say you have 100 MB of kernel source stored in, say,
50 different clumps on disk.  Complete with seeks, a perfectly prescient
readahead algorithm can read that into memory in about 5 seconds, even with
my lame scsi raid controller[1].  So two of those needs 10 seconds, and I
can diff those two trees in 2 seconds, in cache.  In practice it takes 90
seconds, so there is obviously a lot of room for improvement.

Note that if the disks really were capable of handling the readahead
themselves they would already give me the 12 second result, not the 90
seconds.  They simply can't, because they haven't got enough cache.

[1] If the controller wasn't lame it would read the 100 MB in less than a
second, with its (peak) total of 200 MB/s media bandwidth, less 20% worth
of parity blocks.

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-07 20:02                                       ` Daniel Phillips
@ 2002-10-07 20:14                                         ` Andrew Morton
  2002-10-07 20:22                                           ` Daniel Phillips
  2002-10-07 20:28                                         ` Linus Torvalds
  1 sibling, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-07 20:14 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Linus Torvalds, Martin J. Bligh, Oliver Neukum, Rob Landley,
	linux-kernel

Daniel Phillips wrote:
> 
> This is easy to verify: say you have 100 MB of kernel source stored in, say,
> 50 different clumps on disk.

Disks use segmentation on their readahead buffers.  Typically four-way.
So they will only buffer four different chunks of disk at a time.

If you're reading from 50 different places on disk, the disk keeps
invalidating readahead at the segment level.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 18:51                                   ` Linus Torvalds
@ 2002-10-07 20:14                                     ` Alan Cox
  2002-10-07 20:31                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
  2002-10-07 20:44                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Linus Torvalds
  0 siblings, 2 replies; 206+ messages in thread
From: Alan Cox @ 2002-10-07 20:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linux Kernel Mailing List

On Mon, 2002-10-07 at 19:51, Linus Torvalds wrote:
> In the meantime, it might just be possible to take a look at the uid, and 
> if the uid matches use find_group_other, but for non-matching uids use 
> find_group_dir. That gives a "compact for same users, distribute for 
> different users" heuristic, which might be acceptable for normal use (and 
> the theoretical cleanup tool could fix it up).

Factoring the uid/gid/pid in actually may help in other ways. If we are
doing it by pid or by uid we will reduce the interleave of multiple
files thing you sometimes get


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-07 20:14                                         ` Andrew Morton
@ 2002-10-07 20:22                                           ` Daniel Phillips
  0 siblings, 0 replies; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 20:22 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Martin J. Bligh, Oliver Neukum, Rob Landley,
	linux-kernel

On Monday 07 October 2002 22:14, Andrew Morton wrote:
> Daniel Phillips wrote:
> > 
> > This is easy to verify: say you have 100 MB of kernel source stored in, say,
> > 50 different clumps on disk.
> 
> Disks use segmentation on their readahead buffers.  Typically four-way.
> So they will only buffer four different chunks of disk at a time.
> 
> If you're reading from 50 different places on disk, the disk keeps
> invalidating readahead at the segment level.

Sure, and kernel-based physical readahead would not have that problem.
(Kernel-based physical readahead has its own problems, for example: how
do you determine that a given physical block is already cached in an
inode and so should be ignored as a readahead candidate?)

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 20:02                                       ` Daniel Phillips
  2002-10-07 20:14                                         ` Andrew Morton
@ 2002-10-07 20:28                                         ` Linus Torvalds
  2002-10-07 21:16                                           ` Daniel Phillips
  1 sibling, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 20:28 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Andrew Morton, Martin J. Bligh, Oliver Neukum, Rob Landley, linux-kernel


On Mon, 7 Oct 2002, Daniel Phillips wrote:
> 
> If that's a bet, I'll take you up on it.

Sure. The mey is:
 - we can more easily fix the f*cking filesystems to be sane
 - than trying to add prescient read-ahead to the kernel

In other words, trying to do an impossibly good job on read-ahead is 
_stupid_, when the real problem is that ext2 lays out files in total crap 
ways. 

> I did say difficult.  It really is, but there are big gains to be had.

But why do the horribly stupid thing, when Andrew has already shown that a
one-liner change to ext2/3 gives you platter speeds (and better speeds
than your approach _can_ get, since you still are going to end up seeking
a lot, even if you can make your read-ahead prescient).

In other words, you're overcompensating.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 -   (NUMA))
  2002-10-07 20:14                                     ` Alan Cox
@ 2002-10-07 20:31                                       ` Andrew Morton
  2002-10-07 20:46                                         ` Linus Torvalds
  2002-10-07 20:44                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Linus Torvalds
  1 sibling, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-07 20:31 UTC (permalink / raw)
  To: Alan Cox
  Cc: Linus Torvalds, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linux Kernel Mailing List

Alan Cox wrote:
> 
> On Mon, 2002-10-07 at 19:51, Linus Torvalds wrote:
> > In the meantime, it might just be possible to take a look at the uid, and
> > if the uid matches use find_group_other, but for non-matching uids use
> > find_group_dir. That gives a "compact for same users, distribute for
> > different users" heuristic, which might be acceptable for normal use (and
> > the theoretical cleanup tool could fix it up).
> 
> Factoring the uid/gid/pid in actually may help in other ways. If we are
> doing it by pid or by uid we will reduce the interleave of multiple
> files thing you sometimes get

Yes, that would help on multiuser setups.  Delayed allocation is
a great fix for that problem though.

The other obvious heuristic is "if the parent directory was
created in the last five seconds, use find_group_other()".  But
that made Linus go "ewwww".

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 20:14                                     ` Alan Cox
  2002-10-07 20:31                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
@ 2002-10-07 20:44                                       ` Linus Torvalds
  2002-10-07 21:16                                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
  1 sibling, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 20:44 UTC (permalink / raw)
  To: Alan Cox
  Cc: Andrew Morton, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linux Kernel Mailing List


On 7 Oct 2002, Alan Cox wrote:
> 
> Factoring the uid/gid/pid in actually may help in other ways. If we are
> doing it by pid or by uid we will reduce the interleave of multiple
> files thing you sometimes get

'pid' would probably work better than what we have now, even though I bet
it would get confused by a large number of installers (ie "make install"
in just about any project will use multiple different processes to copy
over separate subdirectories. In the X11R6 tree it uses individual "cp"
processes for each file!)

The session ID would avoid some of that, but they both have a fundamental
problem: neither pid nor session ID is actually saved in any directory
structure, so it's quite hard to use that as a heuristic for whether a new
file should go into the same directory group as the directory it is
created in.

That's why "uid" would work better. The uid has a different issue, though,
namely the fact that when user directories are created, they are basically
always created as uid 0 first, and then a "chown" - which means that the
user heuristic wouldn't actually trigger at the right time. So the
heuristic couldn't be just "newfile->uid == directory->uid", it would have
to be something better.

I think last time we had the discussion, time-based things were also felt
to be good heuristics in many cases.

It could also be good to have an additional static hint on whether
directories should be spread out or not. Administrators could set the
"spread out" bit on the /, /home and /var/spool/(news|mail) directories,
for example, causing those to spread out their subdirectories, but not
causing normal user activity to do so.

Yeah, yeah, I know there are papers on this. I don't care. I think 
something has to be done, and last time the discussion petered out at 
about this point.

			Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 -   (NUMA))
  2002-10-07 20:31                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
@ 2002-10-07 20:46                                         ` Linus Torvalds
  0 siblings, 0 replies; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 20:46 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alan Cox, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linux Kernel Mailing List


On Mon, 7 Oct 2002, Andrew Morton wrote:
> 
> The other obvious heuristic is "if the parent directory was
> created in the last five seconds, use find_group_other()".  But
> that made Linus go "ewwww".

Well, it makes me go "less ewww" than the current scheme, so if that turns 
out to be acceptable to others, I won't mind _too_ much.

The reason I don't like time too much personally is that it's not very 
reproducible. Especially if the times are in the second range. I'd rather 
have a heuristic that is deterministic.

		Linus


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 -   (NUMA))
  2002-10-07 20:44                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Linus Torvalds
@ 2002-10-07 21:16                                         ` Andrew Morton
  2002-10-07 23:47                                           ` jw schultz
  2002-10-11  0:02                                           ` Mike Fedyk
  0 siblings, 2 replies; 206+ messages in thread
From: Andrew Morton @ 2002-10-07 21:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Alan Cox, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linux Kernel Mailing List

Linus Torvalds wrote:
> 
> On 7 Oct 2002, Alan Cox wrote:
> >
> > Factoring the uid/gid/pid in actually may help in other ways. If we are
> > doing it by pid or by uid we will reduce the interleave of multiple
> > files thing you sometimes get
> 
> 'pid' would probably work better than what we have now, even though I bet
> it would get confused by a large number of installers (ie "make install"
> in just about any project will use multiple different processes to copy
> over separate subdirectories. In the X11R6 tree it uses individual "cp"
> processes for each file!)
> 
> The session ID would avoid some of that, but they both have a fundamental
> problem: neither pid nor session ID is actually saved in any directory
> structure, so it's quite hard to use that as a heuristic for whether a new
> file should go into the same directory group as the directory it is
> created in.
> 
> That's why "uid" would work better.

Sounds good to me.  At least this puts a veneer of respectability over
decapitating find_group_other(), which is really what we all want
to do anyway ;)

> The uid has a different issue, though,
> namely the fact that when user directories are created, they are basically
> always created as uid 0 first, and then a "chown" - which means that the
> user heuristic wouldn't actually trigger at the right time. So the
> heuristic couldn't be just "newfile->uid == directory->uid", it would have
> to be something better.

Last time, Al suggested that we always use the find_group_other() approach
if the directory is being made at the top-level of the filesystem.  So
if /home is a mountpoint, the user directories get spread out.

I think this, and the UID comparison will be good enough.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 20:28                                         ` Linus Torvalds
@ 2002-10-07 21:16                                           ` Daniel Phillips
  2002-10-07 21:55                                             ` Linus Torvalds
  2002-10-07 22:14                                             ` Charles Cazabon
  0 siblings, 2 replies; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 21:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Martin J. Bligh, Oliver Neukum, Rob Landley, linux-kernel

On Monday 07 October 2002 22:28, Linus Torvalds wrote:
> On Mon, 7 Oct 2002, Daniel Phillips wrote:
> > 
> > If that's a bet, I'll take you up on it.
> 
> Sure. The mey is:
            ^^^ <---- "bet" ?
>  - we can more easily fix the f*cking filesystems to be sane
>  - then trying to add prescient read-ahead to the kernel

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 21:16                                           ` Daniel Phillips
@ 2002-10-07 21:55                                             ` Linus Torvalds
  2002-10-07 22:02                                               ` Daniel Phillips
  2002-10-07 22:14                                             ` Charles Cazabon
  1 sibling, 1 reply; 206+ messages in thread
From: Linus Torvalds @ 2002-10-07 21:55 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Andrew Morton, Martin J. Bligh, Oliver Neukum, Rob Landley, linux-kernel


On Mon, 7 Oct 2002, Daniel Phillips wrote:
> > 
> > Sure. The mey is:
>             ^^^ <---- "bet" ?

Yeah. What the heck happened to my fingers?

		Linus "spastic" Torvalds


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 21:55                                             ` Linus Torvalds
@ 2002-10-07 22:02                                               ` Daniel Phillips
  2002-10-07 22:12                                                 ` Andrew Morton
  0 siblings, 1 reply; 206+ messages in thread
From: Daniel Phillips @ 2002-10-07 22:02 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andrew Morton, Martin J. Bligh, Oliver Neukum, Rob Landley, linux-kernel

On Monday 07 October 2002 23:55, Linus Torvalds wrote:
> On Mon, 7 Oct 2002, Daniel Phillips wrote:
> > > 
> > > Sure. The mey is:
> >             ^^^ <---- "bet" ?
> 
> Yeah. What the heck happened to my fingers?

Apparently, one of them missed the key it was aiming for and the other one
changed hands.

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-07 22:02                                               ` Daniel Phillips
@ 2002-10-07 22:12                                                 ` Andrew Morton
  2002-10-08  8:49                                                   ` Padraig Brady
  0 siblings, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-07 22:12 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Linus Torvalds, Martin J. Bligh, Oliver Neukum, Rob Landley,
	linux-kernel

Daniel Phillips wrote:
> 
> On Monday 07 October 2002 23:55, Linus Torvalds wrote:
> > On Mon, 7 Oct 2002, Daniel Phillips wrote:
> > > >
> > > > Sure. The mey is:
> > >             ^^^ <---- "bet" ?
> >
> > Yeah. What the heck happened to my fingers?
> 
> Apparently, one of them missed the key it was aiming for and the other one
> changed hands.
> 

They don't call him Kubys for nothing.

I dug out and dusted off Al's Orlov allocator patch.  And found
a comment which rather helps explain how it works.

I performance tested this back in November.  See
http://www.uwsg.iu.edu/hypermail/linux/kernel/0111.1/0281.html

Bottom line: it's as good as the use-first-fit-everywhere
approach, and appears to have better long-term antifragmentation
characteristics.

I shall test it.


 fs/ext2/ext2.h             |    1 
 fs/ext2/ialloc.c           |  164 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext2/super.c            |    8 ++
 include/linux/ext2_fs_sb.h |    2 
 4 files changed, 172 insertions(+), 3 deletions(-)

--- 2.5.41/fs/ext2/ialloc.c~orlov-allocator	Mon Oct  7 14:31:50 2002
+++ 2.5.41-akpm/fs/ext2/ialloc.c	Mon Oct  7 15:04:09 2002
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/random.h>
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
@@ -205,6 +206,7 @@ static void ext2_preread_inode(struct in
  * For other inodes, search forward from the parent directory\'s block
  * group to find a free inode.
  */
+#if 0
 
 static int find_group_dir(struct super_block *sb, int parent_group)
 {
@@ -238,9 +240,141 @@ static int find_group_dir(struct super_b
 	mark_buffer_dirty(best_bh);
 	return best_group;
 }
+#endif
+
+/* 
+ * Orlov's allocator for directories. 
+ * 
+ * We always try to spread first-level directories.
+ *
+ * If there are blockgroups with both free inodes and free blocks counts 
+ * not worse than average we return one with smallest directory count. 
+ * Otherwise we simply return a random group. 
+ * 
+ * For all other directories the rules are: 
+ * 
+ * It's OK to put directory into a group unless 
+ * it has too many directories already (max_dirs) or 
+ * it has too few free inodes left (min_inodes) or 
+ * it has too few free blocks left (min_blocks) or 
+ * it's already running too large debt (max_debt). 
+ * Parent's group is preferred; if it doesn't satisfy these 
+ * conditions we search cyclically through the rest. If none 
+ * of the groups look good we just look for a group with more 
+ * free inodes than average (starting at parent's group). 
+ * 
+ * Debt is incremented each time we allocate a directory and decremented 
+ * when we allocate a non-directory inode, kept within 0--255. 
+ */ 
+
+#define INODE_COST 64
+#define BLOCK_COST 256
+
+static int find_group_orlov(struct super_block *sb, struct inode *parent)
+{
+	int parent_group = EXT2_I(parent)->i_block_group;
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_super_block *es = sbi->s_es;
+	int ngroups = sbi->s_groups_count;
+	int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
+	int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+	int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups;
+	int blocks_per_dir;
+	int ndirs = sbi->s_dir_count;
+	int max_debt, max_dirs, min_blocks, min_inodes;
+	int group = -1, i;
+	struct ext2_group_desc *desc;
+	struct buffer_head *bh;
+
+	if (parent == sb->s_root->d_inode) {
+		struct ext2_group_desc *best_desc = NULL;
+		struct buffer_head *best_bh = NULL;
+		int best_ndir = inodes_per_group;
+		int best_group = -1;
+
+		get_random_bytes(&group, sizeof(group));
+		parent_group = (unsigned)group % ngroups;
+		for (i = 0; i < ngroups; i++) {
+			group = (parent_group + i) % ngroups;
+			desc = ext2_get_group_desc (sb, group, &bh);
+			if (!desc || !desc->bg_free_inodes_count)
+				continue;
+			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
+				continue;
+			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+				continue;
+			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
+				continue;
+			best_group = group;
+			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
+			best_desc = desc;
+			best_bh = bh;
+		}
+		if (best_group >= 0) {
+			desc = best_desc;
+			bh = best_bh;
+			group = best_group;
+			goto found;
+		}
+		goto fallback;
+	}
+
+	blocks_per_dir = (le32_to_cpu(es->s_blocks_count) -
+			  le32_to_cpu(es->s_free_blocks_count)) / ndirs;
+
+	max_dirs = ndirs / ngroups + inodes_per_group / 16;
+	min_inodes = avefreei - inodes_per_group / 4;
+	min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4;
+
+	max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
+	if (max_debt * INODE_COST > inodes_per_group)
+		max_debt = inodes_per_group / INODE_COST;
+	if (max_debt > 255)
+		max_debt = 255;
+	if (max_debt == 0)
+		max_debt = 1;
+
+	for (i = 0; i < ngroups; i++) {
+		group = (parent_group + i) % ngroups;
+		desc = ext2_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (sbi->debts[group] >= max_debt)
+			continue;
+		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
+			continue;
+		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
+			continue;
+		goto found;
+	}
+
+fallback:
+	for (i = 0; i < ngroups; i++) {
+		group = (parent_group + i) % ngroups;
+		desc = ext2_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
+			goto found;
+	}
+
+	return -1;
+
+found:
+	desc->bg_free_inodes_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
+	desc->bg_used_dirs_count =
+		cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
+	sbi->s_dir_count++;
+	mark_buffer_dirty(bh);
+	return group;
+}
 
-static int find_group_other(struct super_block *sb, int parent_group)
+static int find_group_other(struct super_block *sb, struct inode *parent)
 {
+	int parent_group = EXT2_I(parent)->i_block_group;
 	int ngroups = EXT2_SB(sb)->s_groups_count;
 	struct ext2_group_desc *desc;
 	struct buffer_head *bh;
@@ -312,9 +446,9 @@ struct inode * ext2_new_inode(struct ino
 	es = EXT2_SB(sb)->s_es;
 repeat:
 	if (S_ISDIR(mode))
-		group = find_group_dir(sb, EXT2_I(dir)->i_block_group);
+		group = find_group_orlov(sb, dir);
 	else 
-		group = find_group_other(sb, EXT2_I(dir)->i_block_group);
+		group = find_group_other(sb, dir);
 
 	err = -ENOSPC;
 	if (group == -1)
@@ -349,6 +483,15 @@ repeat:
 
 	es->s_free_inodes_count =
 		cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
+
+	if (S_ISDIR(mode)) {
+		if (EXT2_SB(sb)->debts[i] < 255)
+			EXT2_SB(sb)->debts[i]++;
+	} else {
+		if (EXT2_SB(sb)->debts[i])
+			EXT2_SB(sb)->debts[i]--;
+	}
+
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
 	inode->i_uid = current->fsuid;
@@ -478,6 +621,21 @@ unsigned long ext2_count_free_inodes (st
 #endif
 }
 
+/* Called at mount-time, super-block is locked */
+unsigned long ext2_count_dirs (struct super_block * sb)
+{
+	unsigned long count = 0;
+	int i;
+
+	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
+		struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL);
+		if (!gdp)
+			continue;
+		count += le16_to_cpu(gdp->bg_used_dirs_count);
+	}
+	return count;
+}
+
 #ifdef CONFIG_EXT2_CHECK
 /* Called at mount-time, super-block is locked */
 void ext2_check_inodes_bitmap (struct super_block * sb)
--- 2.5.41/fs/ext2/super.c~orlov-allocator	Mon Oct  7 14:31:58 2002
+++ 2.5.41-akpm/fs/ext2/super.c	Mon Oct  7 14:52:38 2002
@@ -665,6 +665,12 @@ static int ext2_fill_super(struct super_
 		printk ("EXT2-fs: not enough memory\n");
 		goto failed_mount;
 	}
+	sbi->debts = kmalloc(sbi->s_groups_count, GFP_KERNEL);
+	if (!sbi->debts) {
+		printk ("EXT2-fs: not enough memory\n");
+		goto failed_mount_group_desc;
+	}
+	memset(sbi->debts, 0, sbi->s_groups_count);
 	for (i = 0; i < db_count; i++) {
 		sbi->s_group_desc[i] = sb_bread(sb, logic_sb_block + i + 1);
 		if (!sbi->s_group_desc[i]) {
@@ -681,6 +687,7 @@ static int ext2_fill_super(struct super_
 		goto failed_mount2;
 	}
 	sbi->s_gdb_count = db_count;
+	sbi->s_dir_count = ext2_count_dirs(sb);
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	/*
 	 * set up enough so that it can read an inode
@@ -706,6 +713,7 @@ static int ext2_fill_super(struct super_
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
+failed_mount_group_desc:
 	kfree(sbi->s_group_desc);
 failed_mount:
 	brelse(bh);
--- 2.5.41/include/linux/ext2_fs_sb.h~orlov-allocator	Mon Oct  7 14:32:07 2002
+++ 2.5.41-akpm/include/linux/ext2_fs_sb.h	Mon Oct  7 14:38:23 2002
@@ -43,6 +43,8 @@ struct ext2_sb_info {
 	int s_inode_size;
 	int s_first_ino;
 	u32 s_next_generation;
+	unsigned long s_dir_count;
+	u8 *debts;
 };
 
 #endif	/* _LINUX_EXT2_FS_SB */
--- 2.5.41/fs/ext2/ext2.h~orlov-allocator	Mon Oct  7 14:37:36 2002
+++ 2.5.41-akpm/fs/ext2/ext2.h	Mon Oct  7 14:37:51 2002
@@ -45,6 +45,7 @@ extern int ext2_new_block (struct inode 
 extern void ext2_free_blocks (struct inode *, unsigned long,
 			      unsigned long);
 extern unsigned long ext2_count_free_blocks (struct super_block *);
+extern unsigned long ext2_count_dirs (struct super_block *);
 extern void ext2_check_blocks_bitmap (struct super_block *);
 extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
 						    unsigned int block_group,

.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 21:16                                           ` Daniel Phillips
  2002-10-07 21:55                                             ` Linus Torvalds
@ 2002-10-07 22:14                                             ` Charles Cazabon
  1 sibling, 0 replies; 206+ messages in thread
From: Charles Cazabon @ 2002-10-07 22:14 UTC (permalink / raw)
  To: linux-kernel

Daniel Phillips <phillips@arcor.de> wrote:
> > 
> > Sure. The mey is:
>             ^^^ <---- "bet" ?

Kubys typed that line.

Charles
-- 
-----------------------------------------------------------------------
Charles Cazabon                            <linux@discworld.dyndns.org>
GPL'ed software available at:     http://www.qcc.ca/~charlesc/software/
-----------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 - (NUMA))
  2002-10-07 21:16                                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
@ 2002-10-07 23:47                                           ` jw schultz
  2002-10-11  0:02                                           ` Mike Fedyk
  1 sibling, 0 replies; 206+ messages in thread
From: jw schultz @ 2002-10-07 23:47 UTC (permalink / raw)
  To: Linux Kernel Mailing List

On Mon, Oct 07, 2002 at 02:16:29PM -0700, Andrew Morton wrote:
> Linus Torvalds wrote:
> > 
> > On 7 Oct 2002, Alan Cox wrote:
> > >
> > > Factoring the uid/gid/pid in actually may help in other ways. If we are
> > > doing it by pid or by uid we will reduce the interleave of multiple
> > > files thing you sometimes get
> > 
> > 'pid' would probably work better than what we have now, even though I bet
> > it would get confused by a large number of installers (ie "make install"
> > in just about any project will use multiple different processes to copy
> > over separate subdirectories. In the X11R6 tree it uses individual "cp"
> > processes for each file!)
> > 
> > The session ID would avoid some of that, but they both have a fundamental
> > problem: neither pid nor session ID is actually saved in any directory
> > structure, so it's quite hard to use that as a heuristic for whether a new
> > file should go into the same directory group as the directory it is
> > created in.
> > 
> > That's why "uid" would work better.
> 
> Sound good to me.  At leat this puts a veneer of respectability over
> decapitating find_group_other(), which is really what we all want
> to do anyway ;)
> 
> > The uid has a different issue, though,
> > namely the fact that when user directories are created, they are basically
> > always created as uid 0 first, and then a "chown" - which means that the
> > user heuristic wouldn't actually trigger at the right time. So the
> > heuristic couldn't be just "newfile->uid == directory->uid", it would have
> > to be something better.
> 
> Last time, Al suggested that we always use the find_group_other() approach
> if the directory is being made at the top-level of the filesystem.  So
> if /home is a mountpoint, the user directories get spread out.
> 
> I think this, and the UID comparison will be good enough.

How about UID == 0?  Other than install and restore tree
creation (top levels) is done by root but tree population
tends to be done by non-root.  That would cause /home/* or
/project/* etc to be in separate groups but the contents of
each would (mostly) have locality. 

Let's see, that would be..

-       if (S_ISDIR(mode))
+       if (S_ISDIR(mode) && !current->fsuid)
                group = find_group_dir(sb, dir->u.ext2_i.i_block_group);
        else
                group = find_group_other(sb, dir->u.ext2_i.i_block_group);



-- 
________________________________________________________________
	J.W. Schultz            Pegasystems Technologies
	email address:		jw@pegasys.ws

		Remember Cernan and Schmitt

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 19:35                                       ` Linus Torvalds
@ 2002-10-08  0:39                                         ` Theodore Ts'o
  2002-10-08  2:59                                           ` Andrew Morton
  0 siblings, 1 reply; 206+ messages in thread
From: Theodore Ts'o @ 2002-10-08  0:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Daniel Phillips, Chris Friesen, Andrew Morton, Martin J. Bligh,
	Oliver Neukum, Rob Landley, linux-kernel

On Mon, Oct 07, 2002 at 12:35:26PM -0700, Linus Torvalds wrote:
> 
> On Mon, 7 Oct 2002, Daniel Phillips wrote:
> > 
> > Ext2 likes to spread directory inodes around the volume so that there is
> > room to keep the associated file blocks nearby.  This interacts rather
> > poorly with readahead.
> 
> Not a read-ahead problem. It interacts rather poory _full_stop_.
> 
> It means that the inode tables are spread all out, the bitmaps are
> fragmented etc, so the disk head has to move all over the disk even when
> only working with one directory tree like the kernel sources.

It depends on what you are doing.  BSD, and even XFS, uses the concept
of using cylinder groups or block groups as one of many tools to avoid
file fragmentation and to concentrate locality for files within a
directory.  The reason why FAT filesystems have file fragmentation
problems in a far worse way is because they don't have the
concept of a block group, and simply always allocate from the
beginning of the filesystem.  This is effectively what would happen if
you had a single block/cylinder group in the filesystem.

> So the problem with spreading stuff out doesn't have anything to do with 
> read-ahead, and has everything to do with the basic issue of BAD LOCALITY. 
> Locality is _good_, independently of read-ahead and independently of 
> medium. 

Ironically, as I mentioned, one of the reasons behind the block group
scheme is to *increase* locality for files within a particular
directory.  As you point out quite correctly, though, it tends to
destroy locality across an entire directory tree.

Maybe the answer is that we need some way of declaring that some
directory is the root of "a directory tree".  That way, the filesystem
can keep directories underneath the directory tree close together, and
the filesystem can try to keep directory trees far apart from each
other.  

In order to do something like this, we would just need a filesystem
API extension to allow programs like tar and bitkeeper to give a hint
that a new directory tree is being established --- and ideally, it
needs to be done at mkdir time, so that the filesystem can
do a better job of deciding where to place the initial
root of the "directory tree".  Things would also work if you declared
some directory tree to be the root of a "directory tree" after the
directory was initially created, but the allocation heuristics
wouldn't be nearly as effective.

Linus, what do you think about defining a new flag which could be
passed as part of the mode bits to mkdir()?  If we allow the
filesystem to get some additional hints from userspace about what the
difference between /usr/src/linux (where directory spreading is a bad
idea) and /usr/home (where directory spreading is a very good idea),
it would make life for the filesystem much easier.

						- Ted


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 19:36                                     ` Andrew Morton
@ 2002-10-08  2:36                                       ` Simon Kirby
  2002-10-08  2:47                                         ` Daniel Phillips
  2002-10-08  2:50                                         ` Andrew Morton
  2002-10-08 13:54                                       ` Helge Hafting
  1 sibling, 2 replies; 206+ messages in thread
From: Simon Kirby @ 2002-10-08  2:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Chris Friesen, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linus Torvalds, linux-kernel

On Mon, Oct 07, 2002 at 12:36:48PM -0700, Andrew Morton wrote:

> Block allocators are fertile grounds for academic papers.  It's
> complex.  There is a risk that you can do something which is
> cool in testing, but ends up exploding horridly after a year's
> use.  By which time we have ten million deployed systems running like
> dogs, damn all we can do about it.
> 
> The best solution is to use first-fit and online defrag to fix the
> long-term fragmentation.  It really is.  There has been no appreciable
> progress on this.
> 
> A *practical* solution is to keep a spare partition empty and do
> a `cp -a' from one partition onto another once per week and
> swizzle the mountpoints.  Because the big copy will unfragment
> everything.

Having seen fragmentation issues build up on (mbox) mail spools over
several years first hand, I can say that mail spools definitely show the
need for a defragmentation tool.  I remember actually doing the "cp -a"
trick just to restore the mail server to decent performance (which
worked amazingly well, for another few months).  (This was before we
switched to hashed directories and a POP3 server which caches mbox
messages offsets/UIDLs/states.)

Being able to defragment online would be very useful.  I've seen some
people talk about this every so often.  How far away is it?

Simon-

[        Simon Kirby        ][        Network Operations        ]
[     sim@netnation.com     ][     NetNation Communications     ]
[  Opinions expressed are not necessarily those of my employer. ]

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-08  2:36                                       ` Simon Kirby
@ 2002-10-08  2:47                                         ` Daniel Phillips
  2002-10-08  2:50                                         ` Andrew Morton
  1 sibling, 0 replies; 206+ messages in thread
From: Daniel Phillips @ 2002-10-08  2:47 UTC (permalink / raw)
  To: Simon Kirby, Andrew Morton
  Cc: Chris Friesen, Martin J. Bligh, Oliver Neukum, Rob Landley,
	Linus Torvalds, linux-kernel

On Tuesday 08 October 2002 04:36, Simon Kirby wrote:
> Being able to defragment online would be very useful.  I've seen some
> people talk about this every so often.  How far away is it?

The vfs consistency semantics are a little complex and fragile at the
moment, which is the only thing that makes it hard.  Think about how
many months of truncate bugs we had, then consider how the situation
looks when all the bits of filesystem are moving around while it's being
accessed.  That's not to say it won't happen, but it's unlikely to ever
be solid until the vfs semantics mature a little more.

-- 
Daniel

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08  2:36                                       ` Simon Kirby
  2002-10-08  2:47                                         ` Daniel Phillips
@ 2002-10-08  2:50                                         ` Andrew Morton
  2002-10-08  2:54                                           ` Simon Kirby
  2002-10-08 12:49                                           ` jlnance
  1 sibling, 2 replies; 206+ messages in thread
From: Andrew Morton @ 2002-10-08  2:50 UTC (permalink / raw)
  To: Simon Kirby
  Cc: Chris Friesen, Daniel Phillips, Martin J. Bligh, Oliver Neukum,
	Rob Landley, Linus Torvalds, linux-kernel

Simon Kirby wrote:
> 
> On Mon, Oct 07, 2002 at 12:36:48PM -0700, Andrew Morton wrote:
> 
> > Block allocators are fertile grounds for academic papers.  It's
> > complex.  There is a risk that you can do something which is
> > cool in testing, but ends up exploding horridly after a year's
> > use.  By which time we have ten million deployed systems running like
> > dogs, damn all we can do about it.
> >
> > The best solution is to use first-fit and online defrag to fix the
> > long-term fragmentation.  It really is.  There has been no appreciable
> > progress on this.
> >
> > A *practical* solution is to keep a spare partition empty and do
> > a `cp -a' from one partition onto another once per week and
> > swizzle the mountpoints.  Because the big copy will unfragment
> > everything.
> 
> Having seen fragmentation issues build up on (mbox) mail spools over
> several years first hand, I can say that mail spools definitely show the
> need for a defragmentation tool.  I remember actually doing the "cp -a"
> trick just to restore the mail server to decent performance (which
> worked amazingly well, for another few months).  (This was before we
> switched to hashed directories and a POP3 server which caches mbox
> messages offsets/UIDLs/states.)

Oh tell me about it.

Appended is the offset->block mapping for my "linux-kernel" mailbox.
Read it and weep...

> Being able to defragment online would be very useful.  I've seen some
> people talk about this every so often.  How far away is it?
> 

At the current rate of progress, I'd say that your grandkids will
just love it.

I have the core code for ext3.  It's at
http://www.zip.com.au/~akpm/linux/patches/2.4/2.4.19-pre10/ext3-reloc-page.patch
I never tested it, but that's a formality ;)

It offers a simple ioctl to relocate a single page's worth of blocks.
It's fully journalled and recoverable, pagecache coherent, etc.
But the userspace application which calls that ioctl hasn't been
written.


0-3: 3247328-3247331 (4)
4-10: 3247354-3247360 (7)
11-11: 3247371-3247371 (1)
12-14: 3247373-3247375 (3)
15-17: 3247989-3247991 (3)
18-18: 3247994-3247994 (1)
19-24: 3248026-3248031 (6)
25-26: 3248071-3248072 (2)
27-29: 3248259-3248261 (3)
30-43: 3248297-3248310 (14)
44-46: 3248389-3248391 (3)
47-53: 3248394-3248400 (7)
54-57: 3248403-3248406 (4)
58-61: 3248410-3248413 (4)
62-63: 3248533-3248534 (2)
64-65: 3248952-3248953 (2)
66-69: 3248973-3248976 (4)
70-75: 3249048-3249053 (6)
76-79: 3249070-3249073 (4)
80-83: 3259754-3259757 (4)
84-1035: 3277970-3278921 (952)
1036-1463: 3278924-3279351 (428)
1464-1478: 3279353-3279367 (15)
1479-1480: 3279369-3279370 (2)
1481-1532: 3279372-3279423 (52)
1533-1584: 3279462-3279513 (52)
1585-1585: 3279515-3279515 (1)
1586-1589: 3279517-3279520 (4)
1590-1595: 3279523-3279528 (6)
1596-1604: 3279540-3279548 (9)
1605-1638: 3279550-3279583 (34)
1639-1639: 17625-17625 (1)
1640-1640: 17627-17627 (1)
1641-1641: 17629-17629 (1)
1642-1645: 17636-17639 (4)
1646-1647: 17642-17643 (2)
1648-1649: 17645-17646 (2)
1650-1652: 17649-17651 (3)
1653-1654: 17657-17658 (2)
1655-1655: 17660-17660 (1)
1656-1656: 18789-18789 (1)
1657-1657: 18804-18804 (1)
1658-1658: 18806-18806 (1)
1659-1659: 18808-18808 (1)
1660-1660: 18812-18812 (1)
1661-1662: 19483-19484 (2)
1663-1663: 19486-19486 (1)
1664-1664: 19534-19534 (1)
1665-1665: 19605-19605 (1)
1666-1667: 19607-19608 (2)
1668-1669: 19613-19614 (2)
1670-1672: 19637-19639 (3)
1673-1674: 19656-19657 (2)
1675-1675: 19662-19662 (1)
1676-1676: 19668-19668 (1)
1677-1678: 19677-19678 (2)
1679-1679: 19685-19685 (1)
1680-1680: 19691-19691 (1)
1681-1681: 19693-19693 (1)
1682-1682: 19819-19819 (1)
1683-1683: 19832-19832 (1)
1684-1684: 19854-19854 (1)
1685-1685: 19862-19862 (1)
1686-1686: 19879-19879 (1)
1687-1688: 19884-19885 (2)
1689-1689: 19894-19894 (1)
1690-1690: 19897-19897 (1)
1691-1691: 19979-19979 (1)
1692-1692: 19982-19982 (1)
1693-1693: 19993-19993 (1)
1694-1694: 20002-20002 (1)
1695-1696: 20009-20010 (2)
1697-1698: 20012-20013 (2)
1699-1699: 20033-20033 (1)
1700-1700: 20102-20102 (1)
1701-1701: 20113-20113 (1)
1702-1702: 20115-20115 (1)
1703-1703: 20131-20131 (1)
1704-1704: 20147-20147 (1)
1705-1705: 20178-20178 (1)
1706-1708: 20187-20189 (3)
1709-1709: 20192-20192 (1)
1710-1711: 20352-20353 (2)
1712-1714: 20355-20357 (3)
1715-1715: 20371-20371 (1)
1716-1718: 20379-20381 (3)
1719-1723: 20385-20389 (5)
1724-1724: 20391-20391 (1)
1725-1725: 20393-20393 (1)
1726-1727: 20395-20396 (2)
1728-1733: 20403-20408 (6)
1734-1734: 20454-20454 (1)
1735-1737: 20488-20490 (3)
1738-1739: 20492-20493 (2)
1740-1740: 20496-20496 (1)
1741-1741: 20499-20499 (1)
1742-1744: 20506-20508 (3)
1745-1748: 20511-20514 (4)
1749-1750: 20520-20521 (2)
1751-1751: 20528-20528 (1)
1752-1752: 20533-20533 (1)
1753-1755: 20559-20561 (3)
1756-1756: 20563-20563 (1)
1757-1758: 20567-20568 (2)
1759-1759: 20570-20570 (1)
1760-1760: 20572-20572 (1)
1761-1761: 20574-20574 (1)
1762-1762: 20580-20580 (1)
1763-1765: 20610-20612 (3)
1766-1767: 20620-20621 (2)
1768-1770: 20630-20632 (3)
1771-1771: 20643-20643 (1)
1772-1772: 20646-20646 (1)
1773-1773: 20657-20657 (1)
1774-1780: 20697-20703 (7)
1781-1783: 20714-20716 (3)
1784-1787: 20719-20722 (4)
1788-1788: 20726-20726 (1)
1789-1789: 21624-21624 (1)
1790-1790: 21663-21663 (1)
1791-1791: 21696-21696 (1)
1792-1792: 22179-22179 (1)
1793-1793: 22724-22724 (1)
1794-1794: 23099-23099 (1)
1795-1795: 23266-23266 (1)
1796-1796: 23681-23681 (1)
1797-1797: 24326-24326 (1)
1798-1798: 24376-24376 (1)
1799-1799: 24396-24396 (1)
1800-1800: 24421-24421 (1)
1801-1801: 24458-24458 (1)
1802-1804: 24554-24556 (3)
1805-1806: 24609-24610 (2)
1807-1808: 24612-24613 (2)
1809-1810: 25120-25121 (2)
1811-1814: 25123-25126 (4)
1815-1816: 25128-25129 (2)
1817-1817: 25131-25131 (1)
1818-1819: 25133-25134 (2)
1820-1820: 25138-25138 (1)
1821-1821: 25185-25185 (1)
1822-1822: 25338-25338 (1)
1823-1823: 25341-25341 (1)
1824-1825: 25365-25366 (2)
1826-1833: 25369-25376 (8)
1834-1838: 25384-25388 (5)
1839-1839: 25399-25399 (1)
1840-1842: 25401-25403 (3)
1843-1843: 25406-25406 (1)
1844-1844: 25411-25411 (1)
1845-1845: 25435-25435 (1)
1846-1848: 25437-25439 (3)
1849-1849: 25444-25444 (1)
1850-1850: 25464-25464 (1)
1851-1853: 25468-25470 (3)
1854-1854: 25472-25472 (1)
1855-1856: 25562-25563 (2)
1857-1857: 25565-25565 (1)
1858-1859: 25568-25569 (2)
1860-1860: 25577-25577 (1)
1861-1861: 25589-25589 (1)
1862-1863: 25594-25595 (2)
1864-1864: 25597-25597 (1)
1865-1865: 25601-25601 (1)
1866-1867: 25604-25605 (2)
1868-1868: 25611-25611 (1)
1869-1870: 25630-25631 (2)
1871-1872: 25633-25634 (2)
1873-1874: 25668-25669 (2)
1875-1875: 25671-25671 (1)
1876-1876: 25673-25673 (1)
1877-1879: 25675-25677 (3)
1880-1881: 25679-25680 (2)
1882-1883: 25687-25688 (2)
1884-1891: 25706-25713 (8)
1892-1900: 25716-25724 (9)
1901-1901: 25739-25739 (1)
1902-1904: 25784-25786 (3)
1905-1908: 26067-26070 (4)
1909-1909: 26131-26131 (1)
1910-1910: 26133-26133 (1)
1911-1911: 26149-26149 (1)
1912-1913: 26167-26168 (2)
1914-1914: 26170-26170 (1)
1915-1915: 26176-26176 (1)
1916-1916: 26209-26209 (1)
1917-1917: 26313-26313 (1)
1918-1919: 26315-26316 (2)
1920-1920: 26318-26318 (1)
1921-1921: 26321-26321 (1)
1922-1925: 26368-26371 (4)
1926-1933: 26373-26380 (8)
1934-1934: 26383-26383 (1)
1935-1948: 26385-26398 (14)
1949-1952: 26400-26403 (4)
1953-1955: 26411-26413 (3)
1956-1958: 26416-26418 (3)
1959-1962: 26420-26423 (4)
1963-1964: 26426-26427 (2)
1965-1971: 26433-26439 (7)
1972-1975: 26442-26445 (4)
1976-1976: 26464-26464 (1)
1977-1984: 26503-26510 (8)
1985-1988: 26525-26528 (4)
1989-1989: 26638-26638 (1)
1990-1990: 26792-26792 (1)
1991-1991: 26824-26824 (1)
1992-1992: 26835-26835 (1)
1993-1996: 26837-26840 (4)
1997-1997: 26848-26848 (1)
1998-2000: 26850-26852 (3)
2001-2001: 26854-26854 (1)
2002-2006: 26857-26861 (5)
2007-2007: 26865-26865 (1)
2008-2011: 26873-26876 (4)
2012-2012: 27441-27441 (1)
2013-2018: 27450-27455 (6)
2019-2021: 27457-27459 (3)
2022-2022: 27462-27462 (1)
2023-2024: 27465-27466 (2)
2025-2025: 27468-27468 (1)
2026-2032: 27483-27489 (7)
2033-2033: 27491-27491 (1)
2034-2034: 27584-27584 (1)
2035-2035: 27588-27588 (1)
2036-2036: 27592-27592 (1)
2037-2037: 27595-27595 (1)
2038-2040: 27597-27599 (3)
2041-2042: 27607-27608 (2)
2043-2044: 27610-27611 (2)
2045-2045: 27617-27617 (1)
2046-2050: 27629-27633 (5)
2051-2056: 27663-27668 (6)
2057-2058: 27670-27671 (2)
2059-2059: 27674-27674 (1)
2060-2060: 27676-27676 (1)
2061-2062: 27678-27679 (2)
2063-2064: 27681-27682 (2)
2065-2075: 27684-27694 (11)
2076-2078: 27696-27698 (3)
2079-2079: 27700-27700 (1)
2080-2082: 27704-27706 (3)
2083-2084: 27708-27709 (2)
2085-2091: 27711-27717 (7)
2092-2092: 27735-27735 (1)
2093-2094: 27737-27738 (2)
2095-2095: 27740-27740 (1)
2096-2097: 27742-27743 (2)
2098-2098: 27748-27748 (1)
2099-2099: 27751-27751 (1)
2100-2103: 27756-27759 (4)
2104-2107: 27761-27764 (4)
2108-2108: 27775-27775 (1)
2109-2109: 27779-27779 (1)
2110-2111: 27787-27788 (2)
2112-2112: 27793-27793 (1)
2113-2113: 27806-27806 (1)
2114-2114: 27808-27808 (1)
2115-2115: 27810-27810 (1)
2116-2116: 27812-27812 (1)
2117-2117: 27819-27819 (1)
2118-2118: 27822-27822 (1)
2119-2120: 27825-27826 (2)
2121-2121: 27834-27834 (1)
2122-2123: 27836-27837 (2)
2124-2126: 27857-27859 (3)
2127-2128: 27863-27864 (2)
2129-2131: 27876-27878 (3)
2132-2134: 27880-27882 (3)
2135-2135: 27884-27884 (1)
2136-2136: 27888-27888 (1)
2137-2137: 27894-27894 (1)
2138-2138: 27899-27899 (1)
2139-2141: 27943-27945 (3)
2142-2146: 27950-27954 (5)
2147-2147: 27982-27982 (1)
2148-2148: 27992-27992 (1)
2149-2150: 28004-28005 (2)
2151-2152: 28016-28017 (2)
2153-2153: 28022-28022 (1)
2154-2155: 28025-28026 (2)
2156-2156: 28033-28033 (1)
2157-2162: 28036-28041 (6)
2163-2163: 28062-28062 (1)
2164-2165: 28079-28080 (2)
2166-2167: 28082-28083 (2)
2168-2168: 28086-28086 (1)
2169-2170: 28097-28098 (2)
2171-2179: 28100-28108 (9)
2180-2186: 28113-28119 (7)
2187-2190: 28469-28472 (4)
2191-2192: 28474-28475 (2)
2193-2194: 28487-28488 (2)
2195-2195: 28490-28490 (1)
2196-2198: 28492-28494 (3)
2199-2199: 28496-28496 (1)
2200-2200: 28555-28555 (1)
2201-2201: 28563-28563 (1)
2202-2204: 28565-28567 (3)
2205-2210: 28570-28575 (6)
2211-2212: 28577-28578 (2)
2213-2214: 28580-28581 (2)
2215-2216: 28608-28609 (2)
2217-2217: 28611-28611 (1)
2218-2218: 28613-28613 (1)
2219-2221: 28615-28617 (3)
2222-2223: 28619-28620 (2)
2224-2226: 28623-28625 (3)
2227-2229: 28627-28629 (3)
2230-2231: 28633-28634 (2)
2232-2232: 28638-28638 (1)
2233-2233: 28640-28640 (1)
2234-2237: 28643-28646 (4)
2238-2240: 28667-28669 (3)
2241-2243: 28672-28674 (3)
2244-2245: 28676-28677 (2)
2246-2246: 28749-28749 (1)
2247-2253: 28756-28762 (7)
2254-2255: 28764-28765 (2)
2256-2263: 28767-28774 (8)
2264-2270: 28776-28782 (7)
2271-2272: 28784-28785 (2)
2273-2274: 28796-28797 (2)
2275-2277: 28813-28815 (3)
2278-2281: 28819-28822 (4)
2282-2283: 28824-28825 (2)
2284-2285: 28839-28840 (2)
2286-2286: 28843-28843 (1)
2287-2287: 28846-28846 (1)
2288-2294: 28849-28855 (7)
2295-2295: 28918-28918 (1)
2296-2296: 28953-28953 (1)
2297-2300: 28955-28958 (4)
2301-2301: 28968-28968 (1)
2302-2302: 28970-28970 (1)
2303-2306: 28986-28989 (4)
2307-2308: 28991-28992 (2)
2309-2309: 29002-29002 (1)
2310-2311: 29025-29026 (2)
2312-2312: 29028-29028 (1)
2313-2314: 29031-29032 (2)
2315-2315: 29036-29036 (1)
2316-2316: 29057-29057 (1)
2317-2318: 29060-29061 (2)
2319-2319: 29076-29076 (1)
2320-2322: 29079-29081 (3)
2323-2325: 29089-29091 (3)
2326-2327: 29094-29095 (2)
2328-2330: 29105-29107 (3)
2331-2333: 29109-29111 (3)
2334-2335: 29113-29114 (2)
2336-2336: 29116-29116 (1)
2337-2337: 29119-29119 (1)
2338-2339: 29122-29123 (2)
2340-2340: 29130-29130 (1)
2341-2341: 29133-29133 (1)
2342-2344: 29136-29138 (3)
2345-2347: 29146-29148 (3)
2348-2348: 29150-29150 (1)
2349-2350: 29153-29154 (2)
2351-2351: 29167-29167 (1)
2352-2352: 29172-29172 (1)
2353-2353: 29184-29184 (1)
2354-2356: 29193-29195 (3)
2357-2357: 29248-29248 (1)
2358-2361: 29307-29310 (4)
2362-2362: 29335-29335 (1)
2363-2363: 29338-29338 (1)
2364-2367: 29349-29352 (4)
2368-2368: 29408-29408 (1)
2369-2370: 29422-29423 (2)
2371-2371: 29425-29425 (1)
2372-2372: 29428-29428 (1)
2373-2373: 29434-29434 (1)
2374-2374: 29443-29443 (1)
2375-2378: 29449-29452 (4)
2379-2379: 29569-29569 (1)
2380-2380: 29575-29575 (1)
2381-2382: 29591-29592 (2)
2383-2386: 29595-29598 (4)
2387-2393: 29601-29607 (7)
2394-2394: 29609-29609 (1)
2395-2395: 29611-29611 (1)
2396-2397: 29613-29614 (2)
2398-2398: 29619-29619 (1)
2399-2399: 29623-29623 (1)
2400-2401: 29629-29630 (2)
2402-2405: 29649-29652 (4)
2406-2406: 29666-29666 (1)
2407-2408: 29681-29682 (2)
2409-2409: 29685-29685 (1)
2410-2412: 29687-29689 (3)
2413-2414: 29692-29693 (2)
2415-2415: 29698-29698 (1)
2416-2418: 29700-29702 (3)
2419-2420: 29707-29708 (2)
2421-2421: 29710-29710 (1)
2422-2428: 29714-29720 (7)
2429-2437: 29724-29732 (9)
2438-2438: 29737-29737 (1)
2439-2443: 29739-29743 (5)
2444-2447: 29747-29750 (4)
2448-2452: 29752-29756 (5)
2453-2453: 29758-29758 (1)
2454-2455: 29761-29762 (2)
2456-2457: 29765-29766 (2)
2458-2458: 29771-29771 (1)
2459-2460: 29773-29774 (2)
2461-2461: 29779-29779 (1)
2462-2469: 29781-29788 (8)
2470-2470: 29797-29797 (1)
2471-2478: 29799-29806 (8)
2479-2479: 29813-29813 (1)
2480-2480: 29815-29815 (1)
2481-2485: 29817-29821 (5)
2486-2487: 29865-29866 (2)
2488-2489: 29882-29883 (2)
2490-2490: 29958-29958 (1)
2491-2491: 29961-29961 (1)
2492-2492: 30008-30008 (1)
2493-2493: 30010-30010 (1)
2494-2496: 30014-30016 (3)
2497-2498: 30022-30023 (2)
2499-2501: 30025-30027 (3)
2502-2502: 30034-30034 (1)
2503-2503: 30036-30036 (1)
2504-2504: 30040-30040 (1)
2505-2512: 30059-30066 (8)
2513-2515: 30070-30072 (3)
2516-2516: 30077-30077 (1)
2517-2517: 30283-30283 (1)
2518-2518: 30291-30291 (1)
2519-2519: 30293-30293 (1)
2520-2523: 30377-30380 (4)
2524-2524: 30382-30382 (1)
2525-2525: 30389-30389 (1)
2526-2526: 30397-30397 (1)
2527-2535: 30401-30409 (9)
2536-2543: 30411-30418 (8)
2544-2544: 30425-30425 (1)
2545-2546: 30427-30428 (2)
2547-2552: 30432-30437 (6)
2553-2557: 30439-30443 (5)
2558-2559: 30446-30447 (2)
2560-2560: 30461-30461 (1)
2561-2561: 30464-30464 (1)
2562-2563: 30479-30480 (2)
2564-2564: 30482-30482 (1)
2565-2565: 30571-30571 (1)
2566-2570: 30574-30578 (5)
2571-2574: 30581-30584 (4)
2575-2578: 30592-30595 (4)
2579-2579: 30607-30607 (1)
2580-2580: 30609-30609 (1)
2581-2581: 30611-30611 (1)
2582-2585: 30613-30616 (4)
2586-2586: 30624-30624 (1)
2587-2594: 30627-30634 (8)
2595-2602: 30641-30648 (8)
2603-2605: 30651-30653 (3)
2606-2606: 30660-30660 (1)
2607-2607: 30671-30671 (1)
2608-2608: 30682-30682 (1)
2609-2609: 30698-30698 (1)
2610-2610: 30702-30702 (1)
2611-2611: 30705-30705 (1)
2612-2614: 30709-30711 (3)
2615-2615: 30713-30713 (1)
2616-2616: 30715-30715 (1)
2617-2617: 30717-30717 (1)
2618-2619: 30734-30735 (2)
2620-2622: 30740-30742 (3)
2623-2623: 30749-30749 (1)
2624-2625: 30752-30753 (2)
2626-2626: 30756-30756 (1)
2627-2627: 30759-30759 (1)
2628-2629: 30776-30777 (2)
2630-2631: 30786-30787 (2)
2632-2635: 30796-30799 (4)
2636-2636: 30801-30801 (1)
2637-2637: 30806-30806 (1)
2638-2638: 30955-30955 (1)
2639-2643: 30957-30961 (5)
2644-2644: 30969-30969 (1)
2645-2645: 31549-31549 (1)
2646-2649: 31551-31554 (4)
2650-2651: 31609-31610 (2)
2652-2656: 31613-31617 (5)
2657-2657: 31619-31619 (1)
2658-2658: 31625-31625 (1)
2659-2660: 31627-31628 (2)
2661-2661: 31631-31631 (1)
2662-2662: 31636-31636 (1)
2663-2663: 31638-31638 (1)
2664-2664: 31640-31640 (1)
2665-2666: 31642-31643 (2)
2667-2667: 31645-31645 (1)
2668-2668: 31647-31647 (1)
2669-2671: 31650-31652 (3)
2672-2673: 31654-31655 (2)
2674-2675: 31658-31659 (2)
2676-2676: 31665-31665 (1)
2677-2679: 31669-31671 (3)
2680-2681: 31678-31679 (2)
2682-2682: 31681-31681 (1)
2683-2683: 31685-31685 (1)
2684-2684: 31693-31693 (1)
2685-2685: 31699-31699 (1)
2686-2686: 31701-31701 (1)
2687-2687: 32022-32022 (1)
2688-2688: 32028-32028 (1)
2689-2689: 32033-32033 (1)
2690-2690: 32038-32038 (1)
2691-2691: 32044-32044 (1)
2692-2692: 32048-32048 (1)
2693-2693: 32057-32057 (1)
2694-2696: 56640-56642 (3)
2697-2697: 56644-56644 (1)
2698-2699: 56647-56648 (2)
2700-2700: 56650-56650 (1)
2701-2701: 56654-56654 (1)
2702-2704: 12692-12694 (3)
2705-2705: 52949-52949 (1)
2706-2706: 51414-51414 (1)
2707-2707: 51417-51417 (1)
2708-2708: 52617-52617 (1)
2709-2709: 52620-52620 (1)
2710-2710: 52625-52625 (1)
2711-2711: 51421-51421 (1)
2712-2712: 52576-52576 (1)
2713-2713: 52627-52627 (1)
2714-2714: 52988-52988 (1)
2715-2716: 114848-114849 (2)
2717-2717: 116505-116505 (1)
2718-2718: 53944-53944 (1)
2719-2719: 53967-53967 (1)
2720-2720: 54139-54139 (1)
2721-2722: 54231-54232 (2)
2723-2723: 54269-54269 (1)
2724-2725: 54271-54272 (2)
2726-2728: 54274-54276 (3)
2729-2729: 8840-8840 (1)
2730-2730: 12089-12089 (1)
2731-2731: 12091-12091 (1)
2732-2732: 30852-30852 (1)
2733-2733: 30897-30897 (1)
2734-2734: 30899-30899 (1)
2735-2735: 30902-30902 (1)
2736-2736: 30907-30907 (1)
2737-2737: 30909-30909 (1)
2738-2738: 30920-30920 (1)
2739-2741: 30922-30924 (3)
2742-2742: 30932-30932 (1)
2743-2745: 30935-30937 (3)
2746-2749: 30939-30942 (4)
2750-2751: 30949-30950 (2)
2752-2752: 30954-30954 (1)
2753-2753: 30956-30956 (1)
2754-2754: 31038-31038 (1)
2755-2755: 31044-31044 (1)
2756-2756: 31054-31054 (1)
2757-2757: 31059-31059 (1)
2758-2758: 31063-31063 (1)
2759-2759: 31065-31065 (1)
2760-2760: 31067-31067 (1)
2761-2761: 31070-31070 (1)
2762-2762: 31075-31075 (1)
2763-2763: 31618-31618 (1)
2764-2764: 31620-31620 (1)
2765-2765: 31626-31626 (1)
2766-2767: 31629-31630 (2)
2768-2768: 31633-31633 (1)
2769-2769: 31635-31635 (1)
2770-2770: 31639-31639 (1)
2771-2771: 31641-31641 (1)
2772-2772: 31646-31646 (1)
2773-2773: 31648-31648 (1)
2774-2774: 31653-31653 (1)
2775-2775: 31657-31657 (1)
2776-2776: 31666-31666 (1)
2777-2777: 31672-31672 (1)
2778-2778: 31677-31677 (1)
2779-2779: 31682-31682 (1)
2780-2780: 31687-31687 (1)
2781-2781: 31692-31692 (1)
2782-2782: 34258-34258 (1)
2783-2783: 52797-52797 (1)
2784-2784: 52977-52977 (1)
2785-2785: 52979-52979 (1)
2786-2786: 52981-52981 (1)
2787-2787: 52983-52983 (1)
2788-2788: 52986-52986 (1)
2789-2791: 54112-54114 (3)
2792-2792: 54125-54125 (1)
2793-2794: 56666-56667 (2)
2795-2799: 56670-56674 (5)
2800-2800: 56692-56692 (1)
2801-2802: 56718-56719 (2)
2803-2803: 56793-56793 (1)
2804-2808: 57086-57090 (5)
2809-2809: 57094-57094 (1)
2810-2810: 71446-71446 (1)
2811-2811: 71448-71448 (1)
2812-2812: 71451-71451 (1)
2813-2813: 54143-54143 (1)
2814-2820: 113973-113979 (7)
2821-2821: 54221-54221 (1)
2822-2822: 54235-54235 (1)
2823-2823: 54243-54243 (1)
2824-2824: 54246-54246 (1)
2825-2828: 54278-54281 (4)
2829-2829: 54283-54283 (1)
2830-2832: 71454-71456 (3)
2833-2835: 71458-71460 (3)
2836-2853: 71462-71479 (18)
2854-2886: 81643-81675 (33)
2887-2891: 81681-81685 (5)
2892-2894: 65538-65540 (3)
2895-2895: 69217-69217 (1)
2896-2896: 69255-69255 (1)
2897-2899: 69332-69334 (3)
2900-2901: 69351-69352 (2)
2902-2902: 69356-69356 (1)
2903-2905: 69367-69369 (3)
2906-2907: 69714-69715 (2)
2908-2909: 69875-69876 (2)
2910-2911: 69878-69879 (2)
2912-2913: 71244-71245 (2)
2914-2914: 71249-71249 (1)
2915-2915: 78530-78530 (1)
2916-2916: 80724-80724 (1)
2917-2919: 80736-80738 (3)
2920-2922: 80740-80742 (3)
2923-2923: 80745-80745 (1)
2924-2925: 80747-80748 (2)
2926-2928: 80750-80752 (3)
2929-2936: 81377-81384 (8)
2937-2937: 81676-81676 (1)
2938-2938: 81686-81686 (1)
2939-2946: 81691-81698 (8)
2947-2950: 81700-81703 (4)
2951-2951: 81705-81705 (1)
2952-2958: 104273-104279 (7)
2959-2973: 104281-104295 (15)
2974-2978: 114219-114223 (5)
2979-2980: 114225-114226 (2)
2981-2996: 114228-114243 (16)
2997-3017: 114245-114265 (21)
3018-3030: 114267-114279 (13)
3031-3035: 114281-114285 (5)
3036-3039: 114287-114290 (4)
3040-3040: 114819-114819 (1)
3041-3044: 56656-56659 (4)
3045-3045: 56662-56662 (1)
3046-3046: 114836-114836 (1)
3047-3055: 114839-114847 (9)
3056-3057: 114851-114852 (2)
3058-3058: 114862-114862 (1)
3059-3065: 114864-114870 (7)
3066-3066: 114872-114872 (1)
3067-3070: 114874-114877 (4)
3071-3071: 29869-29869 (1)
3072-3072: 114880-114880 (1)
3073-3077: 114892-114896 (5)
3078-3083: 115518-115523 (6)
3084-3086: 115525-115527 (3)
3087-3088: 114931-114932 (2)
3089-3091: 114943-114945 (3)
3092-3092: 114947-114947 (1)
3093-3093: 114949-114949 (1)
3094-3096: 114957-114959 (3)
3097-3099: 114961-114963 (3)
3100-3100: 114966-114966 (1)
3101-3104: 114968-114971 (4)
3105-3106: 114974-114975 (2)
3107-3107: 114977-114977 (1)
3108-3109: 114981-114982 (2)
3110-3111: 114992-114993 (2)
3112-3113: 115004-115005 (2)
3114-3114: 115007-115007 (1)
3115-3122: 115014-115021 (8)
3123-3123: 115025-115025 (1)
3124-3126: 115048-115050 (3)
3127-3128: 115053-115054 (2)
3129-3129: 115056-115056 (1)
3130-3135: 115067-115072 (6)
3136-3136: 115074-115074 (1)
3137-3137: 115078-115078 (1)
3138-3138: 115085-115085 (1)
3139-3146: 115098-115105 (8)
3147-3157: 115110-115120 (11)
3158-3158: 115127-115127 (1)
3159-3160: 115129-115130 (2)
3161-3164: 115133-115136 (4)
3165-3165: 115138-115138 (1)
3166-3166: 115140-115140 (1)
3167-3167: 115560-115560 (1)
3168-3173: 117645-117650 (6)
3174-3180: 117652-117658 (7)
3181-3188: 117660-117667 (8)
3189-3199: 117669-117679 (11)
3200-3204: 117681-117685 (5)
3205-3210: 117687-117692 (6)
3211-3232: 117694-117715 (22)
3233-3235: 117717-117719 (3)
3236-3246: 117721-117731 (11)
3247-3262: 117733-117748 (16)
3263-3273: 117750-117760 (11)
3274-3287: 117762-117775 (14)
3288-3310: 117777-117799 (23)
3311-3326: 117801-117816 (16)
3327-3338: 117818-117829 (12)
3339-3347: 117831-117839 (9)
3348-3365: 117841-117858 (18)
3366-3417: 117860-117911 (52)
3418-3449: 117913-117944 (32)
3450-3471: 117946-117967 (22)
3472-3487: 117969-117984 (16)
3488-3506: 117986-118004 (19)
3507-3526: 118006-118025 (20)
3527-3560: 118027-118060 (34)
3561-3590: 118062-118091 (30)
3591-3624: 118093-118126 (34)
3625-3636: 118128-118139 (12)
3637-3672: 118141-118176 (36)
3673-3726: 118178-118231 (54)
3727-3732: 118233-118238 (6)
3733-3736: 118240-118243 (4)
3737-3755: 118245-118263 (19)

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-08  2:50                                         ` Andrew Morton
@ 2002-10-08  2:54                                           ` Simon Kirby
  2002-10-08  3:00                                             ` Andrew Morton
  2002-10-08 12:49                                           ` jlnance
  1 sibling, 1 reply; 206+ messages in thread
From: Simon Kirby @ 2002-10-08  2:54 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Mon, Oct 07, 2002 at 07:50:27PM -0700, Andrew Morton wrote:

> Oh tell me about it.
> 
> Appended is the offset->block mapping for my "linux-kernel" mailbox.
> Read it and weep...

Eep. :)  Just out of interest, how did you get these mappings?

> 0-3: 3247328-3247331 (4)
> 4-10: 3247354-3247360 (7)
> 11-11: 3247371-3247371 (1)
> 12-14: 3247373-3247375 (3)

Simon-

[        Simon Kirby        ][        Network Operations        ]
[     sim@netnation.com     ][     NetNation Communications     ]
[  Opinions expressed are not necessarily those of my employer. ]

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08  0:39                                         ` Theodore Ts'o
@ 2002-10-08  2:59                                           ` Andrew Morton
  2002-10-08 16:15                                             ` Theodore Ts'o
  0 siblings, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-08  2:59 UTC (permalink / raw)
  To: Theodore Ts'o
  Cc: Linus Torvalds, Daniel Phillips, Chris Friesen, Martin J. Bligh,
	Oliver Neukum, Rob Landley, linux-kernel

Theodore Ts'o wrote:
> 
> ...
> It depends on what you are doing.  BSD, and even XFS, uses the concept
> of using cylinder groups or block groups as one of many tools to avoid
> file fragmentation and to concentrate locality for files within a
> directory.  The reason why FAT filesystems have file fragmentation
> problems in a far worse way is because they don't have the
> concept of a block group, and simply always allocate from the
> beginning of the filesystem.  This is effectively what would happen if
> you had a single block/cylinder group in the filesystem.
>

In the testing which I did, based on Keith Smith's traces, the
current code really isn't very effective.

What I did was to run his aging workload an increasing number of
times.  Then measured the fragmentation of the files which it
left behind.  I measured the fragmentation simply by timing
how long it took to read all the files, and compared that to
how long it took to read the same files when they had been laid
down on a fresh fs.

After ten aging rounds, with the current block allocator, we're
running 4x to 5x times slower.  With the Orlov allocator, we're
running 5x to 6x slower.  Either way, that's a big performance
slowdown.

Orlov turns a 400% slowdown into a 500% slowdown.  So it is a
25% regression for slow growth.  But it is a 300% to 500%
improvement for fast-growth.   (Well, it used to be.  But I
just fixed a memory-corrupting bug in it which I think has
slowed it down.  It's now only double the speed on scsi, triple
on IDE).

What we need, *regardless* of which allocator is used is effective
defrag tools.

I just retested.

2.5.41, scsi:
	time find linux-2.4.19 -type f | xargs cat > /dev/null
	find linux-2.4.19 -type f  0.06s user 0.24s system 1% cpu 19.274 total
	xargs cat > /dev/null  0.23s user 1.42s system 8% cpu 19.954 total

2.5.41, IDE:
	time find linux-2.4.19 -type f | xargs cat > /dev/null
	find linux-2.4.19 -type f  0.06s user 0.23s system 0% cpu 29.274 total
	xargs cat > /dev/null  0.23s user 1.58s system 5% cpu 30.199 total

2.5.41+Orlov, SCSI:
	time find linux-2.4.19 -type f | xargs cat > /dev/null
	find linux-2.4.19 -type f  0.06s user 0.24s system 2% cpu 11.579 total
	xargs cat > /dev/null  0.23s user 1.46s system 14% cpu 11.951 total

2.5.41+Orlov, IDE:
	time find linux-2.4.19 -type f | xargs cat > /dev/null
	find linux-2.4.19 -type f  0.06s user 0.24s system 2% cpu 12.225 total
	xargs cat > /dev/null  0.22s user 1.59s system 14% cpu 12.500 total

We need some of that goodness.

>
> [ administrator hints ]
>

Alas, nobody uses them :(

Maybe a mount option?  But I think the current algorithm should
default to "off".

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08  2:54                                           ` Simon Kirby
@ 2002-10-08  3:00                                             ` Andrew Morton
  2002-10-08 16:17                                               ` Theodore Ts'o
  0 siblings, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-08  3:00 UTC (permalink / raw)
  To: Simon Kirby; +Cc: linux-kernel

Simon Kirby wrote:
> 
> On Mon, Oct 07, 2002 at 07:50:27PM -0700, Andrew Morton wrote:
> 
> > Oh tell me about it.
> >
> > Appended is the offset->block mapping for my "linux-kernel" mailbox.
> > Read it and weep...
> 
> Eep. :)  Just out of interest, how did you get these mappings?
> 

/*
 * Show file blocks
 */

#include <unistd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include <linux/fs.h>

static const char *myname;

/*
 * Print one run of physically contiguous blocks as
 * "<first logical>-<last logical>: <first device>-<last device> (<count>)".
 */
static void print_extent(long first_logical, long start, long end)
{
	printf("%ld-%ld: %ld-%ld (%ld)\n",
		first_logical,
		first_logical + (end - start),
		start, end,
		end - start + 1);
}

/*
 * Usage: <prog> file
 *
 * Walks every logical block of the named file, asks the kernel for the
 * corresponding device block via the FIBMAP ioctl, and prints each run of
 * consecutive device blocks as one extent line.
 *
 * Exit status is 0 when every block was mapped, 1 on a usage error, an
 * open/lseek failure, a permission failure, or if any FIBMAP call failed.
 */
int main(int argc, char **argv)
{
	int i, err = 0;
	int fd;
	int blksize;		/* FIGETBSZ stores an int, not a long */
	off_t filesz;
	long fileblks, blk;
	long start = -1, end = -1;
	long first_logical = -1;

	myname = argv[0];

	/* No options are accepted; anything getopt reports is a usage error. */
	while ((i = getopt(argc, argv, "")) != EOF) {
		switch (i) {
		default:
			err++;
			break;
		}
	}

	if (err || optind != argc-1) {
		fprintf(stderr, "Usage: %s file\n",
			myname);
		exit(1);
	}

	fd = open(argv[optind], O_RDONLY);
	if (fd == -1) {
		perror(argv[optind]);
		exit(1);
	}

	if (ioctl(fd, FIGETBSZ, &blksize) == -1) {
		perror("FIGETBSZ");
		fprintf(stderr, "assuming 4096\n");
		blksize = 4096;
	}

	filesz = lseek(fd, 0, SEEK_END);
	if (filesz == (off_t)-1) {
		/* Without a size we cannot know how many blocks to query. */
		perror("lseek");
		exit(1);
	}
	lseek(fd, 0, SEEK_SET);
	/* Round up so a trailing partial block is included. */
	fileblks = (filesz + blksize-1) / blksize;

	err = 0;
	for (blk = 0; blk < fileblks; blk++) {
		/*
		 * FIBMAP takes a pointer to int: logical block number in,
		 * device block number out.
		 */
		int devblk = (int)blk;

		if (ioctl(fd, FIBMAP, &devblk) == -1) {
			/* errno values are positive; compare against EPERM. */
			if (errno == EPERM) {
				fprintf(stderr, "got root?\n");
				exit(1);
			}
			printf("%ld: %d (%s)\n",
				blk, errno, strerror(errno));
			err++;
			continue;
		}

		if (start == -1) {
			/* First successfully mapped block opens the first extent. */
			start = devblk;
			end = devblk;
			first_logical = blk;
		} else if (devblk == end + 1) {
			/* Physically adjacent to the current extent: extend it. */
			end++;
		} else {
			/* Discontinuity: flush the current extent, start a new one. */
			print_extent(first_logical, start, end);
			start = devblk;
			end = devblk;
			first_logical = blk;
		}
	}

	/* Flush the final extent, if at least one block was mapped. */
	if (start != -1)
		print_extent(first_logical, start, end);

	exit(err ? 1 : 0);
}

^ permalink raw reply	[flat|nested] 206+ messages in thread

* [OT] Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 15:34                                       ` Jan Hudec
@ 2002-10-08  3:12                                         ` Scott Mcdermott
  2002-10-10 23:49                                           ` Mike Fedyk
  0 siblings, 1 reply; 206+ messages in thread
From: Scott Mcdermott @ 2002-10-08  3:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jan Hudec, Jesse Pollard, Oliver Neukum, Helge Hafting, Martin J. Bligh

Jan Hudec on Mon  7/10 17:34 +0200:
> Well, depends on what we want to measure. If it's on the begining of
> main, it measures library loading time. Then argument parsing, library
> initialization, X initialization etc. can be measured. All those parts
> should be timed so we can see where most time is spent and which can
> be sped up.

newer glibc prelinking support should help here a lot, according to
published time trials I have seen with and without the feature.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 18:22                           ` Daniel Phillips
@ 2002-10-08  8:19                             ` Jan Hudec
  0 siblings, 0 replies; 206+ messages in thread
From: Jan Hudec @ 2002-10-08  8:19 UTC (permalink / raw)
  To: Daniel Phillips
  Cc: Jesse Pollard, Rob Landley, Linus Torvalds, Martin J. Bligh,
	linux-kernel

On Mon, Oct 07, 2002 at 08:22:41PM +0200, Daniel Phillips wrote:
> On Monday 07 October 2002 15:56, Jesse Pollard wrote:
> > [the mouse] will still stall everytime the mouse crosses the window border IF the
> > application has specified "enter/leave" event notification. This requires the
> > application to be swapped in to recieve the event. The only fix is locking
> > the application/X libraries into memory.
> 
> That one could be punted with an hourglass cursor, until the events start flowing.
> Well.  Not sure how much this has to do with the kernel...

Nothing. It's X. And it will take another X protocol extension (so it
will suck yet more).

-------------------------------------------------------------------------------
						 Jan 'Bulb' Hudec <bulb@ucw.cz>

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-07 22:12                                                 ` Andrew Morton
@ 2002-10-08  8:49                                                   ` Padraig Brady
  0 siblings, 0 replies; 206+ messages in thread
From: Padraig Brady @ 2002-10-08  8:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Daniel Phillips, Linus Torvalds, Martin J. Bligh, Oliver Neukum,
	Rob Landley, linux-kernel

Andrew Morton wrote:
> Daniel Phillips wrote:
> 
>>On Monday 07 October 2002 23:55, Linus Torvalds wrote:
>>
>>>On Mon, 7 Oct 2002, Daniel Phillips wrote:
>>>
>>>>>Sure. The mey is:
>>>>
>>>>            ^^^ <---- "bet" ?
>>>
>>>Yeah. What the heck happened to my fingers?
>>
>>Apparently, one of them missed the key it was aiming for and the other one
>>changed hands.
>>
> 
> They don't call him Kubys for nothing.
> 
> I dug out and dusted off Al's Orlov allocator patch.  And found
> a comment which rather helps explain how it works.
> 
> I performance tested this back in November.  See
> http://www.uwsg.iu.edu/hypermail/linux/kernel/0111.1/0281.html
> 
> Bottom line: it's as good as the use-first-fit-everywhere
> approach, and appears to have better long-term antifragmentation
> characteristics.
> 
> I shall test it.

See dirpref (Orlov's allocator) here:
http://www.maths.tcd.ie/~dwmalone/p/usenix02.pdf
I was going to do this myself but of course it's
already done, silly me.

Pádraig.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-08  2:50                                         ` Andrew Morton
  2002-10-08  2:54                                           ` Simon Kirby
@ 2002-10-08 12:49                                           ` jlnance
  2002-10-08 17:09                                             ` Andrew Morton
  1 sibling, 1 reply; 206+ messages in thread
From: jlnance @ 2002-10-08 12:49 UTC (permalink / raw)
  To: linux-kernel

On Mon, Oct 07, 2002 at 07:50:27PM -0700, Andrew Morton wrote:

> I have the core code for ext3.  It's at
> http://www.zip.com.au/~akpm/linux/patches/2.4/2.4.19-pre10/ext3-reloc-page.patch
> I never tested it, but that's a formality ;)
> 
> It offers a simple ioctl to reloate a single page's worth of blocks.
> It's fully journalled and recoverable, pagecache coherent, etc.
> But the userspace application which calls that ioctl hasn't been
> written.

Hi Andrew,
    I decided not to let the fact that I have never written any FS code
stand in the way of making suggestions :-) :-)
    Do you think it would be better to make the defragmentation part of
the normal operation of the FS rather than a separate application?  For
example, if you did a fragmentation check/fix on the last close of a file
you would know that coherency issues were not going to be important.  It
might also give you some way to determine which files were important to
keep close together.

Thanks,

Jim

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07  9:18                                 ` Oliver Neukum
  2002-10-07 14:11                                   ` Jan Hudec
  2002-10-07 15:15                                   ` Martin J. Bligh
@ 2002-10-08 13:49                                   ` Helge Hafting
  2 siblings, 0 replies; 206+ messages in thread
From: Helge Hafting @ 2002-10-08 13:49 UTC (permalink / raw)
  To: Oliver Neukum, linux-kernel

Oliver Neukum wrote:
> 
> On Monday 07 October 2002 10:08, Helge Hafting wrote:
> >  People getting interested in linux
> > seems to believe that openoffice is the msoffice replacement,
> > and that _is_ a huge bloated pig.  It needs 50M to start
> > the text editor - and lots of _cpu_.  It takes a long time
> > to start on a 266MHz machine even when the disk io
> > is avoided by the pagecahce.
> 
> OpenOffice _is_ an important application, whether we like it or not.
> 
Sure.  It is important.  Fortunately it is open source, so
improving on it might be a good idea.  I don't think the kernel
does anything wrong with it - it is simply very big and dead slow.

> How does one measure and profile application startup other than with
> a stopwatch ? I'd like to gather some objective data on this.
> 
> > A snappy desktop is trivial with 2.5, even with a slow machine.
> > Just stay away from gnome and kde, use a ugly fast
> 
> A desktop machine needs to run a desktop enviroment. Only a window manager is
> not enough.

Of course.  My machine (256M, 266MHz) is snappy with 
a netscape, 4-5 opera windows, 5-10 xterms, a few
xemacs'es, a couple of lyx windows and xdvi,
and sometimes a compile or latex running.

This is possibly spread out over 2-3 virtual desktops
provided by icewm.  Switching between them is instantaneous,
although I can see "slow" things like xdvi redraw.  The rest
just appear. Throwing an openoffice into
the mix causes no problems with desktop snappiness,
but openoffice itself is too slow to use.  Particularly
if a cpu hog like gcc/latex is running.  But then
this _is_ a slow machine these days.
> 
> > window manager like icewm or twm (and possibly lots
> > of others I haven't even heard about.)
> > X itself is snappy enough, particularly with increased
> > priority.
> > Take some care when selecting apps (yes - there is choice!)
> > and the desktop is just fine.  Openoffice is a nice
> > package of programs, but there are replacements for most
> > of them if speed is an issue.  If the machine is powerful
> > enough to run ms software snappy then speed probably
> > isn't such a big issue though.
> 
> KDE and friends _are_ not quite optimised for speed. That however doesn't
> mean that the kernel should not make an effort to allow them to run as fast
> as they can.

The kernel should do its best - and it seems to do well too.
I believe KDE and friends may have performance problems
of their own, and stay away from them mostly.  I don't need
_pretty_, merely something that works well.  That might
not sell, but nobody sells 266MHz machines either.

Helge Hafting

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-07 19:36                                     ` Andrew Morton
  2002-10-08  2:36                                       ` Simon Kirby
@ 2002-10-08 13:54                                       ` Helge Hafting
  2002-10-08 15:31                                         ` Andreas Dilger
  1 sibling, 1 reply; 206+ messages in thread
From: Helge Hafting @ 2002-10-08 13:54 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel

Andrew Morton wrote:
> 
[...]
> ext2 and ext3 filesystems are carved up into "block groups", aka
> "cylinder groups".  Each one is 4096*8 blocks - typically 128 MB.
> So you can easily have hundreds of blockgroups on a single partition.
> 
> The inode allocator is designed to arrange that files which are within the
> same directory fall in the same blockgroup, for locality of reference.
> 
> But new directories are placed "far away", in block groups which have
> plenty of free space.  (find_group_dir -> find a blockgroup for a
> directory).
> 
> The thinking here is that files in a separate directory are related,
> and files in different directories are unrelated.  So we can take
> advantage of that heuristic - go and use a new blockgroup each time
> a new directory is created.  This is a levelling algorithm which
> tries to keep all blockgroups at a similar occupancy level.
> That's a good thing, because high occupancy levels lead to fragmentation.
> 
> find_group_other() is basically first-fit-from-start-of-disk, and
> if we use that for directories as well as files, your untar-onto-a-clean-disk
> simply lays everything out in a contiguous chunk.
> 
> Part of the problem here is that it has got worse over time.  The
> size of a blockgroup is hardwired to blocksize*bits-in-a-byte*blocksize.
> But disks keep on getting bigger.  Five years ago (when, presumably, this
> algorithm was designed), a typical partition had, what?  Maybe four
> blockgroups?  Now it has hundreds, and so the "levelling" is levelling
> across hundreds of blockgroups and not just a handful.

If having only "a few" block groups really works better
(even for today's bigger disks) then bigger
block groups seem like a solution.

Changing the on-disk format might not be popular, but there
is no need for that.  Simply regard several on-disk block
groups as a bigger "allocation group" when using the above
algorithm.  This should be perfectly backwards compatible.

Helge Hafting

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-08 13:54                                       ` Helge Hafting
@ 2002-10-08 15:31                                         ` Andreas Dilger
  0 siblings, 0 replies; 206+ messages in thread
From: Andreas Dilger @ 2002-10-08 15:31 UTC (permalink / raw)
  To: Helge Hafting; +Cc: Andrew Morton, linux-kernel

On Oct 08, 2002  15:54 +0200, Helge Hafting wrote:
> Andrew Morton wrote:
> > Part of the problem here is that it has got worse over time.  The
> > size of a blockgroup is hardwired to blocksize*bits-in-a-byte*blocksize.
> > But disks keep on getting bigger.  Five years ago (when, presumably, this
> > algorithm was designed), a typical partition had, what?  Maybe four
> > blockgroups?  Now it has hundreds, and so the "levelling" is levelling
> > across hundreds of blockgroups and not just a handful.
> 
> If having only "a few" block groups really work better 
> (even for todays bigger disks) then bigger
> block groups seems like a solution.
> 
> changing the on-disk format might not be popular, but there
> is no need for that.  Simply regard several on-disk block
> groups as a bigger "allocation group" when using the above
> algorithm.  This should be perfectly backwards compatible.

We already have plans for something like this - a "meta blockgroup".
This will help us with several things, actually, so it is likely to
be implemented.

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-08  2:59                                           ` Andrew Morton
@ 2002-10-08 16:15                                             ` Theodore Ts'o
  2002-10-08 19:39                                               ` Andrew Morton
  0 siblings, 1 reply; 206+ messages in thread
From: Theodore Ts'o @ 2002-10-08 16:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Daniel Phillips, Chris Friesen, Martin J. Bligh,
	Oliver Neukum, Rob Landley, linux-kernel

On Mon, Oct 07, 2002 at 07:59:26PM -0700, Andrew Morton wrote:
> In the testing which I did, based on Keith Smith's traces, the
> current code really isn't very effective.
> 
> What I did was to run his aging workload an increasing number of
> times.  Then measured the fragmentation of the files which it
> left behind.  I measured the fragmentation simply by timing
> how long it took to read all the files, and compared that to
> how long it took to read the same files when they had been laid
> down on a fresh fs.

What access pattern did you use when you read the files?  Did you
sweep through filesystem directory by directory, or did you use some
other pattern (perhaps random)?

It would also be interesting to get a measure of fragmentation of the
filesystems as measured by e2fsck.  This only measures file
fragmentation, and not file locality on a per-directory (or more
ideally per-directory tree, but establishing where the directory trees
are is difficult).

> >
> > [ administrator hints ]
> >
> 
> Alas, nobody uses them :(

No one will use them if they need to do so manually.  But if we
can convert a few programs to use them, then it might work.  And
people didn't much use madvise() when it was first introduced either,
but it doesn't mean that the existence of the interface was a bad
idea....

If the current algorithm is so bad, then maybe the trick is to use the
fast-growth optimized allocator as the default, *unless* given a hint
to do otherwise via some magic mkdir flag.  Then if certain programs,
such as adduser (when creating a home directory), "cp -r", "bk clone",
tar, etc. were modified to give hints that a particular directory was
at the top of a directory tree, then the slow-growth optimized allocator
could be used to spread apart directory trees.  No, it's not perfect,
but it should be better than not using any hints at all.  (And yes, it
will take a while before the userspace tools that provide said hints
are widely deployed.)

And if we don't have any user-space hints, then we default to the
fast-growth algorithm, which should make Linus happy.  :-)

> Maybe a mount option?  But I think the current algorithm should
> default to "off".

How about a mount option with the possible values: "fast", "slow",
"hinted", and "auto", with the default being "auto" or "hinted"?
(Where hinted utilizes user-space hints, and "auto" utilizes
user-space hints if present, plus some of the so-called ugly
heuristics which you had discussed?)

						- Ted


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-08  3:00                                             ` Andrew Morton
@ 2002-10-08 16:17                                               ` Theodore Ts'o
  0 siblings, 0 replies; 206+ messages in thread
From: Theodore Ts'o @ 2002-10-08 16:17 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Simon Kirby, linux-kernel

On Mon, Oct 07, 2002 at 08:00:30PM -0700, Andrew Morton wrote:
> Simon Kirby wrote:
> > 
> > On Mon, Oct 07, 2002 at 07:50:27PM -0700, Andrew Morton wrote:
> > 
> > > Oh tell me about it.
> > >
> > > Appended is the offset->block mapping for my "linux-kernel" mailbox.
> > > Read it and weep...
> > 
> > Eep. :)  Just out of interest, how did you get these mappings?
> > 

Debugfs will also show the mappings, although it's less reliable when
used on a mounted filesystem....

						- Ted

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08 19:39                                               ` Andrew Morton
@ 2002-10-08 17:06                                                 ` Rob Landley
  0 siblings, 0 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-08 17:06 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Tuesday 08 October 2002 03:39 pm, Andrew Morton wrote:

> Well the current Orlov patch will spread top-level directories,
> so as long as /home is a mountpoint, we're fine.
>
> For more generalality, yes, I think a new chattr flag on the
> parent directory which says "spread my subdirectories out"
> would be a good solution.

Individual sysadmins may not use it much, but getting distributions to put it 
in their install/upgrade software isn't too unlikely...

Rob

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08 12:49                                           ` jlnance
@ 2002-10-08 17:09                                             ` Andrew Morton
  2002-10-10 20:53                                               ` Thomas Zimmerman
  0 siblings, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-08 17:09 UTC (permalink / raw)
  To: jlnance; +Cc: linux-kernel

jlnance@intrex.net wrote:
> 
> On Mon, Oct 07, 2002 at 07:50:27PM -0700, Andrew Morton wrote:
> 
> > I have the core code for ext3.  It's at
> > http://www.zip.com.au/~akpm/linux/patches/2.4/2.4.19-pre10/ext3-reloc-page.patch
> > I never tested it, but that's a formality ;)
> >
> > It offers a simple ioctl to reloate a single page's worth of blocks.
> > It's fully journalled and recoverable, pagecache coherent, etc.
> > But the userspace application which calls that ioctl hasn't been
> > written.
> 
> Hi Andrew,
>     I decided not to let the fact that I have never written any FS code
> stand in the way of making suggestions :-) :-)
>     Do you think it would be better to make the defragmentation part of
> the normal operation of the FS rather than a seperate application.  For
> example, if you did a fragmentation check/fix on the last close of a file
> you would know that coherency issues were not going to be important.  It
> might also give you some way to determine which files were important to
> keep close together.
> 

Well the initial approach was to put the minimum functionality
in-kernel and drive it all from userspace.  If that proved to
be inadequate then the kernel-side might need to be grown.

I'd expect that a defrag would be a batch process which is done
during quiet times.  Although one _could_ have a `defragd' which
ticks along all the time I suppose.

A defragmentation algorithm probably would not be a "per file" thing;
it would need to gather a fair amount of state about the fs, or
at least an individual block group before starting to shuffle things.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-08 22:14                             ` Jesse Pollard
@ 2002-10-08 19:11                               ` Rob Landley
  0 siblings, 0 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-08 19:11 UTC (permalink / raw)
  To: Jesse Pollard; +Cc: linux-kernel

On Tuesday 08 October 2002 06:14 pm, Jesse Pollard wrote:

> > On my laptop (with 256 megs ram and 256 megs swap).  Open up 30 or 40
> > konqueror windows of a "this page looks interesting, I'll read it
> > offline" variety until memory's full and you're about 2/3 of the way into
> > swap. (KTimeMon makes this easy to see.)  then do something swap-happy in
> > the background (including downloading a huge file, which causes disk
> > cache to grow and evict stuff, or of course running a big compile).
>
> Out of curiosity, does it also happen if you have no swap?

I'd trigger the OOM killer a lot easier?  (Done it more than once without 
meaning to...)

It used to go into REAL swap meltdown once the swap file was full, because 
it'd start paging out executables and libraries back into their files.  I've 
actually tried to avoid testing that recently, for obvious reasons. :)

As soon as I read, take notes from, index, and close about 40 open web pages, 
I can reboot the sucker without swap.  (I could try to swapoff a heavily 
loaded running system, but I tried that once and the results were NOT 
pretty...)

> It is my understanding that this change will prevent much (not all) of the
> swap activity, giving a quicker response to the mouse events. It should
> increase the amount of actual swap activity, but each activiation will be
> of shorter duration, giving a "better" apparent interactive response.

I haven't been brave enough to run 2.5 on my laptop yet.  (Soon.  I've 
downloaded it, compiled it, but haven't made it through the "what do I need 
to upgrade" list yet.  This sucker's still running 2.4.19 inserted in a 
modified red hat 7.2.)  My test machine at home's an old pentium pro 180 with 
96 megs of ram, so I haven't exactly got the world's highest interactive 
expectations there.

> > You may notice that in mozilla when your rat moves over a link, the mouse
> > pointer turns into a hand anywhere up to several seconds later on a
> > pathologically loaded system.  This usually doesn't stop the pointer from
> > moving if you just want to wander past the link and continue on.
> > "Tooltips" take two or three seconds to pop up, and this is a GOOD
> > thing...
>
> I was thinking more about switching pointer on window entry. I don't think
> a link is implemented as a window. (I thought is was a proximity check in
> an already loaded event). Or places that do pointer grabs (fortunately for
> me most of the dialog boxes I see in X don't do this).

All sorts of things can cause a stall at the edge of the window.  I've seen 
it happen at the edge of the little animated mozilla logo.

To drive a 2.4 system to its knees, all you have to do is "cat /dev/zero > 
bigfile" on a partition with a few gigabytes free, and then scrub the mouse a 
bit.

Tried it on a friend's workstation a minute ago.  The result was NOT pretty.  
2.4.19 is a lot better about this than whatever shipped with his SuSE box, 
but if you want to make desktop interactive feel suck, try running
the following on a system that's a ways into swap.  (It needs 4 gigs of disk
space, which should be more ram than most people have...)

while true
do
  dd if=/dev/zero of=tempfile bs=65536 count=65536
  rm tempfile
done

It's certainly improving.  On 2.4.19, the mouse cursor only really seems to 
get truly jerky when you exhaust the swap so badly it pages to the executable 
files.  (Then again, every few minutes it goes consistently jerky for several 
seconds.)

But by the same token, I have a server running 2.4.19 that when receiving a 
big file transfer through the 100baseT and blasting it to disk, goes 
completely into la-la land and won't allow new ssh connections until the 
transfer ends.  (I've given it a 4 gigabyte transfer and waited minutes.  The 
prompt shows up about one second after the transfer ends, and I had more than 
one machine queued waiting like that...)

I'm hoping 2.5 categorically fixes this, but haven't put it on a production 
machine yet.  Maybe I'll be able to slap together an appropriate spare box in 
a few days.  (P.S.  Did make menuconfig crashing when you tried to enter the
ALSA menu ever get fixed?  Set me back half an hour, that did...)

> Also the "tooltips" thing is implemented as a mouse window entry event
> which in turn sets a timer event. A mouse window exit event generates
> a timer cancel.
>
> One of the most amazing thing to me is the total number
> of events that occur on something a simple as a scroll bar. Entering a
> window can generate 8-10 events depending which toolkit is used.
> First the pointer character is changed, then events cascade since the
> border of a scrollbar may actually have 2 or 3 windows, each with
> a different requirement, but requesting a window entry/exit event.

Not exactly an easy problem to solve from kernel space, no.  But when 
unrelated processes can seriously interact with each other, you can't help 
but think the kernel is involved somehow... :)

> This is where a slightly different method of handling background processes
> (and I/O requests). A background process should have a lower processing
> priority.

1) This doesn't affect I/O.

2) Swapping, running executables, stating files...  all I/O the high priority 
process may need to do.

Hmmm...  You know, it might be a good idea to rip the swap file out of that
SERVER (which has 256 megs of ram also, that should be plenty) and see if 
that makes the incoming transfer hang go away...

> The I/O activity generated by that background process should also
> have a lower priority. The deadline I/O scheduler should/would/could then
> keep the forground processes (X server, apps with exposed windows) running
> by processing their I/O first.

This is what I'm hoping.  This is not the 2.4 reality, I'll tell you that. :)

> This also assumes that the X server MIGHT be able to change the priority of
> processes attached to hidden windows (iconified/covered).

Ingo was thinking about letting normal processes nice themselves up a couple 
of levels.  Enough that abuse wouldn't matter too much, but so that processes 
intended to be interactive could identify themselves as such.

Part of the problem is that "nice" is really trying to say two things.  "I 
want more CPU time" and "I want lower latency".  In theory, interactive 
processes could get SHORTER time slices (subject to some minimum), they just 
need to be dispatched more rapidly when they unblock.  Possibly the scheduler 
needs some kind of hint in addition to just a number.

> It doesn't
> address those processes that may be running detached (cron or started by
> terminal emulators) which would act like foreground processes. Though the
> terminal emulators could be detected, and have all subprocesses of the
> controlling pty reduced in priority.... Also have to recognize when they
> should again be elevated too... (or even if they should be. These things
> can take a LOT of resources). It would also have to be under the control of
> the user, since the user may need the background compile done ASAP (even if
> the user DOES run a solitare game covering the terminal window...)

Again, two separate scheduler problems.  A process that wants big long 
timeslices but doesn't care about gaps between them, and a process that wants 
short time slices in 30 milliseconds or less or it's free. :)

An artifact of the current O(1) scheduler is that if you nice a process way 
the heck DOWN it may finish slightly faster, because its timeslices are 
longer when it gets them, so the cache stays hot.

Strange but true, or at least "worked for me"...

Rob

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08 16:15                                             ` Theodore Ts'o
@ 2002-10-08 19:39                                               ` Andrew Morton
  2002-10-08 17:06                                                 ` Rob Landley
  0 siblings, 1 reply; 206+ messages in thread
From: Andrew Morton @ 2002-10-08 19:39 UTC (permalink / raw)
  To: Theodore Ts'o
  Cc: Linus Torvalds, Daniel Phillips, Chris Friesen, Martin J. Bligh,
	Oliver Neukum, Rob Landley, linux-kernel

Theodore Ts'o wrote:
> 
> On Mon, Oct 07, 2002 at 07:59:26PM -0700, Andrew Morton wrote:
> > In the testing which I did, based on Keith Smith's traces, the
> > current code really isn't very effective.
> >
> > What I did was to run his aging workload an increasing number of
> > times.  Then measured the fragmentation of the files which it
> > left behind.  I measured the fragmentation simply by timing
> > how long it took to read all the files, and compared that to
> > how long it took to read the same files when they had been laid
> > down on a fresh fs.
> 
> What access pattern did you use when you read the files?  Did you
> sweep through filesystem directory by directory, or did you use some
> other pattern (perhaps random)?

Well this is all rather dim in my memory, so the confidence level
is drooping.  I am sure that the timing was a single find | xargs cat
thing.  I also know that I investigated whether the increased time
was due to the metadata access or the data access.  I _think_ it was
mainly metadata.

But it all needs to be redone, really.

> ...
> > Maybe a mount option?  But I think the current algorithm should
> > default to "off".
> 
> How about a mount option with the possible values: "fast", "slow",
> "hinted", and "auto", with the default being "auto" or "hinted"?
> (Where hinted utilizes user-space hints, and "auto" utilizes
> user-space hints if present, plus some of the so-called ugly
> hueristics which you had discussed?)
> 

Well the current Orlov patch will spread top-level directories,
so as long as /home is a mountpoint, we're fine.

For more generality, yes, I think a new chattr flag on the
parent directory which says "spread my subdirectories out"
would be a good solution.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 14:03                           ` Rob Landley
@ 2002-10-08 22:14                             ` Jesse Pollard
  2002-10-08 19:11                               ` Rob Landley
  2002-10-09  8:17                             ` Alexander Kellett
  1 sibling, 1 reply; 206+ messages in thread
From: Jesse Pollard @ 2002-10-08 22:14 UTC (permalink / raw)
  To: Rob Landley; +Cc: linux-kernel

On Monday 07 October 2002 09:03 am, Rob Landley wrote:
> On Monday 07 October 2002 09:56 am, Jesse Pollard wrote:
> > In other words... don't swap.
>
> "Don't swap this bit", anyway.
>
> > If an application has to be swapped out, all
> > bets are off on response time.
>
> Alright, breaking the problem down into specific, bite-sized chunks, seeing
> what's easily measurable, and then picking the lowest hanging fruit:
>
> The frequency of mouse pointer stalls, and the worst case response time, is
> probably something an automated benchmark could measure.  (Z-order's a
> trickier problem because the window manager's involved, but mouse stalls are
> EASY to cause.)
>
> On my laptop (with 256 megs ram and 256 megs swap).  Open up 30 or 40
> konqueror windows of a "this page looks interesting, I'll read it offline"
> variety until memory's full and you're about 2/3 of the way into swap.
> (KTimeMon makes this easy to see.)  then do something swap-happy in the
> background (including downloading a huge file, which causes disk cache to
> grow and evict stuff, or of course running a big compile).

Out of curiosity, does it also happen if you have no swap?
It is my understanding that this change will prevent much (not all) of the
swap activity, giving a quicker response to the mouse events. It should
increase the amount of actual swap activity, but each activation will be of
shorter duration, giving a "better" apparent interactive response.

> No matter how much ram the system has, with six desktops full of open
> windows I can usually drive it DEEP into swap, without even picking an easy
> target like star/openoffice.  (Yeah, KDE sucketh.  And X should be able to
> figure out that windows not currently being displayed at all (completely
> behind other windows, on another desktop, etc) can be swapped out.  But
> it's just not designed that way...)

Partly depends on whether the X window buffers are page aligned... If they
were then that should be the result. I bet they aren't page aligned.

> > > Even the new threading work can potentially help X spin off a dedicated
> > > high-priority "update the mouse position, and manipulate window borders
> > > and z order, and never swap this thread out" thread.  (I remember the
> > > way OS/2 used to cheat and give extra time slices to anything that got
> > > a Presentation Manager window event, so you could literally speed up
> > > your program on a loaded system by "scrubbing" the mouse across it
> > > repeatedly. The resulting perception was a snappy desktop, whatever the
> > > reality was.)
> >
> > Not really - the application may want the mouse pointer changed, update
> > data based on where the mouse is located (see what happens to a rule bar
> > on image/word processors). There is also the possibility that multiple
> > processes are watching the mouse.
>
> You may notice that in mozilla when your rat moves over a link, the mouse
> pointer turns into a hand anywhere up to several seconds later on a
> pathologically loaded system.  This usually doesn't stop the pointer from
> moving if you just want to wander past the link and continue on. 
> "Tooltips" take two or three seconds to pop up, and this is a GOOD thing...

I was thinking more about switching pointer on window entry. I don't think
a link is implemented as a window. (I thought it was a proximity check in an
already loaded event). Or places that do pointer grabs (fortunately for me
most of the dialog boxes I see in X don't do this).

Also the "tooltips" thing is implemented as a mouse window entry event
which in turn sets a timer event. A mouse window exit event generates
a timer cancel.

One of the most amazing things to me is the total number
of events that occur on something as simple as a scroll bar. Entering a
window can generate 8-10 events depending which toolkit is used.
First the pointer character is changed, then events cascade since the
border of a scrollbar may actually have 2 or 3 windows, each with
a different requirement, but requesting a window entry/exit event.

> if the mouse movement stalls, you can't navigate with a nipple mouse or
> touchpad (which is all you get on a laptop), 'cause you'll overshoot.
> Having the button under the mouse highlight is secondary to being able to
> get the mouse over the button.
>
> When the system isn't loaded anymore (went away while a compile finished or
> a file downloaded), you get one or two small (1/4 second) stalls as stuff
> swaps back in and then life is good.  It's when you swap stuff in and then
> it swaps back out after 3 seconds of inactivity that it gets to be a real
> pain (something the deadline I/O scheduler is supposed to help)...

This is where a slightly different method of handling background processes
(and I/O requests) would help. A background process should have a lower processing
priority. The I/O activity generated by that background process should also
have a lower priority. The deadline I/O scheduler should/would/could then
keep the foreground processes (X server, apps with exposed windows) running
by processing their I/O first.

This also assumes that the X server MIGHT be able to change the priority of
processes attached to hidden windows (iconified/covered). It doesn't address
those processes that may be running detached (cron or started by terminal
emulators) which would act like foreground processes. Though the terminal
emulators could be detected, and have all subprocesses of the controlling
pty reduced in priority.... Also have to recognize when they should again
be elevated too... (or even if they should be. These things can take a LOT
of resources). It would also have to be under the control of the user, since
the user may need the background compile done ASAP (even if the user
DOES run a solitaire game covering the terminal window...)

> Maybe the correct thing here is a user space fix, with X throwing certain
> event handlers into an mlocked shared library, just so your mouse pointer
> always updates smoothly.  But I do know a lot of work has gone into making
> more intelligent swapping decisions (fundamentally, that's all VM work
> really is), and it's certainly a heck of a lot better than the 2.4.6 days
> where you had to go get a beverage when it went swap-happy and it could be
> 30 seconds between pointer updates.

Unfortunately, X cannot control the event handlers. That is the rest of the
application, and you end up locking the entire application in memory.

-- 
-------------------------------------------------------------------------
Jesse I Pollard, II
Email: pollard@navo.hpc.mil

Any opinions expressed are solely my own.

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-07 14:03                           ` Rob Landley
  2002-10-08 22:14                             ` Jesse Pollard
@ 2002-10-09  8:17                             ` Alexander Kellett
  1 sibling, 0 replies; 206+ messages in thread
From: Alexander Kellett @ 2002-10-09  8:17 UTC (permalink / raw)
  To: Rob Landley; +Cc: Jesse Pollard, linux-kernel

On Mon, Oct 07, 2002 at 10:03:16AM -0400, Rob Landley wrote:
> The frequency of mouse pointer stalls, and the worst case response time, is 
> probably something an automated benchmark could measure.  (Z-order's a 
> trickier problem because the window manager's involved, but mouse stalls are 
> EASY to cause.)

actually with low-latency, preempt and X's new silken mouse
stuff i haven't had any real mouse pointer stalls in a while.
well, apart from my maxtor drive stalling my entire system 
(vmstat included) for 2 seconds at a time when i get it to
pump its full 20mb/s (and yes, dma is enabled).

Alex

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0  -  (NUMA))
  2002-10-08 17:09                                             ` Andrew Morton
@ 2002-10-10 20:53                                               ` Thomas Zimmerman
  0 siblings, 0 replies; 206+ messages in thread
From: Thomas Zimmerman @ 2002-10-10 20:53 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1311 bytes --]

On Tue, 08 Oct 2002 10:09:38 -0700
Andrew Morton <akpm@digeo.com> wrote:
[snip]
> Well the initial approach was to put the minimum functionality
> in-kernel and drive it all from userspace.  If that proved to
> be inadequate then the kernel-side might need to be grown.
> 
> I'd expect that a defrag would be a batch process which is done
> during quiet times.  Although one _could_ have a `defragd' which
> ticks along all the time I suppose.
> 
> A defragmentation algorithm probably would not be a "per file" thing;
> it would need to gather a fair amount of state about the fs, or
> at least an individual block group before starting to shuffle things.

I seem to remember a "drive optimizer" on an old SE Mac. It would move
files and dirs about so that commonly used files all sat together. It
would run in the background too...after the disk was idle for about 5
minutes (configurable, iirc) it would go to work moving things about. It
really helped, as programs and used libs usually all sat in nice self
contained directories. I wonder if load times could be significantly
reduced by having libraries/programs fault in w/o all the seeking that
goes on on X load; as a first test, I guess, I'll have to see if
"prefaulting" all the X/kde dependencies in helps much.

Thomas "all lurk, no code" Zimmerman



[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-08  3:12                                         ` [OT] " Scott Mcdermott
@ 2002-10-10 23:49                                           ` Mike Fedyk
  0 siblings, 0 replies; 206+ messages in thread
From: Mike Fedyk @ 2002-10-10 23:49 UTC (permalink / raw)
  To: Scott Mcdermott
  Cc: linux-kernel, Jan Hudec, Jesse Pollard, Oliver Neukum,
	Helge Hafting, Martin J. Bligh

On Mon, Oct 07, 2002 at 11:12:04PM -0400, Scott Mcdermott wrote:
> Jan Hudec on Mon  7/10 17:34 +0200:
> > Well, depends on what we want to measure. If it's on the beginning of
> > main, it measures library loading time. Then argument parsing, library
> > initialization, X initialization etc. can be measured. All those parts
> > should be timed so we can see where most time is spent and which can
> > be sped up.
> 
> newer glibc prelinking support should help here a lot, according to
> published time trials I have seen with and without the feature.

Define newer.

Latest 2.2, or upcoming 3.0?

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 - (NUMA))
  2002-10-07 21:16                                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
  2002-10-07 23:47                                           ` jw schultz
@ 2002-10-11  0:02                                           ` Mike Fedyk
  1 sibling, 0 replies; 206+ messages in thread
From: Mike Fedyk @ 2002-10-11  0:02 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linus Torvalds, Alan Cox, Daniel Phillips, Martin J. Bligh,
	Oliver Neukum, Rob Landley, Linux Kernel Mailing List

On Mon, Oct 07, 2002 at 02:16:29PM -0700, Andrew Morton wrote:
> Last time, Al suggested that we always use the find_group_other() approach
> if the directory is being made at the top-level of the filesystem.  So
> if /home is a mountpoint, the user directories get spread out.
> 
> I think this, and the UID comparison will be good enough.

Not everyone puts /home or similar on a separate mount point.  Why not
spread them out always for uid 0 and the parent directory is older than X?

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-11 23:53                         ` Hans Reiser
@ 2002-10-11 20:26                           ` Rob Landley
  2002-10-12  4:14                             ` Nick LeRoy
                                               ` (2 more replies)
  0 siblings, 3 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-11 20:26 UTC (permalink / raw)
  To: Hans Reiser; +Cc: Martin J. Bligh, linux-kernel

On Friday 11 October 2002 07:53 pm, Hans Reiser wrote:
> Rob Landley wrote:
> >The new uncharted territory for Linux, and the next major
> > order-of-magnitude jump in the installed base, is the desktop.  A kernel
> > that could make a credible stab at the desktop  would certainly be 3.0
> > material.  And the work that matters for the desktop  is LATENCY work. 
> > Not SMP, not throughput, not more memory.  Latency.  O(1), deadline I/O
> > scheduler, rmap, preempt, shorter clock ticks,
>
> I must confess to thinking that namespace work is the most strategic
> upcoming battle between Linux and Windows, but probably I am biased in
> this regard.;-)  MS seems to think it also, given the rumors that OFS is
> where they are shifting their focus away from the browser and over to
> for Longhorn....

If you're talking about driverfs (kfs, kernelfs, kernfs...  i think my vote 
really is for patfs here, actually :), it is indeed seriously cool, but most 
of its potential coolness rather than active (kinetic?) coolness.  It's 
infrastructure for cool things to be built on top of.

For example,  handling removable media and transient network resources has 
always been a bit of a sore spot for unix derivatives.   "mount" doesn't 
combine well with ejecting a floppy, and hacks like mcopy would have to be 
built into the shell, or some kind of library to be sufficiently generic.  
(Your web browser can't right click->save as to "a:".)  And most cd-roms I've 
tried still won't eject when you hit the button unless you unmount the 
filesystem first.  There was talk about fixing this back in 2.3.  Can't 
say i've really thumped on it in 2.5, IDE hasn't been working long enough 
yet.  NFS has a "don't hang my entire OS" mount option, which I'm told is a 
kludge of biblical proportions, but I've mostly stayed away from NFS, so I 
really couldn't say.)

MS has been trying and failing to have a coherent naming policy for years.  
Two years ago, the active directory hype.  I still haven't seen a better 
naming system than the amiga (where you could dynamically create a ramdisk by 
just copying something to "ram:", that was cool.)

A little side project I'm working on now (in my copious free time) is mount 
point relocation support.  (You can mount the same filesystem a second time 
in another location (mount --bind makes this easy), and they share a 
superblock so open files should be happy, but you still can't detach the 
first mount point.  Not with a hacksaw, or explosives...)  It's more an 
excuse to learn the new VFS layer than anything else, but it's functionality 
I would in fact have a use for, strangely enough...

I'm also looking for an "unmount --force" option that works on something 
other than NFS.  Close all active filehandles (the programs using it can just 
deal with EBADF or whatever), flush the buffers to disk, and unmount.  None 
of this "oh I can't do that, you have a zombie process with an open file...", 
I want  "guillotine this filesystem pronto, capice?" behavior.

Of course loopback mounts would be kind of upset about this, but to be 
honest: tough.  The loopback block device gives them an I/O error, and the 
filesystem should just cope.  Floppies do this all the time with dust and cat 
hair and stuff...

Of course I don't yet know 1/10 as much about the VFS as I need to, but I'm 
learning.  Slowly...

Rob

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
                                           ` (4 preceding siblings ...)
  2002-10-07 13:56                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Jesse Pollard
@ 2002-10-11 23:53                         ` Hans Reiser
  2002-10-11 20:26                           ` Rob Landley
  5 siblings, 1 reply; 206+ messages in thread
From: Hans Reiser @ 2002-10-11 23:53 UTC (permalink / raw)
  To: Rob Landley; +Cc: Linus Torvalds, Martin J. Bligh, linux-kernel

Rob Landley wrote:

>The new uncharted territory for Linux, and the next major order-of-magnitude 
>jump in the installed base, is the desktop.  A kernel that could make a 
>credible stab at the desktop  would certainly be 3.0 material.  And the work 
>that matters for the desktop  is LATENCY work.  Not SMP, not throughput, not 
>more memory.  Latency.  O(1), deadline I/O scheduler, rmap, preempt, shorter 
>clock ticks, 
>
>  
>
I must confess to thinking that namespace work is the most strategic 
upcoming battle between Linux and Windows, but probably I am biased in 
this regard.;-)  MS seems to think it also, given the rumors that OFS is 
where they are shifting their focus away from the browser and over to 
for Longhorn....


Hans


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-11 20:26                           ` Rob Landley
@ 2002-10-12  4:14                             ` Nick LeRoy
  2002-10-13 17:27                               ` Rob Landley
  2002-10-12 10:03                             ` Hans Reiser
  2002-10-12 11:42                             ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Matthias Andree
  2 siblings, 1 reply; 206+ messages in thread
From: Nick LeRoy @ 2002-10-12  4:14 UTC (permalink / raw)
  To: Rob Landley, Hans Reiser; +Cc: Martin J. Bligh, linux-kernel

On Friday 11 October 2002 03:26 pm, Rob Landley wrote:
> On Friday 11 October 2002 07:53 pm, Hans Reiser wrote:
<snip>
> A little side project I'm working on now (in my copious free time) is mount
> point relocation support.  (You can mount the same filesystem a second time
> in another location (mount --bind makes this easy), and they share a
> superblock so open files should be happy, but you still can't detach the
> first mount point.  Not with a hacksaw, or explosives...)  It's more an
> excuse to learn the new VFS layer than anything else, but it's
> functionality I would in fact have a use for, strange enough...

Not quite sure that I'm following the _why_ of this one, but maybe I'm just 
slow.

> I'm also looking for an "unmount --force" option that works on something
> other than NFS.  Close all active filehandles (the programs using it can
> just deal with EBADF or whatever), flush the buffers to disk, and unmount. 
> None of this "oh I can't do that, you have a zombie process with an open
> file...", I want  "guillotine this filesystem pronto, capice?" behavior.

Now _this_ I *like*.  I've wanted this for _a long time_.  Not that I have 
that much spare time, but I'd like to help if I can!

> Of course loopback mounts would be kind of upset about this, but to be
> honest: tough.  The loopback block device gives them an I/O error, and the
> filesystem should just cope.  Floppies do this all the time with dust and
> cat hair and stuff...

Yup.  This is required sometimes.  Ever have a CD mounted that the (#$)# 
kernel won't let you umount even though lsof and /proc insist that there's 
nothing open, but all you can do is an fscking reboot?!!!

Thanks

-Nick

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-11 20:26                           ` Rob Landley
  2002-10-12  4:14                             ` Nick LeRoy
@ 2002-10-12 10:03                             ` Hans Reiser
  2002-10-13 17:32                               ` Rob Landley
  2002-10-12 11:42                             ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Matthias Andree
  2 siblings, 1 reply; 206+ messages in thread
From: Hans Reiser @ 2002-10-12 10:03 UTC (permalink / raw)
  To: Rob Landley; +Cc: Martin J. Bligh, linux-kernel

Rob Landley wrote:

>I'm also looking for an "unmount --force" option that works on something 
>other than NFS.  Close all active filehandles (the programs using it can just 
>deal with EBADF or whatever), flush the buffers to disk, and unmount.  None 
>of this "oh I can't do that, you have a zombie process with an open file...", 
>I want  "guillotine this filesystem pronto, capice?" behavior.
>
This sounds useful.  It would be nice if umount prompted you rather than 
refusing.

>
>Of course loopback mounts would be kind of upset about this, but to be 
>honest: tough.  The loopback block device gives them an I/O error, and the 
>filesystem should just cope.  Floppies do this all the time with dust and cat 
>hair and stuff...
>
>Of course I don't yet know 1/10 as much about the VFS as I need to, but I'm 
>learning.  Slowly...
>
>Rob
>
>
>  
>




^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-11 20:26                           ` Rob Landley
  2002-10-12  4:14                             ` Nick LeRoy
  2002-10-12 10:03                             ` Hans Reiser
@ 2002-10-12 11:42                             ` Matthias Andree
  2002-10-12 14:56                               ` Hugh Dickins
  2 siblings, 1 reply; 206+ messages in thread
From: Matthias Andree @ 2002-10-12 11:42 UTC (permalink / raw)
  To: linux-kernel

On Fri, 11 Oct 2002, Rob Landley wrote:

> I'm also looking for an "unmount --force" option that works on something 
> other than NFS.  Close all active filehandles (the programs using it can just 
> deal with EBADF or whatever), flush the buffers to disk, and unmount.  None 
> of this "oh I can't do that, you have a zombie process with an open file...", 
> I want  "guillotine this filesystem pronto, capice?" behavior.

Seconded.

The patch at the URL below used to work back with 2.4.9, I did not track
what has become of it, if it still applies, I haven't needed it recently
or if so, Alt-SysRq was fair enough for me. Maybe just updating this
badfs and forced umount patch for 2.4.20 would suffice:

http://www.moses.uklinux.net/patches/forced-umount-2.4.9.patch

It gives me one reject in fs/super.c that I don't know how to fix:

***************
*** 1145,1150 ****
  		return retval;
  	}
  
  	spin_lock(&dcache_lock);
  
  	if (atomic_read(&sb->s_active) > 1) {
--- 1172,1180 ----
  		return retval;
  	}
  
+ 	if (flags&MNT_FORCE)
+ 		quiesce_filesystem(mnt);
+ 
  	spin_lock(&dcache_lock);
  
  	if (atomic_read(&sb->s_active) > 1) {

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-12 11:42                             ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Matthias Andree
@ 2002-10-12 14:56                               ` Hugh Dickins
  0 siblings, 0 replies; 206+ messages in thread
From: Hugh Dickins @ 2002-10-12 14:56 UTC (permalink / raw)
  To: Matthias Andree; +Cc: Tigran Aivazian, Rob Landley, linux-kernel

On Sat, 12 Oct 2002, Matthias Andree wrote:
> On Fri, 11 Oct 2002, Rob Landley wrote:
> > I'm also looking for an "unmount --force" option that works on something 
> > other than NFS.  Close all active filehandles (the programs using it can just 
> > deal with EBADF or whatever), flush the buffers to disk, and unmount.  None 
> > of this "oh I can't do that, you have a zombie process with an open file...", 
> > I want  "guillotine this filesystem pronto, capice?" behavior.
> 
> Seconded.
> 
> The patch at the URL below used to work back with 2.4.9, I did not track
> what has become of it, if it still applies, I haven't needed it recently
> or if so, Alt-SysRq was fair enough for me. Maybe just updating this
> badfs and forced umount patch for 2.4.20 would suffice:
> 
> http://www.moses.uklinux.net/patches/forced-umount-2.4.9.patch
> 
> It gives me one reject in fs/super.c that I don't know how to fix:

Tigran did update his forced umount patch to 2.4.18,
here's a built but untested patch against 2.4.20-pre10 ...

--- 2.4.20-pre10/fs/Makefile	Wed Oct  9 11:53:45 2002
+++ forcedumount/fs/Makefile	Sat Oct 12 15:24:11 2002
@@ -68,6 +68,7 @@
 subdir-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs
 subdir-$(CONFIG_BEFS_FS)	+= befs
 subdir-$(CONFIG_JFS_FS)		+= jfs
+subdir-y			+= badfs
 
 
 obj-$(CONFIG_BINFMT_AOUT)	+= binfmt_aout.o
--- 2.4.20-pre10/fs/badfs/Makefile	Thu Jan  1 00:00:00 1970
+++ forcedumount/fs/badfs/Makefile	Sat Oct 12 15:24:11 2002
@@ -0,0 +1,8 @@
+#
+# Makefile for badfs filesystem.
+#
+
+O_TARGET := badfs.o
+obj-y   := inode.o
+
+include $(TOPDIR)/Rules.make
--- 2.4.20-pre10/fs/badfs/inode.c	Thu Jan  1 00:00:00 1970
+++ forcedumount/fs/badfs/inode.c	Sat Oct 12 15:24:11 2002
@@ -0,0 +1,275 @@
+/*
+ *  badfs - the Bad Filesystem
+ *
+ *  Author - Tigran Aivazian <tigran@veritas.com>
+ *
+ *  Thanks to:
+ *  	Manfred Spraul <manfred@colorfullife.com>, for useful comments.
+ *
+ *  This file is released under the GPL.
+ *
+ *  The badfs filesystem is used by forced umount ('umount -f' command)
+ *  to move inodes that keep the filesystem being umounted busy to it.
+ *
+ *  The entry point into this module is via quiesce_filesystem() called
+ *  from fs/super.c:do_umount() if MNT_FORCE is passed.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+
+#define BADFS_MAGIC	0xBADF5001
+
+static struct super_block *badfs_read_super(struct super_block *,void *,int);
+
+#define FS_FLAGS_BADFS	(FS_NOMOUNT | FS_SINGLE)
+static DECLARE_FSTYPE(badfs_fs_type,"badfs",badfs_read_super,FS_FLAGS_BADFS);
+
+static struct vfsmount *badfs_mnt;	/* returned by kern_mount() */
+static struct super_block *badfs_sb;	/* badfs_mnt->mnt_sb */
+static struct dentry *badfs_root;	/* badfs_sb->s_root */
+
+static int __init init_badfs_fs(void)
+{
+	int err = register_filesystem(&badfs_fs_type);
+
+	if (!err) {
+		badfs_mnt = kern_mount(&badfs_fs_type);
+		if (IS_ERR(badfs_mnt)) {
+			err = PTR_ERR(badfs_mnt);
+			unregister_filesystem(&badfs_fs_type);
+		} else {
+			badfs_sb = badfs_mnt->mnt_sb;
+			err = 0;
+		}
+	}
+	return err;
+}
+
+static struct inode *badfs_get_inode(struct super_block *sb, int mode)
+{
+	struct inode *inode = get_empty_inode();
+
+	if (inode) {
+		make_bad_inode(inode);
+		inode->i_sb = sb;
+		inode->i_dev = sb->s_dev;
+		inode->i_mode = mode;
+		inode->i_nlink = 1;
+		inode->i_size = 0;
+		inode->i_blocks = 0;
+	}
+	return inode;
+}
+
+/* VFS ->read_super() method */
+static struct super_block *badfs_read_super(struct super_block * sb, 
+		void * data, int silent)
+{
+	static struct super_operations badfs_ops = {};
+	struct inode * root = badfs_get_inode(sb, S_IFDIR|S_IRUSR|S_IWUSR);
+
+	if (!root)
+		return NULL;
+	sb->s_blocksize = 1024;
+	sb->s_blocksize_bits = 10;
+	sb->s_magic = BADFS_MAGIC;
+	sb->s_op = &badfs_ops;
+	badfs_root = sb->s_root = d_alloc(NULL, 
+			&(const struct qstr){ "bad:", 5, 0});
+	if (!badfs_root) {
+		iput(root);
+		return NULL;
+	}
+	sb->s_root->d_sb = sb;
+	sb->s_root->d_parent = sb->s_root;
+	d_instantiate(sb->s_root, root);
+	return sb;
+}
+
+static void disable_pwd(struct fs_struct *fs)
+{
+	struct inode *inode;
+	struct dentry *dentry;
+
+	inode = badfs_get_inode(badfs_sb, S_IFDIR|0755);
+	if (!inode) {
+		printk(KERN_ERR "disable_pwd(): can't allocate inode\n");
+		return;
+	}
+	dentry = d_alloc(badfs_root, &(const struct qstr){"dead_pwd", 8, 0});
+	if (!dentry) {
+		iput(inode);
+		printk(KERN_ERR "disable_pwd(): can't allocate dentry\n");
+		return;
+	}
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	set_fs_pwd(fs, badfs_mnt, dentry);
+}
+
+static void disable_root(struct fs_struct *fs)
+{
+	struct inode *inode;
+	struct dentry *dentry;
+
+	inode = badfs_get_inode(badfs_sb, S_IFDIR|0755);
+	if (!inode) {
+		printk(KERN_ERR "disable_root(): can't allocate inode\n");
+		return;
+	}
+	dentry = d_alloc(badfs_root, &(const struct qstr){"dead_root", 9, 0});
+	if (!dentry) {
+		iput(inode);
+		printk(KERN_ERR "disable_root(): can't allocate dentry\n");
+		return;
+	}
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	set_fs_root(fs, badfs_mnt, dentry);
+}
+
+/* called from do_umount() if MNT_FORCE is specified */
+void quiesce_filesystem(struct vfsmount *mnt)
+{
+	struct task_struct *p;
+	struct file *file;
+	struct inode *inode;
+
+	/* we do three passes through the task list, examining:
+	 *   1. p->fs{->pwd,root} that can keep this mnt busy
+	 *   2. p->files, i.e. open files (we do_close them)
+	 *   3. p->mm, i.e. mmaped files (we simply do_munmap them)
+	 * There is no guarantee that by the time we restart the loop
+	 * the amount of work to do in the loop has not increased.
+	 */
+repeat1:
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		struct fs_struct *fs;
+
+		/* get a reference to p->fs */
+		task_lock(p);
+		fs = p->fs;
+		if (!fs) {
+			task_unlock(p);
+			continue;
+		} else
+			atomic_inc(&fs->count);
+		task_unlock(p);
+
+		if (fs->pwdmnt == mnt) {
+			read_unlock(&tasklist_lock);
+			disable_pwd(fs); /* may sleep */
+			put_fs_struct(fs);
+			goto repeat1;
+		}
+		if (fs->rootmnt == mnt) {
+			read_unlock(&tasklist_lock);
+			disable_root(fs); /* may sleep */
+			put_fs_struct(fs);
+			goto repeat1;
+		}
+		put_fs_struct(fs);
+	}
+	read_unlock(&tasklist_lock);
+
+repeat2:
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		unsigned int fd, j = 0;
+		struct files_struct *files;
+
+		/* get a reference to p->files */
+		task_lock(p);
+		files = p->files;
+		if (!files) {
+			task_unlock(p);
+			continue;
+		} else {
+			atomic_inc(&files->count);
+			write_lock(&files->file_lock);
+		}
+		task_unlock(p);
+
+		/* check if this process has open files here */
+		while (1) {
+			unsigned long set;
+
+			fd = j * __NFDBITS;
+			if (fd >= files->max_fdset || fd >= files->max_fds)
+				break;
+			set = files->open_fds->fds_bits[j++];
+			while (set) {
+				if (set & 1) {
+					file = files->fd[fd];
+					if (file) {
+						inode = file->f_dentry->d_inode;
+						if (inode && (file->f_vfsmnt==mnt)) {
+							files->fd[fd] = NULL;	
+							FD_CLR(fd, files->close_on_exec);
+							__put_unused_fd(files, fd);
+							write_unlock(&files->file_lock);
+							read_unlock(&tasklist_lock);
+							put_files_struct(files);
+							filp_close(file, files);
+							goto repeat2;
+						}
+					}
+				}
+				fd++;
+				set >>= 1;
+			}
+		}
+		write_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+	read_unlock(&tasklist_lock);
+
+repeat3:
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		struct mm_struct *mm;
+		struct vm_area_struct *vma;
+
+		/* get a reference to p->mm */
+		task_lock(p);
+		mm = p->mm;
+		if (!mm) {
+			task_unlock(p);
+			continue;
+		} else
+			atomic_inc(&mm->mm_users);
+		task_unlock(p);
+
+		/* check for mmap'd files and unmap them */
+		spin_lock(&mm->page_table_lock);
+		for (vma = mm->mmap; vma; vma=vma->vm_next) {
+			file = vma->vm_file;
+			if (!file)
+				continue;
+			inode = file->f_dentry->d_inode;
+			if (!inode || !inode->i_sb)
+				continue;
+			if (file->f_vfsmnt == mnt) {
+				spin_unlock(&mm->page_table_lock);
+				read_unlock(&tasklist_lock);
+				down_write(&mm->mmap_sem);
+				do_munmap(mm, vma->vm_start, 
+					vma->vm_end - vma->vm_start);
+				up_write(&mm->mmap_sem);
+				mmput(mm);
+				goto repeat3;
+			}
+		}
+		spin_unlock(&mm->page_table_lock);
+		mmput(mm);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+module_init(init_badfs_fs)
+MODULE_LICENSE("GPL");
--- 2.4.20-pre10/fs/namespace.c	Wed Oct  9 11:53:48 2002
+++ forcedumount/fs/namespace.c	Sat Oct 12 15:24:11 2002
@@ -298,10 +298,14 @@
 	 * about for the moment.
 	 */
 
-	lock_kernel();
-	if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
-		sb->s_op->umount_begin(sb);
-	unlock_kernel();
+	if (flags & MNT_FORCE) {
+		lock_kernel();
+		if (mnt != current->fs->rootmnt)
+			quiesce_filesystem(mnt);
+		if (sb->s_op->umount_begin)
+			sb->s_op->umount_begin(sb);
+		unlock_kernel();
+	}
 
 	/*
 	 * No sense to grab the lock for this test, but test itself looks
--- 2.4.20-pre10/include/linux/fs.h	Wed Oct  9 11:58:21 2002
+++ forcedumount/include/linux/fs.h	Sat Oct 12 15:24:11 2002
@@ -1479,6 +1479,8 @@
 extern kdev_t ROOT_DEV;
 extern char root_device_name[];
 
+/* fs/badfs/inode.c - used by forced umount */
+extern void quiesce_filesystem(struct vfsmount *);
 
 extern void show_buffers(void);
 


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-12  4:14                             ` Nick LeRoy
@ 2002-10-13 17:27                               ` Rob Landley
  0 siblings, 0 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-13 17:27 UTC (permalink / raw)
  To: Nick LeRoy, Hans Reiser; +Cc: Martin J. Bligh, linux-kernel

On Saturday 12 October 2002 12:14 am, Nick LeRoy wrote:
> On Friday 11 October 2002 03:26 pm, Rob Landley wrote:
> > On Friday 11 October 2002 07:53 pm, Hans Reiser wrote:
>
> <snip>
>
> > A little side project I'm working on now (in my copious free time) is
> > mount point relocation support.  (You can mount the same filesystem a
> > second time in another location (mount --bind makes this easy), and they
> > share a superblock so open files should be happy, but you still can't
> > detach the first mount point.  Not with a hacksaw, or explosives...) 
> > It's more an excuse to learn the new VFS layer than anything else, but
> > it's
> > functionality I would in fact have a use for, strange enough...
>
> Not quite sure that I'm following the _why_ of this one, but maybe I'm just
> slow.

I posted it earlier:

Root filesystem is a loopback mounted zisofs image.  The file to be loopback 
mounted lives in the partition that will become /var.

An initial ramdisk mounts the partition on /initrd/var, calls losetup to 
associate /dev/loop0 with the correct file, and exits to let the boot process 
continue.  The boot process remounts /var in the appropriate place.

/var is now mounted twice.  The initrd can't be released because it's got an 
active mount point under it.  That mount point can't be released because the 
root filesystem is loopback mounted from within it, so it has to stay open.

Logically, the second /var mount should be "mount --move /initrd/var /var", 
followed by "umount /initrd" to free up the initrd memory.  Right now it's 
doing "mount -n --bind /initrd/var /var", because /etc is a symlink into /var 
(has to remain editable, you see), and this way the information about which 
partition var actually is can be kept in one place.  (This is an 
implementation detail: I could have used volume labels instead.)

The point is, right now I can't free the initial ramdisk because it has an 
active mount point under it..

> > I'm also looking for an "unmount --force" option that works on something
> > other than NFS.  Close all active filehandles (the programs using it can
> > just deal with EBADF or whatever), flush the buffers to disk, and
> > unmount. None of this "oh I can't do that, you have a zombie process with
> > an open file...", I want  "guillotine this filesystem pronto, capice?"
> > behavior.
>
> Now _this_ I *like*.  I've wanted this for _a long time_.  Not that I have
> that much spare time, but I'd like to help if I can!

I have no spare time at the moment either (hopefully next week), and I 
started out studying the 2.4 vfs layer which seems a bit different in 2.5 
(can't tell how much yet), but I'll get there...

> > Of course loopback mounts would be kind of upset about this, but to be
> > honest: tough.  The loopback block device gives them an I/O error, and
> > the filesystem should just cope.  Floppies do this all the time with dust
> > and cat hair and stuff...
>
> Yup.  This is required sometimes.  Ever have a CD mounted that the (#$)#
> kernel won't let you umount even though lsof and /proc insist that
> there's nothing open, but all you can do is an fscking reboot?!!!

Yes.  And some scratched CDs can give REALLY interesting results...

Rob

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-12 10:03                             ` Hans Reiser
@ 2002-10-13 17:32                               ` Rob Landley
  2002-10-13 23:51                                 ` Hans Reiser
  2002-10-14  7:10                                 ` Nikita Danilov
  0 siblings, 2 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-13 17:32 UTC (permalink / raw)
  To: Hans Reiser; +Cc: Martin J. Bligh, linux-kernel

On Saturday 12 October 2002 06:03 am, Hans Reiser wrote:
> Rob Landley wrote:
> >I'm also looking for an "unmount --force" option that works on something
> >other than NFS.  Close all active filehandles (the programs using it can
> > just deal with EBADF or whatever), flush the buffers to disk, and
> > unmount.  None of this "oh I can't do that, you have a zombie process
> > with an open file...", I want  "guillotine this filesystem pronto,
> > capice?" behavior.
>
> This sounds useful.  It would be nice if umount prompted you rather than
> refusing.

The problem here is that umount(2) doesn't take a flag.  I'd be happy to have 
it fail unless called with the WITH_EXTREME_PREJUDICE flag or some such, but 
that's an API change.

Of course I haven't gotten that far yet, but eventually this will have to be 
dealt with...

Rob

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-13 17:32                               ` Rob Landley
@ 2002-10-13 23:51                                 ` Hans Reiser
  2002-10-14 16:33                                   ` Rob Landley
  2002-10-14  7:10                                 ` Nikita Danilov
  1 sibling, 1 reply; 206+ messages in thread
From: Hans Reiser @ 2002-10-13 23:51 UTC (permalink / raw)
  To: Rob Landley; +Cc: Martin J. Bligh, linux-kernel

Rob Landley wrote:

>On Saturday 12 October 2002 06:03 am, Hans Reiser wrote:
>  
>
>>Rob Landley wrote:
>>    
>>
>>>I'm also looking for an "unmount --force" option that works on something
>>>other than NFS.  Close all active filehandles (the programs using it can
>>>just deal with EBADF or whatever), flush the buffers to disk, and
>>>unmount.  None of this "oh I can't do that, you have a zombie process
>>>with an open file...", I want  "guillotine this filesystem pronto,
>>>capice?" behavior.
>>>      
>>>
>>This sounds useful.  It would be nice if umount prompted you rather than
>>refusing.
>>    
>>
>
>The problem here is that umount(2) doesn't take a flag.  I'd be happy to have 
>it fail unless called with the WITH_EXTREME_PREJUDICE flag or some such, but 
>that's an API change.
>
>Of course I haven't gotten that far yet, but eventually this will have to be 
>dealt with...
>
>Rob
>
>
>  
>
Call it forcedumount().

What apps need to know about how to call it besides umount anyway?

Not a lot that need a lot of worry.....

Hans



^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-13 17:32                               ` Rob Landley
  2002-10-13 23:51                                 ` Hans Reiser
@ 2002-10-14  7:10                                 ` Nikita Danilov
  2002-10-21 15:36                                   ` [OT] Please don't call it 3.0!! (was Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))) Calin A. Culianu
  1 sibling, 1 reply; 206+ messages in thread
From: Nikita Danilov @ 2002-10-14  7:10 UTC (permalink / raw)
  To: Rob Landley; +Cc: Hans Reiser, Martin J. Bligh, linux-kernel

Rob Landley writes:
 > On Saturday 12 October 2002 06:03 am, Hans Reiser wrote:
 > > Rob Landley wrote:
 > > >I'm also looking for an "unmount --force" option that works on something
 > > >other than NFS.  Close all active filehandles (the programs using it can
 > > > just deal with EBADF or whatever), flush the buffers to disk, and
 > > > unmount.  None of this "oh I can't do that, you have a zombie process
 > > > with an open file...", I want  "guillotine this filesystem pronto,
 > > > capice?" behavior.
 > >
 > > This sounds useful.  It would be nice if umount prompted you rather than
 > > refusing.
 > 
 > The problem here is that umount(2) doesn't take a flag.  I'd be happy to have 
 > it fail unless called with the WITH_EXTREME_PREJUDICE flag or some such, but 
 > that's an API change.
 > 
 > Of course I haven't gotten that far yet, but eventually this will have to be 
 > dealt with...

There were several patches to do this. If I remember correctly Tigran
Aivazian wrote one, for example.

 > 
 > Rob

Nikita.


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))
  2002-10-13 23:51                                 ` Hans Reiser
@ 2002-10-14 16:33                                   ` Rob Landley
  0 siblings, 0 replies; 206+ messages in thread
From: Rob Landley @ 2002-10-14 16:33 UTC (permalink / raw)
  To: Hans Reiser; +Cc: Martin J. Bligh, linux-kernel

On Sunday 13 October 2002 07:51 pm, Hans Reiser wrote:

> Call it forcedumount().
>
> What apps need to know about how to call it besides umount anyway?
>
> Not a lot that need a lot of worry.....

Actually, looking at the umount.c user space app thingy, it turns out there's 
a umount2() glibc call that doesn't have a man page associated with it.  
(Suspected there might be, since the existing -f had to get into the kernel 
somehow...)

The new patch Hugh Dickins posted looks interesting, but of course real life 
has decided to intrude for a couple of days, looks like... :)

> Hans

Rob


^ permalink raw reply	[flat|nested] 206+ messages in thread

* [OT] Please don't call it 3.0!! (was Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)))
  2002-10-14  7:10                                 ` Nikita Danilov
@ 2002-10-21 15:36                                   ` Calin A. Culianu
  2002-10-21 16:20                                     ` Wakko Warner
  0 siblings, 1 reply; 206+ messages in thread
From: Calin A. Culianu @ 2002-10-21 15:36 UTC (permalink / raw)
  To: linux-kernel


So what's the verdict?  Are we calling it 3.0 or 2.6?  Who am I to say
this, but I really feel calling it kernel 3.0 is not fully justified.  We
should stick with the 2.x series until major ABI or API changes break the
C library in massive ways, at which point we increment the major version
number.

Although it's tempting to appeal to the mainstream by inflating the version
number artificially (what's Redhat up to now? 8.0?? sheesh!!), we have to
respect ourselves as developers.  Why call it 3.0, other than to stroke
our own egos?


^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: [OT] Please don't call it 3.0!! (was Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)))
  2002-10-21 15:36                                   ` [OT] Please don't call it 3.0!! (was Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))) Calin A. Culianu
@ 2002-10-21 16:20                                     ` Wakko Warner
  0 siblings, 0 replies; 206+ messages in thread
From: Wakko Warner @ 2002-10-21 16:20 UTC (permalink / raw)
  To: Calin A. Culianu; +Cc: linux-kernel

> So what's the verdict?  Are we calling it 3.0 or 2.6?  Who am I to say
> this, but I really feel calling it kernel 3.0 is not fully justified.  We
> should stick with the 2.x series until major ABI or API changes break the
> C library in massive ways, at which point we increment the major version
> number.
> 
> Although it's tempting to appeal to the mainstream by inflating the version
> number artificially (what's Redhat up to now? 8.0?? sheesh!!), we have to
> respect ourselves as developers.  Why call it 3.0, other than to stroke
> our own egos?

what about when they jumped from 1.3.x to 2.0.x?  I suggested around the pre
2.4 days it be called 3.0 because of that jump there.  IIRC it was slackware
that jumped to be versioned up there with redhat.  There've only been 2
major releases.

-- 
 Lab tests show that use of micro$oft causes cancer in lab animals

^ permalink raw reply	[flat|nested] 206+ messages in thread

* Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA))
  2002-10-07 18:31                                 ` Andrew Morton
                                                     ` (2 preceding siblings ...)
  2002-10-07 19:05                                   ` Daniel Phillips
@ 2002-10-30 18:26                                   ` Lee Leahu
  3 siblings, 0 replies; 206+ messages in thread
From: Lee Leahu @ 2002-10-30 18:26 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel

Pardon my ignorance,

How does Readahead relate to journaling filesystems such as ReiserFS, or XFS?

Do they have the same or similar problems that I've been reading about with ext2/3?

Andrew Morton <akpm@digeo.com> scribbled something about Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 -  (NUMA)):

> Daniel Phillips wrote:
> > 
> > On Sunday 06 October 2002 17:19, Martin J. Bligh wrote:
> > > > Then there's the issue of application startup. There's not enough
> > > > read ahead. This is especially sad, as the order of page faults is
> > > > at least partially predictable.
> > >
> > > Is the problem really, fundamentally a lack of readahead in the
> > > kernel? Or is it that your application is huge bloated pig?
> > 
> > Readahead isn't the only problem, but it is a huge problem.  The current
> > readahead model is per-inode, which is very little help with lots of small
> > files, especially if they are fragmented or out of order.  There are various
> > ways to fix this; they are all difficult[1].  Fortunately, we can call this
> > "tuning work" so it can be done during the stable series.
> > 
> > [1] We could teach each filesystem how to read ahead across directories, or
> > we could teach the vfs how to do physical readahead.  Choose your poison.
> 
> Devices do physical readahead, and it works nicely.
> 
> Go into ext2_new_inode, replace the call to find_group_dir with
> find_group_other.  Then untar a kernel tree, unmount the fs,
> remount it and see how long it takes to do a
> 
> 	`find . -type f | xargs cat > /dev/null'
> 
> on that tree.  If your disk is like my disk, you will achieve
> full disk bandwidth.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 


-- 
+----------------------------------+---------------------------------+
| Lee Leahu                        | voice -> 708-444-2690           |
| Internet Technology Specialist   | fax -> 708-444-2697             |
| RICIS, Inc.                      | email -> lee@ricis.com          |
+----------------------------------+---------------------------------+
| I cannot conceive that anybody will require multiplications at the |
| rate of 40,000 or even 4,000 per hour ...                          |
|		-- F. H. Wales (1936)                                |
+--------------------------------------------------------------------+

^ permalink raw reply	[flat|nested] 206+ messages in thread

end of thread, other threads:[~2002-10-30 18:19 UTC | newest]

Thread overview: 206+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-09-24  1:54 [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAID device driver Larry Kessler
2002-09-24  2:22 ` Jeff Garzik
2002-09-26 15:52   ` Alan Cox
2002-09-26 22:55     ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Larry Kessler
2002-09-26 22:58       ` Jeff Garzik
2002-09-26 23:07         ` Linus Torvalds
2002-09-27  2:27           ` Jeff Garzik
2002-09-27  4:45             ` Linus Torvalds
2002-09-28  7:46               ` Ingo Molnar
2002-09-28  9:16                 ` jw schultz
2002-09-30 14:05                   ` Denis Vlasenko
2002-09-30 10:22                     ` Tomas Szepe
2002-09-30 11:10                       ` jw schultz
2002-09-30 11:17                       ` Adrian Bunk
2002-09-30 19:48                       ` Rik van Riel
2002-09-30 20:30                         ` Christoph Hellwig
2002-09-28 15:40                 ` Kernel version [Was: Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver] Horst von Brand
2002-09-29  1:31                 ` v2.6 vs v3.0 Linus Torvalds
2002-09-29  6:14                   ` james
2002-09-29  6:55                     ` Andre Hedrick
2002-09-29 12:59                     ` Gerhard Mack
2002-09-29 13:46                       ` Dr. David Alan Gilbert
2002-09-29 14:06                         ` Wakko Warner
2002-09-29 15:42                         ` Jens Axboe
2002-09-29 16:21                           ` Alan Cox
2002-09-29 16:17                             ` Jens Axboe
2002-09-30  0:39                             ` Jeff Chua
2002-09-29 16:22                           ` Dave Jones
2002-09-29 16:26                             ` Jens Axboe
2002-09-29 21:46                             ` Matthias Andree
2002-09-30  7:05                               ` Michael Clark
2002-09-30  7:22                                 ` Andrew Morton
2002-09-30 13:08                                   ` Kevin Corry
2002-09-30 13:05                                 ` Kevin Corry
2002-09-30 13:49                                   ` Michael Clark
2002-09-30 14:26                                     ` Kevin Corry
2002-09-30 13:59                                   ` Michael Clark
2002-09-30 15:50                                     ` Kevin Corry
2002-09-29 17:06                       ` Jochen Friedrich
2002-09-29 15:18                     ` Trever L. Adams
2002-09-29 15:45                       ` Jens Axboe
2002-09-29 15:59                         ` Trever L. Adams
2002-09-29 16:06                           ` Jens Axboe
2002-09-29 16:13                             ` Trever L. Adams
2002-09-30  6:54                               ` Kai Henningsen
2002-09-30 18:40                                 ` Bill Davidsen
2002-10-01 12:38                                   ` Matthias Andree
2002-10-04 19:58                                     ` Bill Davidsen
2002-09-29 17:42                     ` Linus Torvalds
2002-09-29 17:54                       ` Rik van Riel
2002-09-29 18:24                       ` Alan Cox
2002-09-30  7:56                         ` Jens Axboe
2002-09-30  9:53                           ` Andre Hedrick
2002-09-30 11:54                             ` Jens Axboe
2002-09-30 12:58                           ` Alan Cox
2002-09-30 13:05                             ` Jens Axboe
2002-10-01  2:17                               ` Andre Hedrick
2002-09-30 16:39                       ` jbradford
2002-09-30 16:47                     ` Pau Aliagas
2002-09-29  7:16                   ` jbradford
2002-09-29  8:08                     ` Jeff Garzik
2002-09-29  8:17                     ` David S. Miller
2002-09-29  9:12                     ` Jens Axboe
2002-09-29 11:19                       ` Murray J. Root
2002-09-29 15:50                         ` Jens Axboe
2002-09-30  7:01                           ` Kai Henningsen
2002-09-29 16:04                         ` Zwane Mwaikambo
2002-09-29 14:56                       ` Alan Cox
2002-09-29 15:38                         ` Jens Axboe
2002-09-29 16:30                           ` Dave Jones
2002-09-29 16:42                           ` Bjoern A. Zeeb
2002-09-29 21:16                           ` Russell King
2002-09-29 21:32                             ` Alan Cox
2002-09-29 21:49                             ` steve
2002-09-29 21:52                           ` Matthias Andree
2002-09-30  7:31                             ` Tomas Szepe
2002-09-30 15:33                           ` Jan Harkes
2002-09-30 18:13                           ` Jeff Willis
2002-09-29 17:48                         ` Linus Torvalds
2002-09-29 18:13                           ` Jaroslav Kysela
2002-09-30 19:32                       ` Bill Davidsen
2002-10-01  6:26                         ` Jens Axboe
2002-10-01  7:54                           ` Mikael Pettersson
2002-10-01  8:27                             ` Jens Axboe
2002-10-01  8:44                               ` jbradford
2002-10-01 11:31                             ` Alan Cox
2002-10-01 11:25                               ` Jens Axboe
2002-09-29 15:34                     ` Andi Kleen
2002-09-29 17:26                       ` Jochen Friedrich
2002-09-29 17:35                         ` Jeff Garzik
2002-09-30  0:00                         ` Andi Kleen
2002-10-01 19:28                         ` IPv6 stability (success story ;) Petr Baudis
2002-09-29  9:15                   ` v2.6 vs v3.0 Jens Axboe
2002-09-29 19:53                     ` james
2002-09-29 15:26                   ` Matthias Andree
2002-09-29 16:24                     ` Alan Cox
2002-09-29 22:00                       ` Matthias Andree
2002-09-30 19:02                       ` Bill Davidsen
2002-09-30 18:37                   ` Bill Davidsen
2002-10-03 15:51               ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) jbradford
2002-10-03 15:57                 ` Linus Torvalds
2002-10-03 16:16                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem jbradford
2002-10-03 22:30                     ` Greg KH
2002-10-04  6:33                       ` jbradford
2002-10-04  6:37                         ` Greg KH
2002-10-04  7:17                           ` jbradford
2002-10-04  7:30                             ` Greg KH
2002-10-03 16:37                   ` [OT] 2.6 not 3.0 - (WAS Re: [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice) Alan Cox
2002-10-03 16:56                     ` Linus Torvalds
2002-10-03 17:40                       ` Alan Cox
2002-10-03 19:55                       ` jlnance
2002-10-03 16:51                   ` Dave Jones
2002-10-03 17:04                     ` Alan Cox
2002-10-03 20:43                     ` Andrew Morton
2002-10-03 22:05                       ` Dave Jones
2002-10-04  3:46                         ` Andreas Boman
2002-10-04  7:44                         ` jbradford
2002-10-03 19:51                   ` Rik van Riel
2002-10-04 22:26                   ` [OT] 2.6 not 3.0 - (NUMA) Martin J. Bligh
2002-10-04 23:13                     ` Linus Torvalds
2002-10-05  0:21                       ` Martin J. Bligh
2002-10-05  0:36                         ` Linus Torvalds
2002-10-05  1:25                           ` Michael Hohnbaum
2002-10-05 20:30                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Rob Landley
2002-10-06  2:15                         ` Andrew Morton
2002-10-06  9:42                           ` Russell King
2002-10-06 17:06                             ` Alan Cox
2002-10-06 13:44                           ` Oliver Neukum
2002-10-06 15:19                             ` Martin J. Bligh
2002-10-06 15:14                               ` Oliver Neukum
2002-10-07  8:08                               ` Helge Hafting
2002-10-07  9:18                                 ` Oliver Neukum
2002-10-07 14:11                                   ` Jan Hudec
2002-10-07 15:01                                     ` Jesse Pollard
2002-10-07 15:34                                       ` Jan Hudec
2002-10-08  3:12                                         ` [OT] " Scott Mcdermott
2002-10-10 23:49                                           ` Mike Fedyk
2002-10-07 15:15                                   ` Martin J. Bligh
2002-10-08 13:49                                   ` Helge Hafting
2002-10-07 17:43                               ` Daniel Phillips
2002-10-07 18:31                                 ` Andrew Morton
2002-10-07 18:51                                   ` Linus Torvalds
2002-10-07 20:14                                     ` Alan Cox
2002-10-07 20:31                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
2002-10-07 20:46                                         ` Linus Torvalds
2002-10-07 20:44                                       ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Linus Torvalds
2002-10-07 21:16                                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not3.0 " Andrew Morton
2002-10-07 23:47                                           ` jw schultz
2002-10-11  0:02                                           ` Mike Fedyk
2002-10-07 18:58                                   ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 " Chris Friesen
2002-10-07 19:21                                     ` Daniel Phillips
2002-10-07 19:35                                       ` Linus Torvalds
2002-10-08  0:39                                         ` Theodore Ts'o
2002-10-08  2:59                                           ` Andrew Morton
2002-10-08 16:15                                             ` Theodore Ts'o
2002-10-08 19:39                                               ` Andrew Morton
2002-10-08 17:06                                                 ` Rob Landley
2002-10-07 19:36                                     ` Andrew Morton
2002-10-08  2:36                                       ` Simon Kirby
2002-10-08  2:47                                         ` Daniel Phillips
2002-10-08  2:50                                         ` Andrew Morton
2002-10-08  2:54                                           ` Simon Kirby
2002-10-08  3:00                                             ` Andrew Morton
2002-10-08 16:17                                               ` Theodore Ts'o
2002-10-08 12:49                                           ` jlnance
2002-10-08 17:09                                             ` Andrew Morton
2002-10-10 20:53                                               ` Thomas Zimmerman
2002-10-08 13:54                                       ` Helge Hafting
2002-10-08 15:31                                         ` Andreas Dilger
2002-10-07 19:05                                   ` Daniel Phillips
2002-10-07 19:24                                     ` Linus Torvalds
2002-10-07 20:02                                       ` Daniel Phillips
2002-10-07 20:14                                         ` Andrew Morton
2002-10-07 20:22                                           ` Daniel Phillips
2002-10-07 20:28                                         ` Linus Torvalds
2002-10-07 21:16                                           ` Daniel Phillips
2002-10-07 21:55                                             ` Linus Torvalds
2002-10-07 22:02                                               ` Daniel Phillips
2002-10-07 22:12                                                 ` Andrew Morton
2002-10-08  8:49                                                   ` Padraig Brady
2002-10-07 22:14                                             ` Charles Cazabon
2002-10-30 18:26                                   ` Lee Leahu
2002-10-06  6:33                         ` Martin J. Bligh
2002-10-07  5:28                         ` John Alvord
2002-10-07  8:39                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 n Giuliano Pochini
2002-10-07 13:56                         ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Jesse Pollard
2002-10-07 14:03                           ` Rob Landley
2002-10-08 22:14                             ` Jesse Pollard
2002-10-08 19:11                               ` Rob Landley
2002-10-09  8:17                             ` Alexander Kellett
2002-10-07 18:22                           ` Daniel Phillips
2002-10-08  8:19                             ` Jan Hudec
2002-10-11 23:53                         ` Hans Reiser
2002-10-11 20:26                           ` Rob Landley
2002-10-12  4:14                             ` Nick LeRoy
2002-10-13 17:27                               ` Rob Landley
2002-10-12 10:03                             ` Hans Reiser
2002-10-13 17:32                               ` Rob Landley
2002-10-13 23:51                                 ` Hans Reiser
2002-10-14 16:33                                   ` Rob Landley
2002-10-14  7:10                                 ` Nikita Danilov
2002-10-21 15:36                                   ` [OT] Please don't call it 3.0!! (was Re: The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA))) Calin A. Culianu
2002-10-21 16:20                                     ` Wakko Warner
2002-10-12 11:42                             ` The reason to call it 3.0 is the desktop (was Re: [OT] 2.6 not 3.0 - (NUMA)) Matthias Andree
2002-10-12 14:56                               ` Hugh Dickins
2002-09-27 11:32       ` [PATCH-RFC] 4 of 4 - New problem logging macros, SCSI RAIDdevice driver Alan Cox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).