ext4, jbd2: ensure panic by fix a race between jbd2 abort and ext4 error handlers

author zhangyi (F) <yi.zhang@huawei.com>

Tue, 9 Jun 2020 07:35:40 +0000 (15:35 +0800)

committer Theodore Ts'o <tytso@mit.edu>

Fri, 12 Jun 2020 18:51:41 +0000 (14:51 -0400)
author zhangyi (F) <yi.zhang@huawei.com>
Tue, 9 Jun 2020 07:35:40 +0000 (15:35 +0800)
committer Theodore Ts'o <tytso@mit.edu>
Fri, 12 Jun 2020 18:51:41 +0000 (14:51 -0400)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 127b8ef..2660a7e 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -522,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb)
                 smp_wmb();
                 sb->s_flags |= SB_RDONLY;
         } else if (test_opt(sb, ERRORS_PANIC)) {
-               if (EXT4_SB(sb)->s_journal &&
-                 !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
-                       return;
                 panic("EXT4-fs (device %s): panic forced after error\n",
                         sb->s_id);
         }
@@ -725,23 +722,20 @@ void __ext4_abort(struct super_block *sb, const char *function,
         va_end(args);
  
         if (sb_rdonly(sb) == 0) {
-               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               if (EXT4_SB(sb)->s_journal)
+                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+
+               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                 /*
                  * Make sure updated value of ->s_mount_flags will be visible
                  * before ->s_flags update
                  */
                 smp_wmb();
                 sb->s_flags |= SB_RDONLY;
-               if (EXT4_SB(sb)->s_journal)
-                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
         }
-       if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
-               if (EXT4_SB(sb)->s_journal &&
-                 !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
-                       return;
+       if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
                 panic("EXT4-fs panic from previous error\n");
-       }
  }
  
  void __ext4_msg(struct super_block *sb,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c

index a49d0e6..e494443 100644 (file)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1140,6 +1140,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
         init_waitqueue_head(&journal->j_wait_commit);
         init_waitqueue_head(&journal->j_wait_updates);
         init_waitqueue_head(&journal->j_wait_reserved);
+       mutex_init(&journal->j_abort_mutex);
         mutex_init(&journal->j_barrier);
         mutex_init(&journal->j_checkpoint_mutex);
         spin_lock_init(&journal->j_revoke_lock);
@@ -1402,7 +1403,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
                 printk(KERN_ERR "JBD2: Error %d detected when updating "
                        "journal superblock for %s.\n", ret,
                        journal->j_devname);
-               jbd2_journal_abort(journal, ret);
+               if (!is_journal_aborted(journal))
+                       jbd2_journal_abort(journal, ret);
         }
  
         return ret;
@@ -2153,6 +2155,13 @@ void jbd2_journal_abort(journal_t *journal, int errno)
  {
         transaction_t *transaction;
  
+       /*
+        * Lock the aborting procedure until everything is done, this avoid
+        * races between filesystem's error handling flow (e.g. ext4_abort()),
+        * ensure panic after the error info is written into journal's
+        * superblock.
+        */
+       mutex_lock(&journal->j_abort_mutex);
         /*
          * ESHUTDOWN always takes precedence because a file system check
          * caused by any other journal abort error is not required after
@@ -2167,6 +2176,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
                         journal->j_errno = errno;
                         jbd2_journal_update_sb_errno(journal);
                 }
+               mutex_unlock(&journal->j_abort_mutex);
                 return;
         }
  
@@ -2188,10 +2198,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
          * layer could realise that a filesystem check is needed.
          */
         jbd2_journal_update_sb_errno(journal);
-
-       write_lock(&journal->j_state_lock);
-       journal->j_flags |= JBD2_REC_ERR;
-       write_unlock(&journal->j_state_lock);
+       mutex_unlock(&journal->j_abort_mutex);
  }
  
  /**
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index f613d85..d56128d 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -765,6 +765,11 @@ struct journal_s
          */
         int                     j_errno;
  
+       /**
+        * @j_abort_mutex: Lock the whole aborting procedure.
+        */
+       struct mutex            j_abort_mutex;
+
         /**
          * @j_sb_buffer: The first part of the superblock buffer.
          */
@@ -1247,7 +1252,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3,                CSUM_V3)
  #define JBD2_ABORT_ON_SYNCDATA_ERR     0x040   /* Abort the journal on file
                                                  * data write error in ordered
                                                  * mode */
-#define JBD2_REC_ERR   0x080   /* The errno in the sb has been recorded */
  
  /*
   * Function declarations for the journaling transaction and buffer
author	zhangyi (F) <yi.zhang@huawei.com>
	Tue, 9 Jun 2020 07:35:40 +0000 (15:35 +0800)
committer	Theodore Ts'o <tytso@mit.edu>
	Fri, 12 Jun 2020 18:51:41 +0000 (14:51 -0400)
fs/ext4/super.c		patch \| blob \| history
fs/jbd2/journal.c		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history