source: src/linux/universal/linux-3.18/fs/xfs/libxfs/xfs_inode_buf.c @ 31885

Last change on this file since 31885 was 31885, checked in by brainslayer, 5 weeks ago

update

File size: 14.7 KB
Line 
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_shared.h"
21#include "xfs_format.h"
22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_mount.h"
27#include "xfs_inode.h"
28#include "xfs_error.h"
29#include "xfs_cksum.h"
30#include "xfs_icache.h"
31#include "xfs_trans.h"
32#include "xfs_ialloc.h"
33#include "xfs_dinode.h"
34
/*
 * Debug-only sanity check of an inode cluster buffer: walk each on-disk
 * inode in the buffer and raise an alert for every one whose next
 * unlinked field is 0.
 */
#if defined(DEBUG)
void
xfs_inobp_check(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	int		ninodes;
	int		idx;

	/* number of inodes held by one inode cluster */
	ninodes = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;

	for (idx = 0; idx < ninodes; idx++) {
		xfs_dinode_t	*dip;

		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
					idx * mp->m_sb.sb_inodesize);
		if (dip->di_next_unlinked)
			continue;

		xfs_alert(mp,
	"Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
			idx, (long long)bp->b_bn);
	}
}
#endif
62
63/*
64 * If we are doing readahead on an inode buffer, we might be in log recovery
65 * reading an inode allocation buffer that hasn't yet been replayed, and hence
66 * has not had the inode cores stamped into it. Hence for readahead, the buffer
67 * may be potentially invalid.
68 *
69 * If the readahead buffer is invalid, we need to mark it with an error and
70 * clear the DONE status of the buffer so that a followup read will re-read it
71 * from disk. We don't report the error otherwise to avoid warnings during log
72 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
73 * because all we want to do is say readahead failed; there is no-one to report
74 * the error to, so this will distinguish it from a non-ra verifier failure.
75 * Changes to this readahead error behavour also need to be reflected in
76 * xfs_dquot_buf_readahead_verify().
77 */
78static void
79xfs_inode_buf_verify(
80        struct xfs_buf  *bp,
81        bool            readahead)
82{
83        struct xfs_mount *mp = bp->b_target->bt_mount;
84        int             i;
85        int             ni;
86
87        /*
88         * Validate the magic number and version of every inode in the buffer
89         */
90        ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
91        for (i = 0; i < ni; i++) {
92                int             di_ok;
93                xfs_dinode_t    *dip;
94
95                dip = (struct xfs_dinode *)xfs_buf_offset(bp,
96                                        (i << mp->m_sb.sb_inodelog));
97                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
98                            XFS_DINODE_GOOD_VERSION(dip->di_version);
99                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
100                                                XFS_ERRTAG_ITOBP_INOTOBP,
101                                                XFS_RANDOM_ITOBP_INOTOBP))) {
102                        if (readahead) {
103                                bp->b_flags &= ~XBF_DONE;
104                                xfs_buf_ioerror(bp, -EIO);
105                                return;
106                        }
107
108                        xfs_buf_ioerror(bp, -EFSCORRUPTED);
109                        xfs_verifier_error(bp);
110#ifdef DEBUG
111                        xfs_alert(mp,
112                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
113                                (unsigned long long)bp->b_bn, i,
114                                be16_to_cpu(dip->di_magic));
115#endif
116                }
117        }
118        xfs_inobp_check(mp, bp);
119}
120
121
122static void
123xfs_inode_buf_read_verify(
124        struct xfs_buf  *bp)
125{
126        xfs_inode_buf_verify(bp, false);
127}
128
129static void
130xfs_inode_buf_readahead_verify(
131        struct xfs_buf  *bp)
132{
133        xfs_inode_buf_verify(bp, true);
134}
135
136static void
137xfs_inode_buf_write_verify(
138        struct xfs_buf  *bp)
139{
140        xfs_inode_buf_verify(bp, false);
141}
142
143const struct xfs_buf_ops xfs_inode_buf_ops = {
144        .name = "xfs_inode",
145        .verify_read = xfs_inode_buf_read_verify,
146        .verify_write = xfs_inode_buf_write_verify,
147};
148
149const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
150        .name = "xxfs_inode_ra",
151        .verify_read = xfs_inode_buf_readahead_verify,
152        .verify_write = xfs_inode_buf_write_verify,
153};
154
155
156/*
157 * This routine is called to map an inode to the buffer containing the on-disk
158 * version of the inode.  It returns a pointer to the buffer containing the
159 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
160 * pointer to the on-disk inode within that buffer.
161 *
162 * If a non-zero error is returned, then the contents of bpp and dipp are
163 * undefined.
164 */
165int
166xfs_imap_to_bp(
167        struct xfs_mount        *mp,
168        struct xfs_trans        *tp,
169        struct xfs_imap         *imap,
170        struct xfs_dinode       **dipp,
171        struct xfs_buf          **bpp,
172        uint                    buf_flags,
173        uint                    iget_flags)
174{
175        struct xfs_buf          *bp;
176        int                     error;
177
178        buf_flags |= XBF_UNMAPPED;
179        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
180                                   (int)imap->im_len, buf_flags, &bp,
181                                   &xfs_inode_buf_ops);
182        if (error) {
183                if (error == -EAGAIN) {
184                        ASSERT(buf_flags & XBF_TRYLOCK);
185                        return error;
186                }
187
188                if (error == -EFSCORRUPTED &&
189                    (iget_flags & XFS_IGET_UNTRUSTED))
190                        return -EINVAL;
191
192                xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
193                        __func__, error);
194                return error;
195        }
196
197        *bpp = bp;
198        *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
199        return 0;
200}
201
202void
203xfs_dinode_from_disk(
204        xfs_icdinode_t          *to,
205        xfs_dinode_t            *from)
206{
207        to->di_magic = be16_to_cpu(from->di_magic);
208        to->di_mode = be16_to_cpu(from->di_mode);
209        to->di_version = from ->di_version;
210        to->di_format = from->di_format;
211        to->di_onlink = be16_to_cpu(from->di_onlink);
212        to->di_uid = be32_to_cpu(from->di_uid);
213        to->di_gid = be32_to_cpu(from->di_gid);
214        to->di_nlink = be32_to_cpu(from->di_nlink);
215        to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
216        to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
217        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
218        to->di_flushiter = be16_to_cpu(from->di_flushiter);
219        to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
220        to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
221        to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
222        to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
223        to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
224        to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
225        to->di_size = be64_to_cpu(from->di_size);
226        to->di_nblocks = be64_to_cpu(from->di_nblocks);
227        to->di_extsize = be32_to_cpu(from->di_extsize);
228        to->di_nextents = be32_to_cpu(from->di_nextents);
229        to->di_anextents = be16_to_cpu(from->di_anextents);
230        to->di_forkoff = from->di_forkoff;
231        to->di_aformat  = from->di_aformat;
232        to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
233        to->di_dmstate  = be16_to_cpu(from->di_dmstate);
234        to->di_flags    = be16_to_cpu(from->di_flags);
235        to->di_gen      = be32_to_cpu(from->di_gen);
236
237        if (to->di_version == 3) {
238                to->di_changecount = be64_to_cpu(from->di_changecount);
239                to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
240                to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
241                to->di_flags2 = be64_to_cpu(from->di_flags2);
242                to->di_ino = be64_to_cpu(from->di_ino);
243                to->di_lsn = be64_to_cpu(from->di_lsn);
244                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
245                uuid_copy(&to->di_uuid, &from->di_uuid);
246        }
247}
248
249void
250xfs_dinode_to_disk(
251        xfs_dinode_t            *to,
252        xfs_icdinode_t          *from)
253{
254        to->di_magic = cpu_to_be16(from->di_magic);
255        to->di_mode = cpu_to_be16(from->di_mode);
256        to->di_version = from ->di_version;
257        to->di_format = from->di_format;
258        to->di_onlink = cpu_to_be16(from->di_onlink);
259        to->di_uid = cpu_to_be32(from->di_uid);
260        to->di_gid = cpu_to_be32(from->di_gid);
261        to->di_nlink = cpu_to_be32(from->di_nlink);
262        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
263        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
264        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
265        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
266        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
267        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
268        to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
269        to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
270        to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
271        to->di_size = cpu_to_be64(from->di_size);
272        to->di_nblocks = cpu_to_be64(from->di_nblocks);
273        to->di_extsize = cpu_to_be32(from->di_extsize);
274        to->di_nextents = cpu_to_be32(from->di_nextents);
275        to->di_anextents = cpu_to_be16(from->di_anextents);
276        to->di_forkoff = from->di_forkoff;
277        to->di_aformat = from->di_aformat;
278        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
279        to->di_dmstate = cpu_to_be16(from->di_dmstate);
280        to->di_flags = cpu_to_be16(from->di_flags);
281        to->di_gen = cpu_to_be32(from->di_gen);
282
283        if (from->di_version == 3) {
284                to->di_changecount = cpu_to_be64(from->di_changecount);
285                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
286                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
287                to->di_flags2 = cpu_to_be64(from->di_flags2);
288                to->di_ino = cpu_to_be64(from->di_ino);
289                to->di_lsn = cpu_to_be64(from->di_lsn);
290                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
291                uuid_copy(&to->di_uuid, &from->di_uuid);
292                to->di_flushiter = 0;
293        } else {
294                to->di_flushiter = cpu_to_be16(from->di_flushiter);
295        }
296}
297
298static bool
299xfs_dinode_verify(
300        struct xfs_mount        *mp,
301        struct xfs_inode        *ip,
302        struct xfs_dinode       *dip)
303{
304        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
305                return false;
306
307        /* don't allow invalid i_size */
308        if (be64_to_cpu(dip->di_size) & (1ULL << 63))
309                return false;
310
311        /* No zero-length symlinks. */
312        if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
313                return false;
314
315        /* only version 3 or greater inodes are extensively verified here */
316        if (dip->di_version < 3)
317                return true;
318
319        if (!xfs_sb_version_hascrc(&mp->m_sb))
320                return false;
321        if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
322                              XFS_DINODE_CRC_OFF))
323                return false;
324        if (be64_to_cpu(dip->di_ino) != ip->i_ino)
325                return false;
326        if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
327                return false;
328        return true;
329}
330
331void
332xfs_dinode_calc_crc(
333        struct xfs_mount        *mp,
334        struct xfs_dinode       *dip)
335{
336        __uint32_t              crc;
337
338        if (dip->di_version < 3)
339                return;
340
341        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
342        crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
343                              XFS_DINODE_CRC_OFF);
344        dip->di_crc = xfs_end_cksum(crc);
345}
346
347/*
348 * Read the disk inode attributes into the in-core inode structure.
349 *
350 * For version 5 superblocks, if we are initialising a new inode and we are not
351 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
352 * inode core with a random generation number. If we are keeping inodes around,
353 * we need to read the inode cluster to get the existing generation number off
354 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
355 * format) then log recovery is dependent on the di_flushiter field being
356 * initialised from the current on-disk value and hence we must also read the
357 * inode off disk.
358 */
359int
360xfs_iread(
361        xfs_mount_t     *mp,
362        xfs_trans_t     *tp,
363        xfs_inode_t     *ip,
364        uint            iget_flags)
365{
366        xfs_buf_t       *bp;
367        xfs_dinode_t    *dip;
368        int             error;
369
370        /*
371         * Fill in the location information in the in-core inode.
372         */
373        error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
374        if (error)
375                return error;
376
377        /* shortcut IO on inode allocation if possible */
378        if ((iget_flags & XFS_IGET_CREATE) &&
379            xfs_sb_version_hascrc(&mp->m_sb) &&
380            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
381                /* initialise the on-disk inode core */
382                memset(&ip->i_d, 0, sizeof(ip->i_d));
383                ip->i_d.di_magic = XFS_DINODE_MAGIC;
384                ip->i_d.di_gen = prandom_u32();
385                if (xfs_sb_version_hascrc(&mp->m_sb)) {
386                        ip->i_d.di_version = 3;
387                        ip->i_d.di_ino = ip->i_ino;
388                        uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
389                } else
390                        ip->i_d.di_version = 2;
391                return 0;
392        }
393
394        /*
395         * Get pointers to the on-disk inode and the buffer containing it.
396         */
397        error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
398        if (error)
399                return error;
400
401        /* even unallocated inodes are verified */
402        if (!xfs_dinode_verify(mp, ip, dip)) {
403                xfs_alert(mp, "%s: validation failed for inode %lld failed",
404                                __func__, ip->i_ino);
405
406                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
407                error = -EFSCORRUPTED;
408                goto out_brelse;
409        }
410
411        /*
412         * If the on-disk inode is already linked to a directory
413         * entry, copy all of the inode into the in-core inode.
414         * xfs_iformat_fork() handles copying in the inode format
415         * specific information.
416         * Otherwise, just get the truly permanent information.
417         */
418        if (dip->di_mode) {
419                xfs_dinode_from_disk(&ip->i_d, dip);
420                error = xfs_iformat_fork(ip, dip);
421                if (error)  {
422#ifdef DEBUG
423                        xfs_alert(mp, "%s: xfs_iformat() returned error %d",
424                                __func__, error);
425#endif /* DEBUG */
426                        goto out_brelse;
427                }
428        } else {
429                /*
430                 * Partial initialisation of the in-core inode. Just the bits
431                 * that xfs_ialloc won't overwrite or relies on being correct.
432                 */
433                ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
434                ip->i_d.di_version = dip->di_version;
435                ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
436                ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
437
438                if (dip->di_version == 3) {
439                        ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
440                        uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
441                }
442
443                /*
444                 * Make sure to pull in the mode here as well in
445                 * case the inode is released without being used.
446                 * This ensures that xfs_inactive() will see that
447                 * the inode is already free and not try to mess
448                 * with the uninitialized part of it.
449                 */
450                ip->i_d.di_mode = 0;
451        }
452
453        /*
454         * Automatically convert version 1 inode formats in memory to version 2
455         * inode format. If the inode is modified, it will get logged and
456         * rewritten as a version 2 inode. We can do this because we set the
457         * superblock feature bit for v2 inodes unconditionally during mount
458         * and it means the reast of the code can assume the inode version is 2
459         * or higher.
460         */
461        if (ip->i_d.di_version == 1) {
462                ip->i_d.di_version = 2;
463                memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
464                ip->i_d.di_nlink = ip->i_d.di_onlink;
465                ip->i_d.di_onlink = 0;
466                xfs_set_projid(ip, 0);
467        }
468
469        ip->i_delayed_blks = 0;
470
471        /*
472         * Mark the buffer containing the inode as something to keep
473         * around for a while.  This helps to keep recently accessed
474         * meta-data in-core longer.
475         */
476        xfs_buf_set_ref(bp, XFS_INO_REF);
477
478        /*
479         * Use xfs_trans_brelse() to release the buffer containing the on-disk
480         * inode, because it was acquired with xfs_trans_read_buf() in
481         * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
482         * brelse().  If we're within a transaction, then xfs_trans_brelse()
483         * will only release the buffer if it is not dirty within the
484         * transaction.  It will be OK to release the buffer in this case,
485         * because inodes on disk are never destroyed and we will be locking the
486         * new in-core inode before putting it in the cache where other
487         * processes can find it.  Thus we don't have to worry about the inode
488         * being changed just because we released the buffer.
489         */
490 out_brelse:
491        xfs_trans_brelse(tp, bp);
492        return error;
493}
Note: See TracBrowser for help on using the repository browser.