1 |
/* $MidnightBSD$ */ |
2 |
/* |
3 |
* Copyright (c) 2000-2005 Silicon Graphics, Inc. |
4 |
* All Rights Reserved. |
5 |
* |
6 |
* This program is free software; you can redistribute it and/or |
7 |
* modify it under the terms of the GNU General Public License as |
8 |
* published by the Free Software Foundation. |
9 |
* |
10 |
* This program is distributed in the hope that it would be useful, |
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 |
* GNU General Public License for more details. |
14 |
* |
15 |
* You should have received a copy of the GNU General Public License |
16 |
* along with this program; if not, write the Free Software Foundation, |
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
18 |
*/ |
19 |
#include "xfs.h" |
20 |
#include "xfs_fs.h" |
21 |
#include "xfs_types.h" |
22 |
#include "xfs_bit.h" |
23 |
#include "xfs_log.h" |
24 |
#include "xfs_inum.h" |
25 |
#include "xfs_trans.h" |
26 |
#include "xfs_sb.h" |
27 |
#include "xfs_ag.h" |
28 |
#include "xfs_dir.h" |
29 |
#include "xfs_dir2.h" |
30 |
#include "xfs_dmapi.h" |
31 |
#include "xfs_mount.h" |
32 |
#include "xfs_da_btree.h" |
33 |
#include "xfs_bmap_btree.h" |
34 |
#include "xfs_ialloc_btree.h" |
35 |
#include "xfs_alloc_btree.h" |
36 |
#include "xfs_dir_sf.h" |
37 |
#include "xfs_dir2_sf.h" |
38 |
#include "xfs_attr_sf.h" |
39 |
#include "xfs_dinode.h" |
40 |
#include "xfs_inode.h" |
41 |
#include "xfs_inode_item.h" |
42 |
#include "xfs_btree.h" |
43 |
#include "xfs_alloc.h" |
44 |
#include "xfs_ialloc.h" |
45 |
#include "xfs_quota.h" |
46 |
#include "xfs_error.h" |
47 |
#include "xfs_bmap.h" |
48 |
#include "xfs_rw.h" |
49 |
#include "xfs_refcache.h" |
50 |
#include "xfs_buf_item.h" |
51 |
#include "xfs_log_priv.h" |
52 |
#include "xfs_dir2_trace.h" |
53 |
#include "xfs_extfree_item.h" |
54 |
#include "xfs_acl.h" |
55 |
#include "xfs_attr.h" |
56 |
#include "xfs_clnt.h" |
57 |
#include "xfs_fsops.h" |
58 |
#include "xfs_vnode.h" |
59 |
|
60 |
STATIC int xfs_sync(bhv_desc_t *, int, cred_t *); |
61 |
|
62 |
extern kmem_zone_t *xfs_bmap_free_item_zone; |
63 |
extern kmem_zone_t *xfs_btree_cur_zone; |
64 |
extern kmem_zone_t *xfs_trans_zone; |
65 |
extern kmem_zone_t *xfs_dabuf_zone; |
66 |
extern kmem_zone_t *xfs_buf_item_zone; |
67 |
|
68 |
#ifdef XFS_DABUF_DEBUG |
69 |
extern lock_t xfs_dabuf_global_lock; |
70 |
#endif |
71 |
|
72 |
int |
73 |
xfs_init(void) |
74 |
{ |
75 |
#if 0 |
76 |
extern kmem_zone_t *xfs_bmap_free_item_zone; |
77 |
extern kmem_zone_t *xfs_btree_cur_zone; |
78 |
extern kmem_zone_t *xfs_trans_zone; |
79 |
extern kmem_zone_t *xfs_buf_item_zone; |
80 |
extern kmem_zone_t *xfs_dabuf_zone; |
81 |
#endif |
82 |
#ifdef XFS_DABUF_DEBUG |
83 |
spinlock_init(&xfs_dabuf_global_lock, "xfsda"); |
84 |
#endif |
85 |
/* |
86 |
* Initialize all of the zone allocators we use. |
87 |
*/ |
88 |
xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), |
89 |
"xfs_bmap_free_item"); |
90 |
xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), |
91 |
"xfs_btree_cur"); |
92 |
xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); |
93 |
xfs_da_state_zone = |
94 |
kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state"); |
95 |
xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); |
96 |
xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); |
97 |
xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); |
98 |
|
99 |
/* |
100 |
* The size of the zone allocated buf log item is the maximum |
101 |
* size possible under XFS. This wastes a little bit of memory, |
102 |
* but it is much faster. |
103 |
*/ |
104 |
xfs_buf_item_zone = |
105 |
kmem_zone_init((sizeof(xfs_buf_log_item_t) + |
106 |
(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / |
107 |
NBWORD) * sizeof(int))), |
108 |
"xfs_buf_item"); |
109 |
xfs_efd_zone = |
110 |
kmem_zone_init((sizeof(xfs_efd_log_item_t) + |
111 |
((XFS_EFD_MAX_FAST_EXTENTS - 1) * |
112 |
sizeof(xfs_extent_t))), |
113 |
"xfs_efd_item"); |
114 |
xfs_efi_zone = |
115 |
kmem_zone_init((sizeof(xfs_efi_log_item_t) + |
116 |
((XFS_EFI_MAX_FAST_EXTENTS - 1) * |
117 |
sizeof(xfs_extent_t))), |
118 |
"xfs_efi_item"); |
119 |
|
120 |
/* |
121 |
* These zones warrant special memory allocator hints |
122 |
*/ |
123 |
xfs_inode_zone = |
124 |
kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", |
125 |
KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | |
126 |
KM_ZONE_SPREAD, NULL); |
127 |
xfs_ili_zone = |
128 |
kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", |
129 |
KM_ZONE_SPREAD, NULL); |
130 |
xfs_chashlist_zone = |
131 |
kmem_zone_init_flags(sizeof(xfs_chashlist_t), "xfs_chashlist", |
132 |
KM_ZONE_SPREAD, NULL); |
133 |
|
134 |
/* |
135 |
* Allocate global trace buffers. |
136 |
*/ |
137 |
#ifdef XFS_ALLOC_TRACE |
138 |
xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP); |
139 |
#endif |
140 |
#ifdef XFS_BMAP_TRACE |
141 |
xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP); |
142 |
#endif |
143 |
#ifdef XFS_BMBT_TRACE |
144 |
xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP); |
145 |
#endif |
146 |
#ifdef XFS_DIR_TRACE |
147 |
xfs_dir_trace_buf = ktrace_alloc(XFS_DIR_TRACE_SIZE, KM_SLEEP); |
148 |
#endif |
149 |
#ifdef XFS_ATTR_TRACE |
150 |
xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP); |
151 |
#endif |
152 |
#ifdef XFS_DIR2_TRACE |
153 |
xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP); |
154 |
#endif |
155 |
|
156 |
xfs_dir_startup(); |
157 |
|
158 |
#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) |
159 |
xfs_error_test_init(); |
160 |
#endif /* DEBUG || INDUCE_IO_ERROR */ |
161 |
|
162 |
xfs_refcache_init(); |
163 |
xfs_init_procfs(); |
164 |
xfs_sysctl_register(); |
165 |
return 0; |
166 |
} |
167 |
|
168 |
void |
169 |
xfs_cleanup(void) |
170 |
{ |
171 |
#if 0 |
172 |
extern kmem_zone_t *xfs_bmap_free_item_zone; |
173 |
extern kmem_zone_t *xfs_btree_cur_zone; |
174 |
extern kmem_zone_t *xfs_inode_zone; |
175 |
extern kmem_zone_t *xfs_trans_zone; |
176 |
extern kmem_zone_t *xfs_da_state_zone; |
177 |
extern kmem_zone_t *xfs_dabuf_zone; |
178 |
extern kmem_zone_t *xfs_efd_zone; |
179 |
extern kmem_zone_t *xfs_efi_zone; |
180 |
extern kmem_zone_t *xfs_buf_item_zone; |
181 |
extern kmem_zone_t *xfs_chashlist_zone; |
182 |
#endif |
183 |
|
184 |
xfs_cleanup_procfs(); |
185 |
xfs_sysctl_unregister(); |
186 |
xfs_refcache_destroy(); |
187 |
xfs_acl_zone_destroy(xfs_acl_zone); |
188 |
|
189 |
#ifdef XFS_DIR2_TRACE |
190 |
ktrace_free(xfs_dir2_trace_buf); |
191 |
#endif |
192 |
#ifdef XFS_ATTR_TRACE |
193 |
ktrace_free(xfs_attr_trace_buf); |
194 |
#endif |
195 |
#ifdef XFS_DIR_TRACE |
196 |
ktrace_free(xfs_dir_trace_buf); |
197 |
#endif |
198 |
#ifdef XFS_BMBT_TRACE |
199 |
ktrace_free(xfs_bmbt_trace_buf); |
200 |
#endif |
201 |
#ifdef XFS_BMAP_TRACE |
202 |
ktrace_free(xfs_bmap_trace_buf); |
203 |
#endif |
204 |
#ifdef XFS_ALLOC_TRACE |
205 |
ktrace_free(xfs_alloc_trace_buf); |
206 |
#endif |
207 |
|
208 |
kmem_zone_destroy(xfs_bmap_free_item_zone); |
209 |
kmem_zone_destroy(xfs_btree_cur_zone); |
210 |
kmem_zone_destroy(xfs_inode_zone); |
211 |
kmem_zone_destroy(xfs_trans_zone); |
212 |
kmem_zone_destroy(xfs_da_state_zone); |
213 |
kmem_zone_destroy(xfs_dabuf_zone); |
214 |
kmem_zone_destroy(xfs_buf_item_zone); |
215 |
kmem_zone_destroy(xfs_efd_zone); |
216 |
kmem_zone_destroy(xfs_efi_zone); |
217 |
kmem_zone_destroy(xfs_ifork_zone); |
218 |
kmem_zone_destroy(xfs_ili_zone); |
219 |
kmem_zone_destroy(xfs_chashlist_zone); |
220 |
} |
221 |
|
222 |
/* |
223 |
* xfs_start_flags |
224 |
* |
225 |
* This function fills in xfs_mount_t fields based on mount args. |
226 |
* Note: the superblock has _not_ yet been read in. |
227 |
*/ |
228 |
STATIC int |
229 |
xfs_start_flags( |
230 |
struct xfs_vfs *vfs, |
231 |
struct xfs_mount_args *ap, |
232 |
struct xfs_mount *mp) |
233 |
{ |
234 |
/* Values are in BBs */ |
235 |
if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { |
236 |
/* |
237 |
* At this point the superblock has not been read |
238 |
* in, therefore we do not know the block size. |
239 |
* Before the mount call ends we will convert |
240 |
* these to FSBs. |
241 |
*/ |
242 |
mp->m_dalign = ap->sunit; |
243 |
mp->m_swidth = ap->swidth; |
244 |
} |
245 |
|
246 |
if (ap->logbufs != -1 && |
247 |
ap->logbufs != 0 && |
248 |
(ap->logbufs < XLOG_MIN_ICLOGS || |
249 |
ap->logbufs > XLOG_MAX_ICLOGS)) { |
250 |
cmn_err(CE_WARN, |
251 |
"XFS: invalid logbufs value: %d [not %d-%d]", |
252 |
ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); |
253 |
return XFS_ERROR(EINVAL); |
254 |
} |
255 |
mp->m_logbufs = ap->logbufs; |
256 |
if (ap->logbufsize != -1 && |
257 |
ap->logbufsize != 0 && |
258 |
ap->logbufsize != 16 * 1024 && |
259 |
ap->logbufsize != 32 * 1024 && |
260 |
ap->logbufsize != 64 * 1024 && |
261 |
ap->logbufsize != 128 * 1024 && |
262 |
ap->logbufsize != 256 * 1024) { |
263 |
cmn_err(CE_WARN, |
264 |
"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", |
265 |
ap->logbufsize); |
266 |
return XFS_ERROR(EINVAL); |
267 |
} |
268 |
mp->m_ihsize = ap->ihashsize; |
269 |
mp->m_logbsize = ap->logbufsize; |
270 |
mp->m_fsname_len = strlen(ap->fsname) + 1; |
271 |
mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); |
272 |
strcpy(mp->m_fsname, ap->fsname); |
273 |
if (ap->rtname[0]) { |
274 |
mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP); |
275 |
strcpy(mp->m_rtname, ap->rtname); |
276 |
} |
277 |
if (ap->logname[0]) { |
278 |
mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP); |
279 |
strcpy(mp->m_logname, ap->logname); |
280 |
} |
281 |
|
282 |
if (ap->flags & XFSMNT_WSYNC) |
283 |
mp->m_flags |= XFS_MOUNT_WSYNC; |
284 |
#if XFS_BIG_INUMS |
285 |
if (ap->flags & XFSMNT_INO64) { |
286 |
mp->m_flags |= XFS_MOUNT_INO64; |
287 |
mp->m_inoadd = XFS_INO64_OFFSET; |
288 |
} |
289 |
#endif |
290 |
if (ap->flags & XFSMNT_RETERR) |
291 |
mp->m_flags |= XFS_MOUNT_RETERR; |
292 |
if (ap->flags & XFSMNT_NOALIGN) |
293 |
mp->m_flags |= XFS_MOUNT_NOALIGN; |
294 |
if (ap->flags & XFSMNT_SWALLOC) |
295 |
mp->m_flags |= XFS_MOUNT_SWALLOC; |
296 |
if (ap->flags & XFSMNT_OSYNCISOSYNC) |
297 |
mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; |
298 |
if (ap->flags & XFSMNT_32BITINODES) |
299 |
mp->m_flags |= XFS_MOUNT_32BITINODES; |
300 |
|
301 |
if (ap->flags & XFSMNT_IOSIZE) { |
302 |
if (ap->iosizelog > XFS_MAX_IO_LOG || |
303 |
ap->iosizelog < XFS_MIN_IO_LOG) { |
304 |
cmn_err(CE_WARN, |
305 |
"XFS: invalid log iosize: %d [not %d-%d]", |
306 |
ap->iosizelog, XFS_MIN_IO_LOG, |
307 |
XFS_MAX_IO_LOG); |
308 |
return XFS_ERROR(EINVAL); |
309 |
} |
310 |
|
311 |
mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; |
312 |
mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; |
313 |
} |
314 |
|
315 |
if (ap->flags & XFSMNT_IHASHSIZE) |
316 |
mp->m_flags |= XFS_MOUNT_IHASHSIZE; |
317 |
if (ap->flags & XFSMNT_IDELETE) |
318 |
mp->m_flags |= XFS_MOUNT_IDELETE; |
319 |
if (ap->flags & XFSMNT_DIRSYNC) |
320 |
mp->m_flags |= XFS_MOUNT_DIRSYNC; |
321 |
if (ap->flags & XFSMNT_ATTR2) |
322 |
mp->m_flags |= XFS_MOUNT_ATTR2; |
323 |
|
324 |
if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) |
325 |
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; |
326 |
|
327 |
/* |
328 |
* no recovery flag requires a read-only mount |
329 |
*/ |
330 |
if (ap->flags & XFSMNT_NORECOVERY) { |
331 |
if (!(vfs->vfs_flag & VFS_RDONLY)) { |
332 |
cmn_err(CE_WARN, |
333 |
"XFS: tried to mount a FS read-write without recovery!"); |
334 |
return XFS_ERROR(EINVAL); |
335 |
} |
336 |
mp->m_flags |= XFS_MOUNT_NORECOVERY; |
337 |
} |
338 |
|
339 |
if (ap->flags & XFSMNT_NOUUID) |
340 |
mp->m_flags |= XFS_MOUNT_NOUUID; |
341 |
if (ap->flags & XFSMNT_BARRIER) |
342 |
mp->m_flags |= XFS_MOUNT_BARRIER; |
343 |
else |
344 |
mp->m_flags &= ~XFS_MOUNT_BARRIER; |
345 |
|
346 |
return 0; |
347 |
} |
348 |
|
349 |
/* |
350 |
* This function fills in xfs_mount_t fields based on mount args. |
351 |
* Note: the superblock _has_ now been read in. |
352 |
*/ |
353 |
STATIC int |
354 |
xfs_finish_flags( |
355 |
struct xfs_vfs *vfs, |
356 |
struct xfs_mount_args *ap, |
357 |
struct xfs_mount *mp) |
358 |
{ |
359 |
int ronly = (vfs->vfs_flag & VFS_RDONLY); |
360 |
|
361 |
/* Fail a mount where the logbuf is smaller then the log stripe */ |
362 |
if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { |
363 |
if ((ap->logbufsize <= 0) && |
364 |
(mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { |
365 |
mp->m_logbsize = mp->m_sb.sb_logsunit; |
366 |
} else if (ap->logbufsize > 0 && |
367 |
ap->logbufsize < mp->m_sb.sb_logsunit) { |
368 |
cmn_err(CE_WARN, |
369 |
"XFS: logbuf size must be greater than or equal to log stripe size"); |
370 |
return XFS_ERROR(EINVAL); |
371 |
} |
372 |
} else { |
373 |
/* Fail a mount if the logbuf is larger than 32K */ |
374 |
if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { |
375 |
cmn_err(CE_WARN, |
376 |
"XFS: logbuf size for version 1 logs must be 16K or 32K"); |
377 |
return XFS_ERROR(EINVAL); |
378 |
} |
379 |
} |
380 |
|
381 |
if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) { |
382 |
mp->m_flags |= XFS_MOUNT_ATTR2; |
383 |
} |
384 |
|
385 |
/* |
386 |
* prohibit r/w mounts of read-only filesystems |
387 |
*/ |
388 |
if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { |
389 |
cmn_err(CE_WARN, |
390 |
"XFS: cannot mount a read-only filesystem as read-write"); |
391 |
return XFS_ERROR(EROFS); |
392 |
} |
393 |
|
394 |
/* |
395 |
* check for shared mount. |
396 |
*/ |
397 |
if (ap->flags & XFSMNT_SHARED) { |
398 |
if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb)) |
399 |
return XFS_ERROR(EINVAL); |
400 |
|
401 |
/* |
402 |
* For IRIX 6.5, shared mounts must have the shared |
403 |
* version bit set, have the persistent readonly |
404 |
* field set, must be version 0 and can only be mounted |
405 |
* read-only. |
406 |
*/ |
407 |
if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) || |
408 |
(mp->m_sb.sb_shared_vn != 0)) |
409 |
return XFS_ERROR(EINVAL); |
410 |
|
411 |
mp->m_flags |= XFS_MOUNT_SHARED; |
412 |
|
413 |
/* |
414 |
* Shared XFS V0 can't deal with DMI. Return EINVAL. |
415 |
*/ |
416 |
if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) |
417 |
return XFS_ERROR(EINVAL); |
418 |
} |
419 |
|
420 |
return 0; |
421 |
} |
422 |
|
423 |
/* |
424 |
* xfs_mount |
425 |
* |
426 |
* The file system configurations are: |
427 |
* (1) device (partition) with data and internal log |
428 |
* (2) logical volume with data and log subvolumes. |
429 |
* (3) logical volume with data, log, and realtime subvolumes. |
430 |
* |
431 |
* We only have to handle opening the log and realtime volumes here if |
432 |
* they are present. The data subvolume has already been opened by |
433 |
* get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev. |
434 |
*/ |
435 |
STATIC int |
436 |
xfs_mount( |
437 |
struct bhv_desc *bhvp, |
438 |
struct xfs_mount_args *args, |
439 |
cred_t *credp) |
440 |
{ |
441 |
struct xfs_vfs *vfsp = bhvtovfs(bhvp); |
442 |
struct bhv_desc *p; |
443 |
struct xfs_mount *mp = XFS_BHVTOM(bhvp); |
444 |
struct vnode *ddev, *logdev, *rtdev; |
445 |
int flags = 0, error; |
446 |
|
447 |
ddev = logdev = rtdev = NULL; |
448 |
|
449 |
error = xfs_blkdev_get(mp, args->fsname, &ddev); |
450 |
if (error) |
451 |
return error; |
452 |
|
453 |
/* |
454 |
* Setup xfs_mount function vectors from available behaviors |
455 |
*/ |
456 |
p = vfs_bhv_lookup(vfsp, VFS_POSITION_DM); |
457 |
mp->m_dm_ops = p ? *(xfs_dmops_t *) vfs_bhv_custom(p) : xfs_dmcore_stub; |
458 |
p = vfs_bhv_lookup(vfsp, VFS_POSITION_QM); |
459 |
mp->m_qm_ops = p ? *(xfs_qmops_t *) vfs_bhv_custom(p) : xfs_qmcore_stub; |
460 |
p = vfs_bhv_lookup(vfsp, VFS_POSITION_IO); |
461 |
mp->m_io_ops = p ? *(xfs_ioops_t *) vfs_bhv_custom(p) : xfs_iocore_xfs; |
462 |
|
463 |
if (args->flags & XFSMNT_QUIET) |
464 |
flags |= XFS_MFSI_QUIET; |
465 |
|
466 |
/* |
467 |
* Open real time and log devices - order is important. |
468 |
*/ |
469 |
if (args->logname[0]) { |
470 |
error = xfs_blkdev_get(mp, args->logname, &logdev); |
471 |
if (error) { |
472 |
xfs_blkdev_put(ddev); |
473 |
return error; |
474 |
} |
475 |
} |
476 |
if (args->rtname[0]) { |
477 |
error = xfs_blkdev_get(mp, args->rtname, &rtdev); |
478 |
if (error) { |
479 |
xfs_blkdev_put(logdev); |
480 |
xfs_blkdev_put(ddev); |
481 |
return error; |
482 |
} |
483 |
|
484 |
if (rtdev == ddev || rtdev == logdev) { |
485 |
cmn_err(CE_WARN, |
486 |
"XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); |
487 |
xfs_blkdev_put(logdev); |
488 |
xfs_blkdev_put(rtdev); |
489 |
xfs_blkdev_put(ddev); |
490 |
return EINVAL; |
491 |
} |
492 |
} |
493 |
|
494 |
/* |
495 |
* Setup xfs_mount buffer target pointers |
496 |
*/ |
497 |
error = ENOMEM; |
498 |
mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); |
499 |
if (!mp->m_ddev_targp) { |
500 |
xfs_blkdev_put(logdev); |
501 |
xfs_blkdev_put(rtdev); |
502 |
return error; |
503 |
} |
504 |
if (rtdev) { |
505 |
mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); |
506 |
if (!mp->m_rtdev_targp) |
507 |
goto error0; |
508 |
} |
509 |
mp->m_logdev_targp = (logdev && logdev != ddev) ? |
510 |
xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp; |
511 |
if (!mp->m_logdev_targp) |
512 |
goto error0; |
513 |
|
514 |
/* |
515 |
* Setup flags based on mount(2) options and then the superblock |
516 |
*/ |
517 |
error = xfs_start_flags(vfsp, args, mp); |
518 |
if (error) |
519 |
goto error1; |
520 |
error = xfs_readsb(mp, flags); |
521 |
if (error) |
522 |
goto error1; |
523 |
error = xfs_finish_flags(vfsp, args, mp); |
524 |
if (error) |
525 |
goto error2; |
526 |
|
527 |
/* |
528 |
* Setup xfs_mount buffer target pointers based on superblock |
529 |
*/ |
530 |
error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, |
531 |
mp->m_sb.sb_sectsize); |
532 |
if (!error && logdev && logdev != ddev) { |
533 |
unsigned int log_sector_size = BBSIZE; |
534 |
|
535 |
if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) |
536 |
log_sector_size = mp->m_sb.sb_logsectsize; |
537 |
error = xfs_setsize_buftarg(mp->m_logdev_targp, |
538 |
mp->m_sb.sb_blocksize, |
539 |
log_sector_size); |
540 |
} |
541 |
if (!error && rtdev) |
542 |
error = xfs_setsize_buftarg(mp->m_rtdev_targp, |
543 |
mp->m_sb.sb_blocksize, |
544 |
mp->m_sb.sb_sectsize); |
545 |
if (error) |
546 |
goto error2; |
547 |
|
548 |
if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY)) |
549 |
xfs_mountfs_check_barriers(mp); |
550 |
|
551 |
error = XFS_IOINIT(vfsp, args, flags); |
552 |
if (error) |
553 |
goto error2; |
554 |
|
555 |
return 0; |
556 |
|
557 |
error2: |
558 |
if (mp->m_sb_bp) |
559 |
xfs_freesb(mp); |
560 |
error1: |
561 |
xfs_binval(mp->m_ddev_targp); |
562 |
if (logdev && logdev != ddev) |
563 |
xfs_binval(mp->m_logdev_targp); |
564 |
if (rtdev) |
565 |
xfs_binval(mp->m_rtdev_targp); |
566 |
error0: |
567 |
xfs_unmountfs_close(mp, credp); |
568 |
return error; |
569 |
} |
570 |
|
571 |
STATIC int |
572 |
xfs_unmount( |
573 |
bhv_desc_t *bdp, |
574 |
int flags, |
575 |
cred_t *credp) |
576 |
{ |
577 |
struct xfs_vfs *vfsp = bhvtovfs(bdp); |
578 |
xfs_mount_t *mp = XFS_BHVTOM(bdp); |
579 |
xfs_inode_t *rip; |
580 |
xfs_vnode_t *rvp; |
581 |
int unmount_event_wanted = 0; |
582 |
int unmount_event_flags = 0; |
583 |
int xfs_unmountfs_needed = 0; |
584 |
int error; |
585 |
|
586 |
rip = mp->m_rootip; |
587 |
rvp = XFS_ITOV(rip); |
588 |
|
589 |
if (vfsp->vfs_flag & VFS_DMI) { |
590 |
error = XFS_SEND_PREUNMOUNT(mp, vfsp, |
591 |
rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL, |
592 |
NULL, NULL, 0, 0, |
593 |
(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? |
594 |
0:DM_FLAGS_UNWANTED); |
595 |
if (error) |
596 |
return XFS_ERROR(error); |
597 |
unmount_event_wanted = 1; |
598 |
unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))? |
599 |
0 : DM_FLAGS_UNWANTED; |
600 |
} |
601 |
|
602 |
/* |
603 |
* Linux (& presumably Irix) do not reach this code if |
604 |
* any of this FS vnodes have active references. FreeBSD |
605 |
* relies on FS to clean after itself. |
606 |
*/ |
607 |
xfs_iflush_all(mp); |
608 |
|
609 |
/* |
610 |
* First blow any referenced inode from this file system |
611 |
* out of the reference cache, and delete the timer. |
612 |
*/ |
613 |
xfs_refcache_purge_mp(mp); |
614 |
|
615 |
XFS_bflush(mp->m_ddev_targp); |
616 |
error = xfs_unmount_flush(mp, 0); |
617 |
if (error) |
618 |
goto out; |
619 |
|
620 |
ASSERT(vn_count(rvp) == 1); |
621 |
|
622 |
/* |
623 |
* Drop the reference count |
624 |
*/ |
625 |
VN_RELE(rvp); |
626 |
|
627 |
/* |
628 |
* If we're forcing a shutdown, typically because of a media error, |
629 |
* we want to make sure we invalidate dirty pages that belong to |
630 |
* referenced vnodes as well. |
631 |
*/ |
632 |
if (XFS_FORCED_SHUTDOWN(mp)) { |
633 |
error = xfs_sync(&mp->m_bhv, |
634 |
(SYNC_WAIT | SYNC_CLOSE), credp); |
635 |
ASSERT(error != EFSCORRUPTED); |
636 |
} |
637 |
xfs_unmountfs_needed = 1; |
638 |
|
639 |
out: |
640 |
/* Send DMAPI event, if required. |
641 |
* Then do xfs_unmountfs() if needed. |
642 |
* Then return error (or zero). |
643 |
*/ |
644 |
if (unmount_event_wanted) { |
645 |
/* Note: mp structure must still exist for |
646 |
* XFS_SEND_UNMOUNT() call. |
647 |
*/ |
648 |
XFS_SEND_UNMOUNT(mp, vfsp, error == 0 ? rvp : NULL, |
649 |
DM_RIGHT_NULL, 0, error, unmount_event_flags); |
650 |
} |
651 |
if (xfs_unmountfs_needed) { |
652 |
/* |
653 |
* Call common unmount function to flush to disk |
654 |
* and free the super block buffer & mount structures. |
655 |
*/ |
656 |
xfs_unmountfs(mp, credp); |
657 |
} |
658 |
|
659 |
return XFS_ERROR(error); |
660 |
} |
661 |
|
662 |
STATIC int |
663 |
xfs_quiesce_fs( |
664 |
xfs_mount_t *mp) |
665 |
{ |
666 |
int count = 0, pincount; |
667 |
|
668 |
xfs_refcache_purge_mp(mp); |
669 |
xfs_flush_buftarg(mp->m_ddev_targp, 0); |
670 |
xfs_finish_reclaim_all(mp, 0); |
671 |
|
672 |
/* This loop must run at least twice. |
673 |
* The first instance of the loop will flush |
674 |
* most meta data but that will generate more |
675 |
* meta data (typically directory updates). |
676 |
* Which then must be flushed and logged before |
677 |
* we can write the unmount record. |
678 |
*/ |
679 |
do { |
680 |
xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL); |
681 |
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); |
682 |
if (!pincount) { |
683 |
delay(50); |
684 |
count++; |
685 |
} |
686 |
} while (count < 2); |
687 |
|
688 |
return 0; |
689 |
} |
690 |
|
691 |
/*
 * XXXKAN: stub - prints a not-implemented notice and reports a flush
 * count of zero through the third argument when it is non-NULL.
 */
#define pagebuf_delwri_flush(a, b, c)					\
	do {								\
		printf("pagebuf_delwri_flush NI\n");			\
		if (c)							\
			*((int *)(c)) = 0;				\
	} while (0)
STATIC int |
699 |
xfs_mntupdate( |
700 |
bhv_desc_t *bdp, |
701 |
int *flags, |
702 |
struct xfs_mount_args *args) |
703 |
{ |
704 |
struct xfs_vfs *vfsp = bhvtovfs(bdp); |
705 |
xfs_mount_t *mp = XFS_BHVTOM(bdp); |
706 |
int error; |
707 |
|
708 |
#ifdef RMC |
709 |
if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */ |
710 |
#endif |
711 |
if (!(*flags & VFS_RDONLY)) { /* rw/ro -> rw */ |
712 |
if (vfsp->vfs_flag & VFS_RDONLY) |
713 |
vfsp->vfs_flag &= ~VFS_RDONLY; |
714 |
if (args->flags & XFSMNT_BARRIER) { |
715 |
mp->m_flags |= XFS_MOUNT_BARRIER; |
716 |
xfs_mountfs_check_barriers(mp); |
717 |
} else { |
718 |
mp->m_flags &= ~XFS_MOUNT_BARRIER; |
719 |
} |
720 |
} else if (!(vfsp->vfs_flag & VFS_RDONLY)) { /* rw -> ro */ |
721 |
XVFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error); |
722 |
xfs_quiesce_fs(mp); |
723 |
xfs_log_unmount_write(mp); |
724 |
xfs_unmountfs_writesb(mp); |
725 |
vfsp->vfs_flag |= VFS_RDONLY; |
726 |
} |
727 |
return 0; |
728 |
} |
729 |
|
730 |
/* |
731 |
* xfs_unmount_flush implements a set of flush operation on special |
732 |
* inodes, which are needed as a separate set of operations so that |
733 |
* they can be called as part of relocation process. |
734 |
*/ |
735 |
int |
736 |
xfs_unmount_flush( |
737 |
xfs_mount_t *mp, /* Mount structure we are getting |
738 |
rid of. */ |
739 |
int relocation) /* Called from vfs relocation. */ |
740 |
{ |
741 |
xfs_inode_t *rip = mp->m_rootip; |
742 |
xfs_inode_t *rbmip; |
743 |
xfs_inode_t *rsumip = NULL; |
744 |
xfs_vnode_t *rvp = XFS_ITOV_NULL(rip); |
745 |
int error; |
746 |
|
747 |
if (rvp == NULL) |
748 |
return (0); |
749 |
xfs_ilock(rip, XFS_ILOCK_EXCL); |
750 |
xfs_iflock(rip); |
751 |
|
752 |
/* |
753 |
* Flush out the real time inodes. |
754 |
*/ |
755 |
if ((rbmip = mp->m_rbmip) != NULL) { |
756 |
xfs_ilock(rbmip, XFS_ILOCK_EXCL); |
757 |
xfs_iflock(rbmip); |
758 |
error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC); |
759 |
xfs_iunlock(rbmip, XFS_ILOCK_EXCL); |
760 |
|
761 |
if (error == EFSCORRUPTED) |
762 |
goto fscorrupt_out; |
763 |
|
764 |
ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); |
765 |
|
766 |
rsumip = mp->m_rsumip; |
767 |
xfs_ilock(rsumip, XFS_ILOCK_EXCL); |
768 |
xfs_iflock(rsumip); |
769 |
error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC); |
770 |
xfs_iunlock(rsumip, XFS_ILOCK_EXCL); |
771 |
|
772 |
if (error == EFSCORRUPTED) |
773 |
goto fscorrupt_out; |
774 |
|
775 |
ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); |
776 |
} |
777 |
|
778 |
/* |
779 |
* Synchronously flush root inode to disk |
780 |
*/ |
781 |
error = xfs_iflush(rip, XFS_IFLUSH_SYNC); |
782 |
if (error == EFSCORRUPTED) |
783 |
goto fscorrupt_out2; |
784 |
|
785 |
if (vn_count(rvp) != 1 && !relocation) { |
786 |
xfs_iunlock(rip, XFS_ILOCK_EXCL); |
787 |
return XFS_ERROR(EBUSY); |
788 |
} |
789 |
|
790 |
/* |
791 |
* Release dquot that rootinode, rbmino and rsumino might be holding, |
792 |
* flush and purge the quota inodes. |
793 |
*/ |
794 |
error = XFS_QM_UNMOUNT(mp); |
795 |
if (error == EFSCORRUPTED) |
796 |
goto fscorrupt_out2; |
797 |
|
798 |
if (rbmip) { |
799 |
VN_RELE(XFS_ITOV(rbmip)); |
800 |
VN_RELE(XFS_ITOV(rsumip)); |
801 |
} |
802 |
|
803 |
xfs_iunlock(rip, XFS_ILOCK_EXCL); |
804 |
return 0; |
805 |
|
806 |
fscorrupt_out: |
807 |
xfs_ifunlock(rip); |
808 |
|
809 |
fscorrupt_out2: |
810 |
xfs_iunlock(rip, XFS_ILOCK_EXCL); |
811 |
|
812 |
return XFS_ERROR(EFSCORRUPTED); |
813 |
} |
814 |
|
815 |
/* |
816 |
* xfs_root extracts the root vnode from a vfs. |
817 |
* |
818 |
* vfsp -- the vfs struct for the desired file system |
819 |
* vpp -- address of the caller's vnode pointer which should be |
820 |
* set to the desired fs root vnode |
821 |
*/ |
822 |
STATIC int |
823 |
xfs_root( |
824 |
bhv_desc_t *bdp, |
825 |
xfs_vnode_t **vpp) |
826 |
{ |
827 |
xfs_vnode_t *vp; |
828 |
|
829 |
vp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip); |
830 |
VN_HOLD(vp); |
831 |
*vpp = vp; |
832 |
return 0; |
833 |
} |
834 |
|
835 |
/* |
836 |
* xfs_statvfs |
837 |
* |
838 |
* Fill in the statvfs structure for the given file system. We use |
839 |
* the superblock lock in the mount structure to ensure a consistent |
840 |
* snapshot of the counters returned. |
841 |
*/ |
842 |
STATIC int |
843 |
xfs_statvfs( |
844 |
bhv_desc_t *bdp, |
845 |
xfs_statfs_t *statp, |
846 |
xfs_vnode_t *vp) |
847 |
{ |
848 |
__uint64_t fakeinos; |
849 |
xfs_extlen_t lsize; |
850 |
xfs_mount_t *mp; |
851 |
xfs_sb_t *sbp; |
852 |
unsigned long s; |
853 |
|
854 |
mp = XFS_BHVTOM(bdp); |
855 |
sbp = &(mp->m_sb); |
856 |
|
857 |
statp->f_type = XFS_SB_MAGIC; |
858 |
|
859 |
xfs_icsb_sync_counters_lazy(mp); |
860 |
s = XFS_SB_LOCK(mp); |
861 |
statp->f_bsize = sbp->sb_blocksize; |
862 |
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; |
863 |
statp->f_blocks = sbp->sb_dblocks - lsize; |
864 |
statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; |
865 |
fakeinos = statp->f_bfree << sbp->sb_inopblog; |
866 |
#if XFS_BIG_INUMS |
867 |
fakeinos += mp->m_inoadd; |
868 |
#endif |
869 |
statp->f_files = |
870 |
MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); |
871 |
if (mp->m_maxicount) |
872 |
#if XFS_BIG_INUMS |
873 |
if (!mp->m_inoadd) |
874 |
#endif |
875 |
statp->f_files = min_t(typeof(statp->f_files), |
876 |
statp->f_files, |
877 |
mp->m_maxicount); |
878 |
statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); |
879 |
XFS_SB_UNLOCK(mp, s); |
880 |
|
881 |
xfs_statvfs_fsid(statp, mp); |
882 |
return 0; |
883 |
} |
884 |
|
885 |
|
886 |
/* |
887 |
* xfs_sync flushes any pending I/O to file system vfsp. |
888 |
* |
889 |
* This routine is called by vfs_sync() to make sure that things make it |
890 |
* out to disk eventually, on sync() system calls to flush out everything, |
891 |
* and when the file system is unmounted. For the vfs_sync() case, all |
892 |
* we really need to do is sync out the log to make all of our meta-data |
893 |
* updates permanent (except for timestamps). For calls from pflushd(), |
894 |
* dirty pages are kept moving by calling pdflush() on the inodes |
895 |
* containing them. We also flush the inodes that we can lock without |
896 |
* sleeping and the superblock if we can lock it without sleeping from |
897 |
* vfs_sync() so that items at the tail of the log are always moving out. |
898 |
* |
899 |
* Flags: |
900 |
* SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want |
901 |
* to sleep if we can help it. All we really need |
902 |
* to do is ensure that the log is synced at least |
903 |
* periodically. We also push the inodes and |
904 |
* superblock if we can lock them without sleeping |
905 |
* and they are not pinned. |
906 |
* SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not |
907 |
* set, then we really want to lock each inode and flush |
908 |
* it. |
909 |
* SYNC_WAIT - All the flushes that take place in this call should |
910 |
* be synchronous. |
911 |
* SYNC_DELWRI - This tells us to push dirty pages associated with |
912 |
* inodes. SYNC_WAIT and SYNC_BDFLUSH are used to |
913 |
* determine if they should be flushed sync, async, or |
914 |
* delwri. |
915 |
* SYNC_CLOSE - This flag is passed when the system is being |
916 |
* unmounted. We should sync and invalidate everything. |
917 |
* SYNC_FSDATA - This indicates that the caller would like to make |
918 |
* sure the superblock is safe on disk. We can ensure |
919 |
* this by simply making sure the log gets flushed |
920 |
* if SYNC_BDFLUSH is set, and by actually writing it |
921 |
* out otherwise. |
922 |
* |
923 |
*/ |
924 |
/*ARGSUSED*/ |
925 |
STATIC int |
926 |
xfs_sync( |
927 |
bhv_desc_t *bdp, |
928 |
int flags, |
929 |
cred_t *credp) |
930 |
{ |
931 |
xfs_mount_t *mp = XFS_BHVTOM(bdp); |
932 |
|
933 |
if (unlikely(flags == SYNC_QUIESCE)) |
934 |
return xfs_quiesce_fs(mp); |
935 |
else |
936 |
return xfs_syncsub(mp, flags, 0, NULL); |
937 |
} |
938 |
|
939 |
/* |
940 |
* xfs sync routine for internal use |
941 |
* |
942 |
* This routine supports all of the flags defined for the generic VFS_SYNC |
943 |
* interface as explained above under xfs_sync. In the interests of not |
944 |
* changing interfaces within the 6.5 family, additional internally- |
945 |
* required functions are specified within a separate xflags parameter, |
946 |
* only available by calling this routine. |
947 |
* |
948 |
*/ |
949 |
int |
950 |
xfs_sync_inodes( |
951 |
xfs_mount_t *mp, |
952 |
int flags, |
953 |
int xflags, |
954 |
int *bypassed) |
955 |
{ |
956 |
xfs_inode_t *ip = NULL; |
957 |
xfs_inode_t *ip_next; |
958 |
xfs_buf_t *bp; |
959 |
xfs_vnode_t *vp = NULL; |
960 |
int error; |
961 |
int last_error; |
962 |
uint64_t fflag; |
963 |
uint lock_flags; |
964 |
uint base_lock_flags; |
965 |
boolean_t mount_locked; |
966 |
boolean_t vnode_refed; |
967 |
int preempt; |
968 |
xfs_dinode_t *dip; |
969 |
xfs_iptr_t *ipointer; |
970 |
#ifdef DEBUG |
971 |
boolean_t ipointer_in = B_FALSE; |
972 |
|
973 |
#define IPOINTER_SET ipointer_in = B_TRUE |
974 |
#define IPOINTER_CLR ipointer_in = B_FALSE |
975 |
#else |
976 |
#define IPOINTER_SET |
977 |
#define IPOINTER_CLR |
978 |
#endif |
979 |
|
980 |
|
981 |
/* Insert a marker record into the inode list after inode ip. The list |
982 |
* must be locked when this is called. After the call the list will no |
983 |
* longer be locked. |
984 |
*/ |
985 |
#define IPOINTER_INSERT(ip, mp) { \ |
986 |
ASSERT(ipointer_in == B_FALSE); \ |
987 |
ipointer->ip_mnext = ip->i_mnext; \ |
988 |
ipointer->ip_mprev = ip; \ |
989 |
ip->i_mnext = (xfs_inode_t *)ipointer; \ |
990 |
ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ |
991 |
preempt = 0; \ |
992 |
XFS_MOUNT_IUNLOCK(mp); \ |
993 |
mount_locked = B_FALSE; \ |
994 |
IPOINTER_SET; \ |
995 |
} |
996 |
|
997 |
/* Remove the marker from the inode list. If the marker was the only item |
998 |
* in the list then there are no remaining inodes and we should zero out |
999 |
* the whole list. If we are the current head of the list then move the head |
1000 |
* past us. |
1001 |
*/ |
1002 |
#define IPOINTER_REMOVE(ip, mp) { \ |
1003 |
ASSERT(ipointer_in == B_TRUE); \ |
1004 |
if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ |
1005 |
ip = ipointer->ip_mnext; \ |
1006 |
ip->i_mprev = ipointer->ip_mprev; \ |
1007 |
ipointer->ip_mprev->i_mnext = ip; \ |
1008 |
if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ |
1009 |
mp->m_inodes = ip; \ |
1010 |
} \ |
1011 |
} else { \ |
1012 |
ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ |
1013 |
mp->m_inodes = NULL; \ |
1014 |
ip = NULL; \ |
1015 |
} \ |
1016 |
IPOINTER_CLR; \ |
1017 |
} |
1018 |
|
1019 |
#define XFS_PREEMPT_MASK 0x7f |
1020 |
|
1021 |
if (bypassed) |
1022 |
*bypassed = 0; |
1023 |
if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) |
1024 |
return 0; |
1025 |
error = 0; |
1026 |
last_error = 0; |
1027 |
preempt = 0; |
1028 |
|
1029 |
/* Allocate a reference marker */ |
1030 |
ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); |
1031 |
|
1032 |
fflag = XFS_B_ASYNC; /* default is don't wait */ |
1033 |
if (flags & (SYNC_BDFLUSH | SYNC_DELWRI)) |
1034 |
fflag = XFS_B_DELWRI; |
1035 |
if (flags & SYNC_WAIT) |
1036 |
fflag = 0; /* synchronous overrides all */ |
1037 |
|
1038 |
base_lock_flags = XFS_ILOCK_SHARED; |
1039 |
if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { |
1040 |
/* |
1041 |
* We need the I/O lock if we're going to call any of |
1042 |
* the flush/inval routines. |
1043 |
*/ |
1044 |
base_lock_flags |= XFS_IOLOCK_SHARED; |
1045 |
} |
1046 |
|
1047 |
XFS_MOUNT_ILOCK(mp); |
1048 |
|
1049 |
ip = mp->m_inodes; |
1050 |
|
1051 |
mount_locked = B_TRUE; |
1052 |
vnode_refed = B_FALSE; |
1053 |
|
1054 |
IPOINTER_CLR; |
1055 |
|
1056 |
do { |
1057 |
ASSERT(ipointer_in == B_FALSE); |
1058 |
ASSERT(vnode_refed == B_FALSE); |
1059 |
|
1060 |
lock_flags = base_lock_flags; |
1061 |
|
1062 |
/* |
1063 |
* There were no inodes in the list, just break out |
1064 |
* of the loop. |
1065 |
*/ |
1066 |
if (ip == NULL) { |
1067 |
break; |
1068 |
} |
1069 |
|
1070 |
/* |
1071 |
* We found another sync thread marker - skip it |
1072 |
*/ |
1073 |
if (ip->i_mount == NULL) { |
1074 |
ip = ip->i_mnext; |
1075 |
continue; |
1076 |
} |
1077 |
|
1078 |
vp = XFS_ITOV_NULL(ip); |
1079 |
|
1080 |
/* |
1081 |
* If the vnode is gone then this is being torn down, |
1082 |
* call reclaim if it is flushed, else let regular flush |
1083 |
* code deal with it later in the loop. |
1084 |
*/ |
1085 |
|
1086 |
if (vp == NULL) { |
1087 |
/* Skip ones already in reclaim */ |
1088 |
if (ip->i_flags & XFS_IRECLAIM) { |
1089 |
ip = ip->i_mnext; |
1090 |
continue; |
1091 |
} |
1092 |
if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { |
1093 |
ip = ip->i_mnext; |
1094 |
} else if ((xfs_ipincount(ip) == 0) && |
1095 |
xfs_iflock_nowait(ip)) { |
1096 |
IPOINTER_INSERT(ip, mp); |
1097 |
|
1098 |
xfs_finish_reclaim(ip, 1, |
1099 |
XFS_IFLUSH_DELWRI_ELSE_ASYNC); |
1100 |
|
1101 |
XFS_MOUNT_ILOCK(mp); |
1102 |
mount_locked = B_TRUE; |
1103 |
IPOINTER_REMOVE(ip, mp); |
1104 |
} else { |
1105 |
xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1106 |
ip = ip->i_mnext; |
1107 |
} |
1108 |
continue; |
1109 |
} |
1110 |
|
1111 |
if (VN_BAD(vp)) { |
1112 |
ip = ip->i_mnext; |
1113 |
continue; |
1114 |
} |
1115 |
|
1116 |
if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { |
1117 |
XFS_MOUNT_IUNLOCK(mp); |
1118 |
kmem_free(ipointer, sizeof(xfs_iptr_t)); |
1119 |
return 0; |
1120 |
} |
1121 |
|
1122 |
/* |
1123 |
* If this is just vfs_sync() or pflushd() calling |
1124 |
* then we can skip inodes for which it looks like |
1125 |
* there is nothing to do. Since we don't have the |
1126 |
* inode locked this is racy, but these are periodic |
1127 |
* calls so it doesn't matter. For the others we want |
1128 |
* to know for sure, so we at least try to lock them. |
1129 |
*/ |
1130 |
if (flags & SYNC_BDFLUSH) { |
1131 |
if (((ip->i_itemp == NULL) || |
1132 |
!(ip->i_itemp->ili_format.ilf_fields & |
1133 |
XFS_ILOG_ALL)) && |
1134 |
(ip->i_update_core == 0)) { |
1135 |
ip = ip->i_mnext; |
1136 |
continue; |
1137 |
} |
1138 |
} |
1139 |
|
1140 |
/* |
1141 |
* Try to lock without sleeping. We're out of order with |
1142 |
* the inode list lock here, so if we fail we need to drop |
1143 |
* the mount lock and try again. If we're called from |
1144 |
* bdflush() here, then don't bother. |
1145 |
* |
1146 |
* The inode lock here actually coordinates with the |
1147 |
* almost spurious inode lock in xfs_ireclaim() to prevent |
1148 |
* the vnode we handle here without a reference from |
1149 |
* being freed while we reference it. If we lock the inode |
1150 |
* while it's on the mount list here, then the spurious inode |
1151 |
* lock in xfs_ireclaim() after the inode is pulled from |
1152 |
* the mount list will sleep until we release it here. |
1153 |
* This keeps the vnode from being freed while we reference |
1154 |
* it. |
1155 |
*/ |
1156 |
if (xfs_ilock_nowait(ip, lock_flags) == 0) { |
1157 |
if ((flags & SYNC_BDFLUSH) || (vp == NULL)) { |
1158 |
ip = ip->i_mnext; |
1159 |
continue; |
1160 |
} |
1161 |
|
1162 |
vp = vn_grab(vp); |
1163 |
if (vp == NULL) { |
1164 |
ip = ip->i_mnext; |
1165 |
continue; |
1166 |
} |
1167 |
|
1168 |
IPOINTER_INSERT(ip, mp); |
1169 |
xfs_ilock(ip, lock_flags); |
1170 |
|
1171 |
ASSERT(vp == XFS_ITOV(ip)); |
1172 |
ASSERT(ip->i_mount == mp); |
1173 |
|
1174 |
vnode_refed = B_TRUE; |
1175 |
} |
1176 |
|
1177 |
/* From here on in the loop we may have a marker record |
1178 |
* in the inode list. |
1179 |
*/ |
1180 |
|
1181 |
if ((flags & SYNC_CLOSE) && (vp != NULL)) { |
1182 |
/* |
1183 |
* This is the shutdown case. We just need to |
1184 |
* flush and invalidate all the pages associated |
1185 |
* with the inode. Drop the inode lock since |
1186 |
* we can't hold it across calls to the buffer |
1187 |
* cache. |
1188 |
* |
1189 |
* We don't set the VREMAPPING bit in the vnode |
1190 |
* here, because we don't hold the vnode lock |
1191 |
* exclusively. It doesn't really matter, though, |
1192 |
* because we only come here when we're shutting |
1193 |
* down anyway. |
1194 |
*/ |
1195 |
xfs_iunlock(ip, XFS_ILOCK_SHARED); |
1196 |
|
1197 |
if (XFS_FORCED_SHUTDOWN(mp)) { |
1198 |
XVOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); |
1199 |
} else { |
1200 |
XVOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF); |
1201 |
} |
1202 |
|
1203 |
xfs_ilock(ip, XFS_ILOCK_SHARED); |
1204 |
|
1205 |
} else if ((flags & SYNC_DELWRI) && (vp != NULL)) { |
1206 |
if (VN_DIRTY(vp)) { |
1207 |
/* We need to have dropped the lock here, |
1208 |
* so insert a marker if we have not already |
1209 |
* done so. |
1210 |
*/ |
1211 |
if (mount_locked) { |
1212 |
IPOINTER_INSERT(ip, mp); |
1213 |
} |
1214 |
|
1215 |
/* |
1216 |
* Drop the inode lock since we can't hold it |
1217 |
* across calls to the buffer cache. |
1218 |
*/ |
1219 |
xfs_iunlock(ip, XFS_ILOCK_SHARED); |
1220 |
XVOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, |
1221 |
fflag, FI_NONE, error); |
1222 |
xfs_ilock(ip, XFS_ILOCK_SHARED); |
1223 |
} |
1224 |
|
1225 |
} |
1226 |
|
1227 |
if (flags & SYNC_BDFLUSH) { |
1228 |
if ((flags & SYNC_ATTR) && |
1229 |
((ip->i_update_core) || |
1230 |
((ip->i_itemp != NULL) && |
1231 |
(ip->i_itemp->ili_format.ilf_fields != 0)))) { |
1232 |
|
1233 |
/* Insert marker and drop lock if not already |
1234 |
* done. |
1235 |
*/ |
1236 |
if (mount_locked) { |
1237 |
IPOINTER_INSERT(ip, mp); |
1238 |
} |
1239 |
|
1240 |
/* |
1241 |
* We don't want the periodic flushing of the |
1242 |
* inodes by vfs_sync() to interfere with |
1243 |
* I/O to the file, especially read I/O |
1244 |
* where it is only the access time stamp |
1245 |
* that is being flushed out. To prevent |
1246 |
* long periods where we have both inode |
1247 |
* locks held shared here while reading the |
1248 |
* inode's buffer in from disk, we drop the |
1249 |
* inode lock while reading in the inode |
1250 |
* buffer. We have to release the buffer |
1251 |
* and reacquire the inode lock so that they |
1252 |
* are acquired in the proper order (inode |
1253 |
* locks first). The buffer will go at the |
1254 |
* end of the lru chain, though, so we can |
1255 |
* expect it to still be there when we go |
1256 |
* for it again in xfs_iflush(). |
1257 |
*/ |
1258 |
if ((xfs_ipincount(ip) == 0) && |
1259 |
xfs_iflock_nowait(ip)) { |
1260 |
|
1261 |
xfs_ifunlock(ip); |
1262 |
xfs_iunlock(ip, XFS_ILOCK_SHARED); |
1263 |
|
1264 |
error = xfs_itobp(mp, NULL, ip, |
1265 |
&dip, &bp, 0, 0); |
1266 |
if (!error) { |
1267 |
xfs_buf_relse(bp); |
1268 |
} else { |
1269 |
/* Bailing out, remove the |
1270 |
* marker and free it. |
1271 |
*/ |
1272 |
XFS_MOUNT_ILOCK(mp); |
1273 |
|
1274 |
IPOINTER_REMOVE(ip, mp); |
1275 |
|
1276 |
XFS_MOUNT_IUNLOCK(mp); |
1277 |
|
1278 |
ASSERT(!(lock_flags & |
1279 |
XFS_IOLOCK_SHARED)); |
1280 |
|
1281 |
kmem_free(ipointer, |
1282 |
sizeof(xfs_iptr_t)); |
1283 |
return (0); |
1284 |
} |
1285 |
|
1286 |
/* |
1287 |
* Since we dropped the inode lock, |
1288 |
* the inode may have been reclaimed. |
1289 |
* Therefore, we reacquire the mount |
1290 |
* lock and check to see if we were the |
1291 |
* inode reclaimed. If this happened |
1292 |
* then the ipointer marker will no |
1293 |
* longer point back at us. In this |
1294 |
* case, move ip along to the inode |
1295 |
* after the marker, remove the marker |
1296 |
* and continue. |
1297 |
*/ |
1298 |
XFS_MOUNT_ILOCK(mp); |
1299 |
mount_locked = B_TRUE; |
1300 |
|
1301 |
if (ip != ipointer->ip_mprev) { |
1302 |
IPOINTER_REMOVE(ip, mp); |
1303 |
|
1304 |
ASSERT(!vnode_refed); |
1305 |
ASSERT(!(lock_flags & |
1306 |
XFS_IOLOCK_SHARED)); |
1307 |
continue; |
1308 |
} |
1309 |
|
1310 |
ASSERT(ip->i_mount == mp); |
1311 |
|
1312 |
if (xfs_ilock_nowait(ip, |
1313 |
XFS_ILOCK_SHARED) == 0) { |
1314 |
ASSERT(ip->i_mount == mp); |
1315 |
/* |
1316 |
* We failed to reacquire |
1317 |
* the inode lock without |
1318 |
* sleeping, so just skip |
1319 |
* the inode for now. We |
1320 |
* clear the ILOCK bit from |
1321 |
* the lock_flags so that we |
1322 |
* won't try to drop a lock |
1323 |
* we don't hold below. |
1324 |
*/ |
1325 |
lock_flags &= ~XFS_ILOCK_SHARED; |
1326 |
IPOINTER_REMOVE(ip_next, mp); |
1327 |
} else if ((xfs_ipincount(ip) == 0) && |
1328 |
xfs_iflock_nowait(ip)) { |
1329 |
ASSERT(ip->i_mount == mp); |
1330 |
/* |
1331 |
* Since this is vfs_sync() |
1332 |
* calling we only flush the |
1333 |
* inode out if we can lock |
1334 |
* it without sleeping and |
1335 |
* it is not pinned. Drop |
1336 |
* the mount lock here so |
1337 |
* that we don't hold it for |
1338 |
* too long. We already have |
1339 |
* a marker in the list here. |
1340 |
*/ |
1341 |
XFS_MOUNT_IUNLOCK(mp); |
1342 |
mount_locked = B_FALSE; |
1343 |
error = xfs_iflush(ip, |
1344 |
XFS_IFLUSH_DELWRI); |
1345 |
} else { |
1346 |
ASSERT(ip->i_mount == mp); |
1347 |
IPOINTER_REMOVE(ip_next, mp); |
1348 |
} |
1349 |
} |
1350 |
|
1351 |
} |
1352 |
|
1353 |
} else { |
1354 |
if ((flags & SYNC_ATTR) && |
1355 |
((ip->i_update_core) || |
1356 |
((ip->i_itemp != NULL) && |
1357 |
(ip->i_itemp->ili_format.ilf_fields != 0)))) { |
1358 |
if (mount_locked) { |
1359 |
IPOINTER_INSERT(ip, mp); |
1360 |
} |
1361 |
|
1362 |
if (flags & SYNC_WAIT) { |
1363 |
xfs_iflock(ip); |
1364 |
error = xfs_iflush(ip, |
1365 |
XFS_IFLUSH_SYNC); |
1366 |
} else { |
1367 |
/* |
1368 |
* If we can't acquire the flush |
1369 |
* lock, then the inode is already |
1370 |
* being flushed so don't bother |
1371 |
* waiting. If we can lock it then |
1372 |
* do a delwri flush so we can |
1373 |
* combine multiple inode flushes |
1374 |
* in each disk write. |
1375 |
*/ |
1376 |
if (xfs_iflock_nowait(ip)) { |
1377 |
error = xfs_iflush(ip, |
1378 |
XFS_IFLUSH_DELWRI); |
1379 |
} |
1380 |
else if (bypassed) |
1381 |
(*bypassed)++; |
1382 |
} |
1383 |
} |
1384 |
} |
1385 |
|
1386 |
if (lock_flags != 0) { |
1387 |
xfs_iunlock(ip, lock_flags); |
1388 |
} |
1389 |
|
1390 |
if (vnode_refed) { |
1391 |
/* |
1392 |
* If we had to take a reference on the vnode |
1393 |
* above, then wait until after we've unlocked |
1394 |
* the inode to release the reference. This is |
1395 |
* because we can be already holding the inode |
1396 |
* lock when VN_RELE() calls xfs_inactive(). |
1397 |
* |
1398 |
* Make sure to drop the mount lock before calling |
1399 |
* VN_RELE() so that we don't trip over ourselves if |
1400 |
* we have to go for the mount lock again in the |
1401 |
* inactive code. |
1402 |
*/ |
1403 |
if (mount_locked) { |
1404 |
IPOINTER_INSERT(ip, mp); |
1405 |
} |
1406 |
|
1407 |
VN_RELE(vp); |
1408 |
|
1409 |
vnode_refed = B_FALSE; |
1410 |
} |
1411 |
|
1412 |
if (error) { |
1413 |
last_error = error; |
1414 |
} |
1415 |
|
1416 |
/* |
1417 |
* bail out if the filesystem is corrupted. |
1418 |
*/ |
1419 |
if (error == EFSCORRUPTED) { |
1420 |
if (!mount_locked) { |
1421 |
XFS_MOUNT_ILOCK(mp); |
1422 |
IPOINTER_REMOVE(ip, mp); |
1423 |
} |
1424 |
XFS_MOUNT_IUNLOCK(mp); |
1425 |
ASSERT(ipointer_in == B_FALSE); |
1426 |
kmem_free(ipointer, sizeof(xfs_iptr_t)); |
1427 |
return XFS_ERROR(error); |
1428 |
} |
1429 |
|
1430 |
/* Let other threads have a chance at the mount lock |
1431 |
* if we have looped many times without dropping the |
1432 |
* lock. |
1433 |
*/ |
1434 |
if ((++preempt & XFS_PREEMPT_MASK) == 0) { |
1435 |
if (mount_locked) { |
1436 |
IPOINTER_INSERT(ip, mp); |
1437 |
} |
1438 |
} |
1439 |
|
1440 |
if (mount_locked == B_FALSE) { |
1441 |
XFS_MOUNT_ILOCK(mp); |
1442 |
mount_locked = B_TRUE; |
1443 |
IPOINTER_REMOVE(ip, mp); |
1444 |
continue; |
1445 |
} |
1446 |
|
1447 |
ASSERT(ipointer_in == B_FALSE); |
1448 |
ip = ip->i_mnext; |
1449 |
|
1450 |
} while (ip != mp->m_inodes); |
1451 |
|
1452 |
XFS_MOUNT_IUNLOCK(mp); |
1453 |
|
1454 |
ASSERT(ipointer_in == B_FALSE); |
1455 |
|
1456 |
kmem_free(ipointer, sizeof(xfs_iptr_t)); |
1457 |
return XFS_ERROR(last_error); |
1458 |
} |
1459 |
|
1460 |
/* |
1461 |
* xfs sync routine for internal use |
1462 |
* |
1463 |
* This routine supports all of the flags defined for the generic VFS_SYNC |
1464 |
* interface as explained above under xfs_sync. In the interests of not |
1465 |
* changing interfaces within the 6.5 family, additional internally- |
1466 |
* required functions are specified within a separate xflags parameter, |
1467 |
* only available by calling this routine. |
1468 |
* |
1469 |
*/ |
1470 |
int |
1471 |
xfs_syncsub( |
1472 |
xfs_mount_t *mp, |
1473 |
int flags, |
1474 |
int xflags, |
1475 |
int *bypassed) |
1476 |
{ |
1477 |
int error = 0; |
1478 |
int last_error = 0; |
1479 |
uint log_flags = XFS_LOG_FORCE; |
1480 |
xfs_buf_t *bp; |
1481 |
xfs_buf_log_item_t *bip; |
1482 |
|
1483 |
/* |
1484 |
* Sync out the log. This ensures that the log is periodically |
1485 |
* flushed even if there is not enough activity to fill it up. |
1486 |
*/ |
1487 |
if (flags & SYNC_WAIT) |
1488 |
log_flags |= XFS_LOG_SYNC; |
1489 |
|
1490 |
xfs_log_force(mp, (xfs_lsn_t)0, log_flags); |
1491 |
|
1492 |
if (flags & (SYNC_ATTR|SYNC_DELWRI)) { |
1493 |
if (flags & SYNC_BDFLUSH) |
1494 |
xfs_finish_reclaim_all(mp, 1); |
1495 |
else |
1496 |
error = xfs_sync_inodes(mp, flags, xflags, bypassed); |
1497 |
} |
1498 |
|
1499 |
/* |
1500 |
* Flushing out dirty data above probably generated more |
1501 |
* log activity, so if this isn't vfs_sync() then flush |
1502 |
* the log again. |
1503 |
*/ |
1504 |
if (flags & SYNC_DELWRI) { |
1505 |
xfs_log_force(mp, (xfs_lsn_t)0, log_flags); |
1506 |
} |
1507 |
|
1508 |
if (flags & SYNC_FSDATA) { |
1509 |
/* |
1510 |
* If this is vfs_sync() then only sync the superblock |
1511 |
* if we can lock it without sleeping and it is not pinned. |
1512 |
*/ |
1513 |
if (flags & SYNC_BDFLUSH) { |
1514 |
bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); |
1515 |
if (bp != NULL) { |
1516 |
bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); |
1517 |
if ((bip != NULL) && |
1518 |
xfs_buf_item_dirty(bip)) { |
1519 |
if (!(XFS_BUF_ISPINNED(bp))) { |
1520 |
XFS_BUF_ASYNC(bp); |
1521 |
error = xfs_bwrite(mp, bp); |
1522 |
} else { |
1523 |
xfs_buf_relse(bp); |
1524 |
} |
1525 |
} else { |
1526 |
xfs_buf_relse(bp); |
1527 |
} |
1528 |
} |
1529 |
} else { |
1530 |
bp = xfs_getsb(mp, 0); |
1531 |
/* |
1532 |
* If the buffer is pinned then push on the log so |
1533 |
* we won't get stuck waiting in the write for |
1534 |
* someone, maybe ourselves, to flush the log. |
1535 |
* Even though we just pushed the log above, we |
1536 |
* did not have the superblock buffer locked at |
1537 |
* that point so it can become pinned in between |
1538 |
* there and here. |
1539 |
*/ |
1540 |
if (XFS_BUF_ISPINNED(bp)) |
1541 |
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
1542 |
if (flags & SYNC_WAIT) |
1543 |
XFS_BUF_UNASYNC(bp); |
1544 |
else |
1545 |
XFS_BUF_ASYNC(bp); |
1546 |
error = xfs_bwrite(mp, bp); |
1547 |
} |
1548 |
if (error) { |
1549 |
last_error = error; |
1550 |
} |
1551 |
} |
1552 |
|
1553 |
/* |
1554 |
* If this is the periodic sync, then kick some entries out of |
1555 |
* the reference cache. This ensures that idle entries are |
1556 |
* eventually kicked out of the cache. |
1557 |
*/ |
1558 |
if (flags & SYNC_REFCACHE) { |
1559 |
if (flags & SYNC_WAIT) |
1560 |
xfs_refcache_purge_mp(mp); |
1561 |
else |
1562 |
xfs_refcache_purge_some(mp); |
1563 |
} |
1564 |
|
1565 |
/* |
1566 |
* Now check to see if the log needs a "dummy" transaction. |
1567 |
*/ |
1568 |
|
1569 |
if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { |
1570 |
xfs_trans_t *tp; |
1571 |
xfs_inode_t *ip; |
1572 |
|
1573 |
/* |
1574 |
* Put a dummy transaction in the log to tell |
1575 |
* recovery that all others are OK. |
1576 |
*/ |
1577 |
tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); |
1578 |
if ((error = xfs_trans_reserve(tp, 0, |
1579 |
XFS_ICHANGE_LOG_RES(mp), |
1580 |
0, 0, 0))) { |
1581 |
xfs_trans_cancel(tp, 0); |
1582 |
return error; |
1583 |
} |
1584 |
|
1585 |
ip = mp->m_rootip; |
1586 |
xfs_ilock(ip, XFS_ILOCK_EXCL); |
1587 |
|
1588 |
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
1589 |
xfs_trans_ihold(tp, ip); |
1590 |
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1591 |
error = xfs_trans_commit(tp, 0, NULL); |
1592 |
xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1593 |
xfs_log_force(mp, (xfs_lsn_t)0, log_flags); |
1594 |
} |
1595 |
|
1596 |
/* |
1597 |
* When shutting down, we need to insure that the AIL is pushed |
1598 |
* to disk or the filesystem can appear corrupt from the PROM. |
1599 |
*/ |
1600 |
if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { |
1601 |
XFS_bflush(mp->m_ddev_targp); |
1602 |
if (mp->m_rtdev_targp) { |
1603 |
XFS_bflush(mp->m_rtdev_targp); |
1604 |
} |
1605 |
} |
1606 |
|
1607 |
return XFS_ERROR(last_error); |
1608 |
} |
1609 |
|
1610 |
/* |
1611 |
* xfs_vget - called by DMAPI and NFSD to get vnode from file handle |
1612 |
*/ |
1613 |
STATIC int |
1614 |
xfs_vget( |
1615 |
bhv_desc_t *bdp, |
1616 |
xfs_vnode_t **vpp, |
1617 |
fid_t *fidp) |
1618 |
{ |
1619 |
xfs_mount_t *mp = XFS_BHVTOM(bdp); |
1620 |
xfs_fid_t *xfid = (struct xfs_fid *)fidp; |
1621 |
xfs_inode_t *ip; |
1622 |
int error; |
1623 |
xfs_ino_t ino; |
1624 |
unsigned int igen; |
1625 |
|
1626 |
/* |
1627 |
* Invalid. Since handles can be created in user space and passed in |
1628 |
* via gethandle(), this is not cause for a panic. |
1629 |
*/ |
1630 |
if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) |
1631 |
return XFS_ERROR(EINVAL); |
1632 |
|
1633 |
ino = xfid->xfs_fid_ino; |
1634 |
igen = xfid->xfs_fid_gen; |
1635 |
|
1636 |
/* |
1637 |
* NFS can sometimes send requests for ino 0. Fail them gracefully. |
1638 |
*/ |
1639 |
if (ino == 0) |
1640 |
return XFS_ERROR(ESTALE); |
1641 |
|
1642 |
error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); |
1643 |
if (error) { |
1644 |
*vpp = NULL; |
1645 |
return error; |
1646 |
} |
1647 |
|
1648 |
if (ip == NULL) { |
1649 |
*vpp = NULL; |
1650 |
return XFS_ERROR(EIO); |
1651 |
} |
1652 |
|
1653 |
if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { |
1654 |
xfs_iput_new(ip, XFS_ILOCK_SHARED); |
1655 |
*vpp = NULL; |
1656 |
return XFS_ERROR(ENOENT); |
1657 |
} |
1658 |
|
1659 |
*vpp = XFS_ITOV(ip); |
1660 |
xfs_iunlock(ip, XFS_ILOCK_SHARED); |
1661 |
return 0; |
1662 |
} |
1663 |
|
1664 |
|
1665 |
/* Mount option keywords recognized by xfs_parseargs(). */
#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
#define MNTOPT_LOGDEV	"logdev"	/* log device */
#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
#define MNTOPT_INO64	"ino64"		/* force inodes into 64-bit range */
#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
#define MNTOPT_BSDGROUPS	"bsdgroups"	/* group-ID from parent directory */
#define MNTOPT_SYSVGROUPS	"sysvgroups"	/* group-ID from current process */
#define MNTOPT_ALLOCSIZE	"allocsize"	/* preferred allocation size */
#define MNTOPT_IHASHSIZE	"ihashsize"	/* size of inode hash table */
#define MNTOPT_NORECOVERY	"norecovery"	/* don't run XFS recovery */
#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
					 * unwritten extent conversion */
#define MNTOPT_NOBARRIER	"nobarrier"	/* .. disable */
#define MNTOPT_OSYNCISOSYNC	"osyncisosync"	/* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE	"inode64"	/* inodes can be allocated anywhere */
#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
#define MNTOPT_LARGEIO	"largeio"	/* report large I/O sizes in stat() */
#define MNTOPT_NOLARGEIO	"nolargeio"	/* do not report large I/O sizes
						 * in stat(). */
#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
/* Linux compatibility shim: the parser was ported from Linux XFS. */
#define simple_strtoul	strtoul
1698 |
|
1699 |
/*
 * Parse an unsigned integer with an optional K/M/G size suffix
 * (case-insensitive), returning the value scaled accordingly.
 * NOTE: mutates the input string by overwriting the suffix with '\0'.
 */
STATIC unsigned long
suffix_strtoul(char *cp, char **endp, unsigned int base)
{
	int	last, shift_left_factor = 0;
	char	*value = (char *)cp;

	/*
	 * Guard against an empty string: strlen()-1 would be -1 and
	 * value[last] would read out of bounds.
	 */
	if (*value != '\0') {
		last = strlen(value) - 1;
		if (value[last] == 'K' || value[last] == 'k') {
			shift_left_factor = 10;	/* KiB */
			value[last] = '\0';
		}
		if (value[last] == 'M' || value[last] == 'm') {
			shift_left_factor = 20;	/* MiB */
			value[last] = '\0';
		}
		if (value[last] == 'G' || value[last] == 'g') {
			shift_left_factor = 30;	/* GiB */
			value[last] = '\0';
		}
	}

	return simple_strtoul(cp, endp, base) << shift_left_factor;
}
1721 |
|
1722 |
|
1723 |
STATIC int |
1724 |
xfs_parseargs( |
1725 |
struct bhv_desc *bhv, |
1726 |
char *options, |
1727 |
struct xfs_mount_args *args, |
1728 |
int update) |
1729 |
{ |
1730 |
struct xfs_vfs *vfsp = bhvtovfs(bhv); |
1731 |
char *this_char, *value, *eov; |
1732 |
int dsunit, dswidth, vol_dsunit, vol_dswidth; |
1733 |
int iosize; |
1734 |
|
1735 |
args->flags |= XFSMNT_IDELETE; |
1736 |
args->flags |= XFSMNT_BARRIER; |
1737 |
args->flags2 |= XFSMNT2_COMPAT_IOSIZE; |
1738 |
|
1739 |
if (!options) |
1740 |
goto done; |
1741 |
|
1742 |
iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; |
1743 |
|
1744 |
while ((this_char = strsep(&options, ",")) != NULL) { |
1745 |
if (!*this_char) |
1746 |
continue; |
1747 |
|
1748 |
if ((value = index(this_char, '=')) != NULL) |
1749 |
*value++ = 0; |
1750 |
|
1751 |
if (!strcmp(this_char, MNTOPT_LOGBUFS)) { |
1752 |
if (!value || !*value) { |
1753 |
printf("XFS: %s option requires an argument\n", |
1754 |
this_char); |
1755 |
return EINVAL; |
1756 |
} |
1757 |
args->logbufs = simple_strtoul(value, &eov, 10); |
1758 |
} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { |
1759 |
if (!value || !*value) { |
1760 |
printf("XFS: %s option requires an argument\n", |
1761 |
this_char); |
1762 |
return EINVAL; |
1763 |
} |
1764 |
args->logbufsize = suffix_strtoul(value, &eov, 10); |
1765 |
} else if (!strcmp(this_char, MNTOPT_LOGDEV)) { |
1766 |
if (!value || !*value) { |
1767 |
printf("XFS: %s option requires an argument\n", |
1768 |
this_char); |
1769 |
return EINVAL; |
1770 |
} |
1771 |
strncpy(args->logname, value, MAXNAMELEN); |
1772 |
} else if (!strcmp(this_char, MNTOPT_MTPT)) { |
1773 |
if (!value || !*value) { |
1774 |
printf("XFS: %s option requires an argument\n", |
1775 |
this_char); |
1776 |
return EINVAL; |
1777 |
} |
1778 |
strncpy(args->mtpt, value, MAXNAMELEN); |
1779 |
} else if (!strcmp(this_char, MNTOPT_RTDEV)) { |
1780 |
if (!value || !*value) { |
1781 |
printf("XFS: %s option requires an argument\n", |
1782 |
this_char); |
1783 |
return EINVAL; |
1784 |
} |
1785 |
strncpy(args->rtname, value, MAXNAMELEN); |
1786 |
} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { |
1787 |
if (!value || !*value) { |
1788 |
printf("XFS: %s option requires an argument\n", |
1789 |
this_char); |
1790 |
return EINVAL; |
1791 |
} |
1792 |
iosize = simple_strtoul(value, &eov, 10); |
1793 |
args->flags |= XFSMNT_IOSIZE; |
1794 |
args->iosizelog = (uint8_t) iosize; |
1795 |
} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { |
1796 |
if (!value || !*value) { |
1797 |
printk("XFS: %s option requires an argument\n", |
1798 |
this_char); |
1799 |
return EINVAL; |
1800 |
} |
1801 |
iosize = suffix_strtoul(value, &eov, 10); |
1802 |
args->flags |= XFSMNT_IOSIZE; |
1803 |
args->iosizelog = ffs(iosize) - 1; |
1804 |
} else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) { |
1805 |
if (!value || !*value) { |
1806 |
printk("XFS: %s option requires an argument\n", |
1807 |
this_char); |
1808 |
return EINVAL; |
1809 |
} |
1810 |
args->flags |= XFSMNT_IHASHSIZE; |
1811 |
args->ihashsize = simple_strtoul(value, &eov, 10); |
1812 |
} else if (!strcmp(this_char, MNTOPT_GRPID) || |
1813 |
!strcmp(this_char, MNTOPT_BSDGROUPS)) { |
1814 |
vfsp->vfs_flag |= VFS_GRPID; |
1815 |
} else if (!strcmp(this_char, MNTOPT_NOGRPID) || |
1816 |
!strcmp(this_char, MNTOPT_SYSVGROUPS)) { |
1817 |
vfsp->vfs_flag &= ~VFS_GRPID; |
1818 |
} else if (!strcmp(this_char, MNTOPT_WSYNC)) { |
1819 |
args->flags |= XFSMNT_WSYNC; |
1820 |
} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { |
1821 |
args->flags |= XFSMNT_OSYNCISOSYNC; |
1822 |
} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { |
1823 |
args->flags |= XFSMNT_NORECOVERY; |
1824 |
} else if (!strcmp(this_char, MNTOPT_INO64)) { |
1825 |
args->flags |= XFSMNT_INO64; |
1826 |
#if !XFS_BIG_INUMS |
1827 |
|
1828 |
printf("XFS: %s option not allowed on this system\n", |
1829 |
this_char); |
1830 |
return EINVAL; |
1831 |
#endif |
1832 |
} else if (!strcmp(this_char, MNTOPT_NOALIGN)) { |
1833 |
args->flags |= XFSMNT_NOALIGN; |
1834 |
} else if (!strcmp(this_char, MNTOPT_SWALLOC)) { |
1835 |
args->flags |= XFSMNT_SWALLOC; |
1836 |
} else if (!strcmp(this_char, MNTOPT_SUNIT)) { |
1837 |
if (!value || !*value) { |
1838 |
printf("XFS: %s option requires an argument\n", |
1839 |
this_char); |
1840 |
return EINVAL; |
1841 |
} |
1842 |
dsunit = simple_strtoul(value, &eov, 10); |
1843 |
} else if (!strcmp(this_char, MNTOPT_SWIDTH)) { |
1844 |
if (!value || !*value) { |
1845 |
printf("XFS: %s option requires an argument\n", |
1846 |
this_char); |
1847 |
return EINVAL; |
1848 |
} |
1849 |
dswidth = simple_strtoul(value, &eov, 10); |
1850 |
} else if (!strcmp(this_char, MNTOPT_64BITINODE)) { |
1851 |
args->flags &= ~XFSMNT_32BITINODES; |
1852 |
#if !XFS_BIG_INUMS |
1853 |
|
1854 |
printf("XFS: %s option not allowed on this system\n", |
1855 |
this_char); |
1856 |
return EINVAL; |
1857 |
#endif |
1858 |
} else if (!strcmp(this_char, MNTOPT_NOUUID)) { |
1859 |
args->flags |= XFSMNT_NOUUID; |
1860 |
} else if (!strcmp(this_char, MNTOPT_BARRIER)) { |
1861 |
args->flags |= XFSMNT_BARRIER; |
1862 |
} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { |
1863 |
args->flags &= ~XFSMNT_BARRIER; |
1864 |
} else if (!strcmp(this_char, MNTOPT_IKEEP)) { |
1865 |
args->flags &= ~XFSMNT_IDELETE; |
1866 |
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { |
1867 |
args->flags |= XFSMNT_IDELETE; |
1868 |
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) { |
1869 |
args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; |
1870 |
} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { |
1871 |
args->flags2 |= XFSMNT2_COMPAT_IOSIZE; |
1872 |
} else if (!strcmp(this_char, MNTOPT_ATTR2)) { |
1873 |
args->flags |= XFSMNT_ATTR2; |
1874 |
} else if (!strcmp(this_char, MNTOPT_NOATTR2)) { |
1875 |
args->flags &= ~XFSMNT_ATTR2; |
1876 |
} else if (!strcmp(this_char, "osyncisdsync")) { |
1877 |
/* no-op, this is now the default */ |
1878 |
printf("XFS: osyncisdsync is now the default, option is deprecated.\n"); |
1879 |
} else if (!strcmp(this_char, "irixsgid")) { |
1880 |
printf("XFS: irixsgid is now a sysctl(2) variable, option is deprecated.\n"); |
1881 |
} else { |
1882 |
printf("XFS: unknown mount option [%s].\n", this_char); |
1883 |
return EINVAL; |
1884 |
} |
1885 |
} |
1886 |
|
1887 |
if (args->flags & XFSMNT_NORECOVERY) { |
1888 |
if ((vfsp->vfs_flag & VFS_RDONLY) == 0) { |
1889 |
printf("XFS: no-recovery mounts must be read-only.\n"); |
1890 |
return EINVAL; |
1891 |
} |
1892 |
} |
1893 |
|
1894 |
if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { |
1895 |
printf( |
1896 |
"XFS: sunit and swidth options incompatible with the noalign option\n"); |
1897 |
return EINVAL; |
1898 |
} |
1899 |
|
1900 |
if ((dsunit && !dswidth) || (!dsunit && dswidth)) { |
1901 |
printf("XFS: sunit and swidth must be specified together\n"); |
1902 |
return EINVAL; |
1903 |
} |
1904 |
|
1905 |
if (dsunit && (dswidth % dsunit != 0)) { |
1906 |
printf( |
1907 |
"XFS: stripe width (%d) must be a multiple of the stripe unit (%d)\n", |
1908 |
dswidth, dsunit); |
1909 |
return EINVAL; |
1910 |
} |
1911 |
|
1912 |
if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { |
1913 |
if (dsunit) { |
1914 |
args->sunit = dsunit; |
1915 |
args->flags |= XFSMNT_RETERR; |
1916 |
} else { |
1917 |
args->sunit = vol_dsunit; |
1918 |
} |
1919 |
dswidth ? (args->swidth = dswidth) : |
1920 |
(args->swidth = vol_dswidth); |
1921 |
} else { |
1922 |
args->sunit = args->swidth = 0; |
1923 |
} |
1924 |
|
1925 |
done: |
1926 |
if (args->flags & XFSMNT_32BITINODES) |
1927 |
vfsp->vfs_flag |= VFS_32BITINODES; |
1928 |
if (args->flags2) |
1929 |
args->flags |= XFSMNT_FLAGS2; |
1930 |
return 0; |
1931 |
} |
1932 |
|
1933 |
#define seq_printf sbuf_printf |
1934 |
STATIC int |
1935 |
xfs_showargs( |
1936 |
struct bhv_desc *bhv, |
1937 |
struct sbuf *m) |
1938 |
{ |
1939 |
static struct proc_xfs_info { |
1940 |
int flag; |
1941 |
char *str; |
1942 |
} xfs_info[] = { |
1943 |
/* the few simple ones we can get from the mount struct */ |
1944 |
{ XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, |
1945 |
{ XFS_MOUNT_INO64, "," MNTOPT_INO64 }, |
1946 |
{ XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, |
1947 |
{ XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, |
1948 |
{ XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, |
1949 |
{ XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, |
1950 |
{ XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC }, |
1951 |
{ 0, NULL } |
1952 |
}; |
1953 |
struct proc_xfs_info *xfs_infop; |
1954 |
struct xfs_mount *mp = XFS_BHVTOM(bhv); |
1955 |
struct xfs_vfs *vfsp = XFS_MTOVFS(mp); |
1956 |
|
1957 |
for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) { |
1958 |
if (mp->m_flags & xfs_infop->flag) |
1959 |
sbuf_printf(m, "%s", xfs_infop->str); |
1960 |
} |
1961 |
|
1962 |
if (mp->m_flags & XFS_MOUNT_IHASHSIZE) |
1963 |
seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize); |
1964 |
|
1965 |
if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) |
1966 |
seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", |
1967 |
(int)(1 << mp->m_writeio_log) >> 10); |
1968 |
|
1969 |
if (mp->m_logbufs > 0) |
1970 |
sbuf_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); |
1971 |
if (mp->m_logbsize > 0) |
1972 |
seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); |
1973 |
|
1974 |
if (mp->m_logname) |
1975 |
seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); |
1976 |
if (mp->m_rtname) |
1977 |
seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); |
1978 |
|
1979 |
if (mp->m_dalign > 0) |
1980 |
sbuf_printf(m, "," MNTOPT_SUNIT "=%d", |
1981 |
(int)XFS_FSB_TO_BB(mp, mp->m_dalign)); |
1982 |
if (mp->m_swidth > 0) |
1983 |
sbuf_printf(m, "," MNTOPT_SWIDTH "=%d", |
1984 |
(int)XFS_FSB_TO_BB(mp, mp->m_swidth)); |
1985 |
|
1986 |
if (!(mp->m_flags & XFS_MOUNT_IDELETE)) |
1987 |
seq_printf(m, "," MNTOPT_IKEEP); |
1988 |
if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)) |
1989 |
seq_printf(m, "," MNTOPT_LARGEIO); |
1990 |
|
1991 |
if (!(vfsp->vfs_flag & VFS_32BITINODES)) |
1992 |
sbuf_printf(m, "," MNTOPT_64BITINODE); |
1993 |
if (vfsp->vfs_flag & VFS_GRPID) |
1994 |
seq_printf(m, "," MNTOPT_GRPID); |
1995 |
|
1996 |
return 0; |
1997 |
} |
1998 |
|
1999 |
STATIC void |
2000 |
xfs_freeze( |
2001 |
bhv_desc_t *bdp) |
2002 |
{ |
2003 |
xfs_mount_t *mp = XFS_BHVTOM(bdp); |
2004 |
|
2005 |
while (atomic_read(&mp->m_active_trans) > 0) |
2006 |
delay(100); |
2007 |
|
2008 |
/* Push the superblock and write an unmount record */ |
2009 |
xfs_log_unmount_write(mp); |
2010 |
xfs_unmountfs_writesb(mp); |
2011 |
xfs_fs_log_dummy(mp); |
2012 |
} |
2013 |
|
2014 |
|
2015 |
/*
 * VFS operations vector for XFS, registered with the behavior-chain
 * (bhv) framework.  The positional BHV_IDENTITY_INIT entry must remain
 * first; the remaining members are designated initializers, so their
 * listed order is cosmetic.  Entries cast to fs_nosys are operations
 * this port does not implement (NOTE(review): fs_nosys presumably
 * returns an "operation not supported" error -- confirm in the vfs
 * glue code).
 */
xvfsops_t xfs_vfsops = {
	BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS),
	.xvfs_parseargs = xfs_parseargs,	/* parse mount options */
	.xvfs_showargs = xfs_showargs,		/* report mount options */
	.xvfs_mount = xfs_mount,
	.xvfs_unmount = xfs_unmount,
	.xvfs_mntupdate = xfs_mntupdate,
	.xvfs_root = xfs_root,
	.xvfs_statvfs = xfs_statvfs,
	.xvfs_sync = xfs_sync,
	.xvfs_vget = xfs_vget,
	/* not implemented in this port */
	.xvfs_dmapiops = (xvfs_dmapiops_t)fs_nosys,
	.xvfs_quotactl = (xvfs_quotactl_t)fs_nosys,
	.xvfs_get_inode = (xvfs_get_inode_t)fs_nosys,
	.xvfs_init_vnode = xfs_initialize_vnode,
	.xvfs_force_shutdown = xfs_do_force_shutdown,
	.xvfs_freeze = xfs_freeze,		/* see xfs_freeze above */
};