1#------------------------------------------------------------------------------
2# $File: compress,v 1.91 2023/06/16 19:37:47 christos Exp $
3# compress:  file(1) magic for pure-compression formats (no archives)
4#
5# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
6#
7# Formats for various forms of compressed data
8# Formats for "compress" proper have been moved into "compress.c",
9# because it tries to uncompress it to figure out what's inside.
10
11# standard unix compress
# Matches the two bytes \037\235 at offset 0, then decodes the byte at
# offset 2: if bit 0x80 is set "block compressed" is printed, and the low
# five bits (&0x1f) are always printed as the number of bits.
120         string              \037\235  compress'd data
13!:mime    application/x-compress
14!:apple   LZIVZIVU
15!:ext     Z
16>2        byte&0x80 >0                  block compressed
17>2        byte&0x1f x                   %d bits
18
19# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
20# URL: https://en.wikipedia.org/wiki/Gzip
21# Reference: https://tools.ietf.org/html/rfc1952
22# Update: Joerg Jenderek, Apr 2019, Dec 2022
23#   Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
24#         * Original filename is only at offset 10 if "extra field" absent
25#         * Produce shorter output - notably, only report compression methods
26#         other than 8 ("deflate", the only method defined in RFC 1952).
27# Note: find defs -iname '*.trid.xml' -exec grep -q '<Bytes>1F8B08' {} \; -ls
28# TODO:
29# FBR     Blueberry FlashBack screen Record       https://www.flashbackrecorder.com/
30# KPR     KOffice/Calligra KPresenter             application/x-kpresenter
31# KPT     KOffice/Calligra KPresenter template?   application/x-kpresenter
32# SAV     Diggles Saved Game File                           http://www.innonics.com
33# SAV     FarCry (demo) saved game                http://www.farcry-thegame.com
34# DAT     ZOAGZIP game data format                http://en.wikipedia.org/wiki/SD_Gundam_Capsule_Fighter
# Layout of this entry: after the \037\213 signature the flag byte at
# offset 3 is masked with 0x18 and the rule splits in two branches.
#   - mask result 0: the bytes at offset 10 are compared with "MCD" and
#     "GXD" (CAD formats); if neither matches, the "default" line reports
#     plain gzip compressed data.
#   - mask result >0: the data is reported as gzip compressed data directly.
# Both gzip branches call the gzip-info subroutine at offset 0 and then
# print the unsigned little-endian long found at offset -4.
350       string          \037\213
36# to display gzip compressed (strength=100=2*50) before other (strength=50)?
37#!:strength * 2
38# no FNAME and FCOMMENT bit implies no file name/comment. That means only binary
39>3        byte&0x18 =0
40# For binary gzipped no ASCII text should occur
41#         mcd-monu-cad.trid.xml
42>>10      string              MCD                           Monu-Cad Drawing, Component or Font
43#>>36     string              Created\ with\ MONU-CAD
44#!:mime   application/octet-stream
45# http://fileformats.archiveteam.org/wiki/Monu-CAD
46#         http://www.monucad.com/downloads/FullDemo-2005.EXE
47#         /HANDS96.MCC        Component
48#         /DEMO_DD01.MCD      Drawing
49#         /MCALF020.FNT       Font
50!:ext     mcc/mcd/fnt
51# http://www.generalcadd.com
52>>10      string              GXD                           General CADD, Drawing or Component
53#!:mime   application/octet-stream
54#         /gxc/BUILDINGEDGE.gxc                             Component
55#         /gxd/HOCKETT-STPAUL-WRHSE.gxd           Drawing
56#         /gxd/POWERLAND-MILL-ADD-11.gxd                    Drawing             v9.1.06
57!:ext     gxc/gxd
58#>>>13    ubyte               0                             \b, version 0
59>>>13     string              09                            \b, version 9
60# other gzipped binary like gzipped tar, VirtualBox extension package,...
61>>10      default             x                   gzip compressed data
62!:mime    application/gzip
63>>>0      use       gzip-info
64# size of the original (uncompressed) input data modulo 2^32
65# TODO: check for GXD MCD cad the reported size
66>>>-4     ulelong             x                   \b, original size modulo 2^32 %u
67# gzipped TAR or VirtualBox extension package
68#!:mime   application/x-compressed-tar
69#!:mime   application/x-virtualbox-vbox-extpack
70# https://www.w3.org/TR/SVG/mimereg.html
71#!:mime   image/svg+xml-compressed
72#         zlib.3.gz
73#         microcode-20180312.tgz
74#         tpz same as tgz
75#         lua-md5_1.2-1_i386_i486.ipk   https://en.wikipedia.org/wiki/Opkg
76#         Oracle_VM_VirtualBox_Extension_Pack-5.0.12-104815.vbox-extpack
77#         trees.blend                             http://fileformats.archiveteam.org/wiki/BLEND
78#         2020-07-19-Note-16-24.xoj     https://xournal.sourceforge.net/manual.html
79#         MYgnucash-gz.gnucash                    https://wiki.gnucash.org/wiki/GnuCash_XML_format
80#         text-rotate.dia                         https://en.wikipedia.org/wiki/Dia_(software)
81#         MYrdata.RData                           https://en.wikipedia.org/wiki/R_(programming_language)
82!:ext     gz/tgz/tpz/ipk/vbox-extpack/svgz/blend/dia/gnucash/rdata/xoj
83# FNAME/FCOMMENT bit implies file name/comment as iso-8859-1 text
84>3        byte&0x18 >0                  gzip compressed data
85!:mime    application/gzip
86# gzipped tar, gzipped Abiword document
87#!:mime   application/x-compressed-tar
88#!:mime   application/x-abiword-compressed
89#!:mime   image/svg+xml-compressed
90#         kleopatra_splashscreen.svgz   gzipped .svg
91#         RSI-Mega-Demo_Disk1.adz                 gzipped .adf        http://fileformats.archiveteam.org/wiki/ADF_(Amiga)
92#         PostbankTest.kmy              gzipped XML         https://docs.kde.org/stable5/en/kmymoney/kmymoney/details.formats.compressed.html
93#         Logo.xcfgz                              gzipped .xcf        http://fileformats.archiveteam.org/wiki/XCF
94!:ext     gz/tgz/tpz/zabw/svgz/adz/kmy/xcfgz
95>>0       use       gzip-info
96# size of the original (uncompressed) input data modulo 2^32
97>>-4      ulelong             x                   \b, original size modulo 2^32 %u
98#         display information of gzip compressed files
# Named subroutine; the gzip rules in this file invoke it with
# "use gzip-info" at offset 0, so the offsets below are relative to the
# start of the gzip header.
#   offset 2: method byte; only values below or above 8 are reported
#   offset 3: flag byte; each tested bit appends one phrase
#   offset 4: little-endian date, printed when greater than 0
#   offset 8: extra flags (2 and 4 are reported)
#   offset 9: originating system / filesystem code
990         name                                    gzip-info
100#>2       byte                x                   THIS iS GZIP
101>2        byte                <8                  \b, reserved method
102>2        byte                >8                  \b, unknown method
103>3        byte                &0x01               \b, ASCII
104>3        byte                &0x02               \b, has CRC
105>3        byte                &0x04               \b, extra field
# Flags masked with 0xC must equal 0x08, i.e. bit 0x08 set and bit 0x04
# (extra field) clear; only then is the string at offset 10 printed as the
# original name.
106>3        byte&0xC  =0x08
107>>10      string              x                   \b, was "%s"
108>3        byte                &0x10               \b, has comment
109>3        byte                &0x20               \b, encrypted
110>4        ledate              >0                  \b, last modified: %s
111>8        byte                2                   \b, max compression
112>8        byte                4                   \b, max speed
113>9        byte                =0x00               \b, from FAT filesystem (MS-DOS, OS/2, NT)
114>9        byte                =0x01               \b, from Amiga
115>9        byte                =0x02               \b, from VMS
116>9        byte                =0x03               \b, from Unix
117>9        byte                =0x04               \b, from VM/CMS
118>9        byte                =0x05               \b, from Atari
119>9        byte                =0x06               \b, from HPFS filesystem (OS/2, NT)
120>9        byte                =0x07               \b, from MacOS
121>9        byte                =0x08               \b, from Z-System
122>9        byte                =0x09               \b, from CP/M
123>9        byte                =0x0A               \b, from TOPS/20
124>9        byte                =0x0B               \b, from NTFS filesystem (NT)
125>9        byte                =0x0C               \b, from QDOS
126>9        byte                =0x0D               \b, from Acorn RISCOS
127# size of the original (uncompressed) input data modulo 2^32
# The size test is left disabled here (the recorded error is kept below);
# the callers print the value at offset -4 themselves.
128#>-4      ulelong             x                   \b, original size modulo 2^32 %u
129#ERROR: line 114: non zero offset 1048572 at level 1
130
131# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
# After the \037\036 signature the big-endian long at offset 2 is printed
# as the original character count; the two continuation lines differ only
# in the plural/singular wording of the message (>1 versus =1).
1320         string              \037\036  packed data
133!:mime    application/octet-stream
134!:ext     z
135>2        belong              >1                  \b, %d characters originally
136>2        belong              =1                  \b, %d character originally
137#
138# This magic number is byte-order-independent.
# Both bytes of the 16-bit value 0x1f1f are 0x1f, so the native-order
# "short" comparison gives the same result on either byte order.
1390         short               0x1f1f              old packed data
140!:mime    application/octet-stream
141
142# XXX - why *two* entries for "compacted data", one of which is
143# byte-order independent, and one of which is byte-order dependent?
144#
# First rule: native-order short at offset 0 compared with 0x1fff
# (byte-order dependent).
1450         short               0x1fff              compacted data
146!:mime    application/octet-stream
147# This string is valid for SunOS (BE) and a matching "short" is listed
148# in the Ultrix (LE) magic file.
# Second rule: the literal byte sequence \377\037 at offset 0
# (byte-order independent).
1490         string              \377\037  compacted data
150!:mime    application/octet-stream
# huf output: native-order short at offset 0 compared with the octal
# constant 0145405 (0xcb05).
1510         short               0145405             huf output
152!:mime    application/octet-stream
153
154# bzip2
# Signature "BZh" at offset 0. The byte at offset 3 is printed as a
# character (%c) followed by "00k" when it is greater than 47, i.e. from
# ASCII '0' upwards.
1550         string              BZh                 bzip2 compressed data
156!:mime    application/x-bzip2
157!:ext     bz2
158>3        byte                >47                 \b, block size = %c00k
159
160# bzip    a block-sorting file compressor
161#         by Julian Seward <sewardj@cs.man.ac.uk> and others
# Signature "BZ0" at offset 0; the byte at offset 3 is reported the same way
# as in the bzip2 rule (printed with %c followed by "00k" when > 47).
1620         string              BZ0                 bzip compressed data
163!:mime    application/x-bzip
164>3        byte                >47                 \b, block size = %c00k
165
166# lzip
# Signature "LZIP" at offset 0; the byte at offset 4 is printed as the
# version number.
1670         string              LZIP                lzip compressed data
168!:mime application/x-lzip
169!:ext lz
170>4        byte                x                   \b, version: %d
171
172# squeeze and crunch
173# Michael Haardt <michael@cantor.informatik.rwth-aachen.de>
# Three rules keyed on the big-endian short at offset 0 (0x76FF, 0x76FE,
# 0x76FD). Each prints the string that follows as the original name; note
# that the name is read at offset 4 for squeezed data but at offset 2 for
# crunched and LZH data.
1740         beshort             0x76FF              squeezed data,
175>4        string              x                   original name %s
1760         beshort             0x76FE              crunched data,
177>2        string              x                   original name %s
1780         beshort             0x76FD              LZH compressed data,
179>2        string              x                   original name %s
180
181# Freeze
# Two-byte signatures at offset 0; the second byte (\237 or \236)
# selects the reported version. No further fields are decoded.
1820         string              \037\237  frozen file 2.1
1830         string              \037\236  frozen file 1.0 (or gzip 0.5)
184
185# SCO compress -H (LZH)
# Two-byte signature \037\240 at offset 0; no further fields are decoded.
1860         string              \037\240  SCO compress -H (LZH) data
187
188# European GSM 06.10 is a provisional standard for full-rate speech
189# transcoding, prI-ETS 300 036, which uses RPE/LTP (residual pulse
190# excitation/long term prediction) coding at 13 kbit/s.
191#
192# There's only a magic nibble (4 bits); that nibble repeats every 33
193# bytes.  This isn't suited for use, but maybe we can use it someday.
194#
195# This will cause very short GSM files to be declared as data and
196# mismatches to be declared as data too!
197#0        byte&0xF0 0xd0                data
198#>33      byte&0xF0 0xd0
199#>66      byte&0xF0 0xd0
200#>99      byte&0xF0 0xd0
201#>132     byte&0xF0 0xd0                GSM 06.10 compressed audio
202
203# lzop from <markus.oberhumer@jk.uni-linz.ac.at>
# After the nine-byte signature, the big-endian short at offset 9 is the
# version word and selects one of two layouts:
#   version < 0x0940 : method byte at offset 13, OS byte at offset 14
#   version > 0x0939 : method byte at offset 15, OS byte at offset 17
# In both layouts the high nibble of the byte at offset 9 chooses the
# "version N." text and the low 12 bits of the version word are appended
# as three hex digits.
2040         string              \x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a    lzop compressed data
205!:ext     lzo
206>9        beshort             <0x0940
207>>9       byte&0xf0 =0x00               - version 0.
208>>9       beshort&0x0fff      x                   \b%03x,
209>>13      byte                1                   LZO1X-1,
210>>13      byte                2                   LZO1X-1(15),
211>>13      byte                3                   LZO1X-999,
212## >>22   bedate              >0                  last modified: %s,
213>>14      byte                =0x00               os: MS-DOS
214>>14      byte                =0x01               os: Amiga
215>>14      byte                =0x02               os: VMS
216>>14      byte                =0x03               os: Unix
217>>14      byte                =0x05               os: Atari
218>>14      byte                =0x06               os: OS/2
219>>14      byte                =0x07               os: MacOS
220>>14      byte                =0x0A               os: Tops/20
221>>14      byte                =0x0B               os: WinNT
222>>14      byte                =0x0E               os: Win32
223>9        beshort             >0x0939
224>>9       byte&0xf0 =0x00               - version 0.
225>>9       byte&0xf0 =0x10               - version 1.
226>>9       byte&0xf0 =0x20               - version 2.
227>>9       beshort&0x0fff      x                   \b%03x,
228>>15      byte                1                   LZO1X-1,
229>>15      byte                2                   LZO1X-1(15),
230>>15      byte                3                   LZO1X-999,
231## >>25   bedate              >0                  last modified: %s,
232>>17      byte                =0x00               os: MS-DOS
233>>17      byte                =0x01               os: Amiga
234>>17      byte                =0x02               os: VMS
235>>17      byte                =0x03               os: Unix
236>>17      byte                =0x05               os: Atari
237>>17      byte                =0x06               os: OS/2
238>>17      byte                =0x07               os: MacOS
239>>17      byte                =0x0A               os: Tops/20
240>>17      byte                =0x0B               os: WinNT
241>>17      byte                =0x0E               os: Win32
242
243# 4.3BSD-Quasijarus Strong Compression
244# https://minnie.tuhs.org/Quasijarus/compress.html
# Two-byte signature \037\241 at offset 0; no further fields are decoded.
2450         string              \037\241  Quasijarus strong compressed data
246
247# From: Cory Dikkers <cdikkers@swbell.net>
# Three four-character signatures at offset 0 (XPKF, PP11, PP20).
# Only the PP20 rule decodes more: the big-endian long at offset 4 is
# compared with five known values to name the compression level; any
# other value adds nothing to the output.
2480         string              XPKF                Amiga xpkf.library compressed data
2490         string              PP11                Power Packer 1.1 compressed data
2500         string              PP20                Power Packer 2.0 compressed data,
251>4        belong              0x09090909          fast compression
252>4        belong              0x090A0A0A          mediocre compression
253>4        belong              0x090A0B0B          good compression
254>4        belong              0x090A0C0C          very good compression
255>4        belong              0x090A0C0D          best compression
256
257# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
258# https://www.7-zip.org or DOC/7zFormat.txt
259#
# Six-byte signature: "7z" followed by \274\257\047\034. The bytes at
# offsets 6 and 7 are printed as "version <6>.<7>".
2600         string              7z\274\257\047\034  7-zip archive data,
261>6        byte                x                             version %d
262>7        byte                x                             \b.%d
263!:mime    application/x-7z-compressed
264!:ext 7z/cb7
265
# Named subroutine, invoked with "use lzma" by the LZMA detection rule in
# this file. The little-endian quad at offset 5 is the size field: all bits
# set is reported as "streamed", any other value is printed as the size.
2660         name                lzma                          LZMA compressed data,
267!:mime    application/x-lzma
268!:ext     lzma
269>5        lequad              =0xffffffffffffffff streamed
270>5        lequad              !0xffffffffffffffff non-streamed, size %lld
271
272# Type: LZMA
# The low 24 bits of the little-endian long at offset 0 must equal 0x5d
# (bytes 5d 00 00). The little-endian short at offset 12 must then be
# either 0xff or 0; in both cases the lzma subroutine produces the output.
2730         lelong&0xffffff     =0x5d
274>12       leshort             0xff
275>>0       use                 lzma
276>12       leshort             0
277>>0       use                 lzma
278
279# http://tukaani.org/xz/xz-file-format.txt
# Six-byte signature \xFD "7zXZ" \x00 at offset 0. The low nibble of the
# byte at offset 7 selects the checksum name appended to the message
# (0 NONE, 1 CRC32, 4 CRC64, 0xa SHA-256); other values append nothing.
2800         ustring             \xFD7zXZ\x00                  XZ compressed data, checksum
281!:strength * 2
282!:mime    application/x-xz
283!:ext     xz
284>7        byte&0xf  0x0                           NONE
285>7        byte&0xf  0x1                           CRC32
286>7        byte&0xf  0x4                           CRC64
287>7        byte&0xf  0xa                           SHA-256
288
289# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt
# Signature "LRZI" at offset 0. Bytes 4 and 5 are printed as
# "version <4>.<5>"; a value of 1 in the byte at offset 22 adds "encrypted".
2900         string              LRZI                          LRZIP compressed data
291!:mime  application/x-lrzip
292>4        byte                x                             - version %d
293>5        byte                x                             \b.%d
294>22       byte                1                             \b, encrypted
295
296# https://fastcompression.blogspot.fi/2013/04/lz4-streaming-format-final.html
# Three rules keyed on the little-endian long at offset 0; the value alone
# decides which version range is reported. Only the v1.4+ rule carries an
# !:ext line.
2970         lelong              0x184d2204          LZ4 compressed data (v1.4+)
298!:mime    application/x-lz4
299!:ext     lz4
300# Added by osm0sis@xda-developers.com
3010         lelong              0x184c2103          LZ4 compressed data (v1.0-v1.3)
302!:mime    application/x-lz4
3030         lelong              0x184c2102          LZ4 compressed data (v0.1-v0.9)
304!:mime    application/x-lz4
305
306# Zstandard/LZ4 skippable frames
307# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# The 0xFFFFFFF0 mask ignores the low nibble, so the sixteen values
# 0x184D2A50..0x184D2A5F all match. Nothing is printed for the frame
# itself; the indirect test re-applies the magic database at offset
# (little-endian long stored at offset 4) + 8.
3080         lelong&0xFFFFFFF0  0x184D2A50
309>(4.l+8)  indirect  x
310
311# Zstandard Dictionary ID subroutine
# Called by the Zstandard rules in this file with "use zstd-dictionary-id"
# at offset 4, so offset 0 here is the byte right after the 4-byte magic.
# Bit 0x20 of that byte decides where the ID field is read (offset 1 when
# set, offset 2 when clear); the low two bits decide its width:
#   0 -> "None", 1 -> byte, 2 -> little-endian short, 3 -> little-endian long
3120     name        zstd-dictionary-id
313# Single Segment = True
314>0    byte        &0x20   \b, Dictionary ID:
315>>0   byte&0x03   0       None
316>>0   byte&0x03   1
317>>>1  byte        x       %u
318>>0   byte&0x03   2
319>>>1  leshort     x       %u
320>>0   byte&0x03   3
321>>>1  lelong      x       %u
322# Single Segment = False
323>0    byte        ^0x20   \b, Dictionary ID:
324>>0   byte&0x03   0       None
325>>0   byte&0x03   1
326>>>2  byte        x       %u
327>>0   byte&0x03   2
328>>>2  leshort     x       %u
329>>0   byte&0x03   3
330>>>2  lelong      x       %u
331
332# Zstandard compressed data
333# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# One rule per magic value 0xFD2FB522..0xFD2FB528 (little-endian long at
# offset 0); the last hex digit selects the reported version. Only the
# v0.7 and v0.8+ rules go on to call zstd-dictionary-id at offset 4.
3340     lelong       0xFD2FB522  Zstandard compressed data (v0.2)
335!:mime  application/zstd
336!:ext zst
3370     lelong       0xFD2FB523  Zstandard compressed data (v0.3)
338!:mime  application/zstd
339!:ext zst
3400     lelong       0xFD2FB524  Zstandard compressed data (v0.4)
341!:mime  application/zstd
342!:ext zst
3430     lelong       0xFD2FB525  Zstandard compressed data (v0.5)
344!:mime  application/zstd
345!:ext zst
3460     lelong       0xFD2FB526  Zstandard compressed data (v0.6)
347!:mime  application/zstd
348!:ext zst
3490     lelong       0xFD2FB527  Zstandard compressed data (v0.7)
350!:mime  application/zstd
351!:ext zst
352>4    use          zstd-dictionary-id
3530     lelong       0xFD2FB528  Zstandard compressed data (v0.8+)
354!:mime  application/zstd
355!:ext zst
356>4    use          zstd-dictionary-id
357
358# https://github.com/facebook/zstd/blob/dev/zstd_compression_format.md
# Little-endian long 0xEC30A437 at offset 0 identifies a dictionary; the
# little-endian long at offset 4 is printed as its ID.
3590  lelong    0xEC30A437  Zstandard dictionary
360!:mime  application/x-std-dictionary
361>4 lelong    x           (ID %u)
362
363# AFX compressed files (Wolfram Kleff)
# The signature "-afx-" is looked for at offset 2, not at the start of
# the file.
3642         string              -afx-               AFX compressed file data
365
366# Supplementary magic data for the file(1) command to support
367# rzip(1).  The format is described in magic(5).
368#
369# Copyright (C) 2003 by Andrew Tridgell.  You may do whatever you want with
370# this file.
371#
# Signature "RZIP" at offset 0. Bytes 4 and 5 are printed as
# "version <4>.<5>" and the big-endian long at offset 6 as a byte count.
3720         string              RZIP                rzip compressed data
373>4        byte                x                   - version %d
374>5        byte                x                   \b.%d
375>6        belong              x                   (%d bytes)
376
# FreeArc: four-byte signature "ArC" followed by \x01 at offset 0; no
# further fields are decoded.
3770         string              ArC\x01             FreeArc archive <http://freearc.org>
378
379# Type:   DACT compressed files
# Signature is the long 0x444354C3 at offset 0. Bytes 4, 5 and 6 are
# printed as a three-part version (each only when > -1), the long at
# offset 7 as the original size (when > 0) and the long at offset 15 as
# the block size (when > 30).
# NOTE(review): "long" is a host-byte-order type, so these tests depend on
# the machine running file(1) - confirm that is intended.
3800         long      0x444354C3          DACT compressed data
381>4        byte      >-1                 (version %i.
382>5        byte      >-1                 %i.
383>6        byte      >-1                 %i)
384>7        long      >0                  , original size: %i bytes
385>15       long      >30                 , block size: %i bytes
386
387# Valve Pack (VPK) files
# Little-endian long 0x55aa1234 at offset 0; the little-endian longs at
# offsets 4 and 8 are printed as the version and the entry count.
3880         lelong    0x55aa1234          Valve Pak file
389>0x4      lelong    x                   \b, version %u
390>0x8      lelong    x                   \b, %u entries
391
392# Snappy framing format
393# https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
# Ten-byte signature at offset 0: \377 \006 \0 \0 followed by "sNaPpY".
3940         string    \377\006\0\0sNaPpY  snappy framed data
395!:mime    application/x-snappy-framed
396
397# qpress, https://www.quicklz.com/
# Eight-character signature "qpress10" at offset 0; no further fields are
# decoded.
3980         string    qpress10  qpress compressed data
399!:mime    application/x-qpress
400
401# Zlib https://www.ietf.org/rfc/rfc6713.txt
# There is no fixed signature, so three chained conditions on the first two
# bytes are used instead:
#   - the big-endian short at offset 0 is a multiple of 31,
#   - the low nibble of byte 0 equals 8,
#   - the top bit (0x80) of byte 0 is clear.
# The top-level "string/b x" line matches any data and only serves as the
# anchor for these tests.
4020         string/b  x
403>0        beshort%31          =0
404>>0       byte&0xf  =8
405>>>0      byte&0x80           =0        zlib compressed data
406!:mime    application/zlib
407
408# BWC compression
# Signature "BWC" at offset 0; the message is only printed when the byte
# at offset 3 is 0.
4090         string              BWC
410>3        byte                0         BWC compressed data
411
412# UCL compression
# The first eight bytes are compared as one big-endian quad:
# 00 e9 55 43 4c ff 01 1a.
4130         bequad              0x00e955434cff011a  UCL compressed data
414
415# Softlib archive
# Signature "SLIB" at offset 0; the little-endian shorts at offsets 4 and 6
# are printed as the version and the number of contained files.
4160         string              SLIB      Softlib archive
417>4        leshort             x         \b, version %d
418>6        leshort             x         (contains %d files)
419
420# URL:  https://github.com/lzfse/lzfse/blob/master/src/lzfse_internal.h#L276
421# From: Eric Hall <eric.hall@darkart.com>
# Four rules sharing the prefix "bvx"; the fourth character at offset 3
# ('-', '1', '2' or 'n') selects the reported block type.
4220         string    bvx-      lzfse encoded, no compression
4230         string    bvx1      lzfse compressed, uncompressed tables
4240         string    bvx2      lzfse compressed, compressed tables
4250         string    bvxn      lzfse encoded, lzvn compressed
426
427# pcxLib.exe compression program
428# http://www.shikadi.net/moddingwiki/PCX_Library
# Signature "pcxLib" at offset 0, confirmed by the copyright text that
# starts at offset 0x0A. The words of that text are separated by ordinary
# ASCII spaces (0x20), written here as "\ ". The previous "\020" was an
# octal escape for byte 0x10, so the test could not match the text.
4290         string/b  pcxLib
430>0x0A     string/b  Copyright\ (c)\ Genus\ Microprogramming,\ Inc.      pcxLib compressed
431
432# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/SW/ORA/ORAFormatSpecification.htm
# Unsigned little-endian short 0x7c49 at offset 0, followed by the
# little-endian long 0x80 at offset 2. The header fields after that are
# consecutive 32-bit little-endian values from offset 6 to offset 50, each
# printed with its own label, and a 16-bit version at offset 54.
4330         uleshort  0x7c49
434>2        lelong              0x80      ORA FASTQ compressed file
435>>6       ulelong             x         \b, DNA size %u
436>>10      ulelong             x         \b, read names size %u
437>>14      ulelong             x         \b, quality buffer 1 size %u
438>>18      ulelong             x         \b, quality buffer 2 size %u
439>>22      ulelong             x         \b, sequence buffer size %u
440>>26      ulelong             x         \b, N-position buffer size %u
441>>30      ulelong             x         \b, crypto buffer size %u
442>>34      ulelong             x         \b, misc  buffer 1 size %u
443>>38      ulelong             x         \b, misc  buffer 2 size %u
444>>42      ulelong             x         \b, flags %#x
445>>46      lelong              x         \b, read size %d
446>>50      lelong              x         \b, number of reads %d
447>>54      leshort             x         \b, version %d
448
449# https://github.com/kspalaiologos/bzip3/blob/master/doc/file_format.md
# Five-character signature "BZ3v1" at offset 0; the unsigned little-endian
# long at offset 5 is printed as the block size.
4500         string/b  BZ3v1     bzip3 compressed data
451>5        ulelong             x         \b, blocksize %u
452
453
454# https://support-docs.illumina.com/SW/ORA_Format_Specification/Content/\
455# SW/ORA/ORAFormatSpecification.htm
456# From Guillaume Rizk
# Same 0x7C49 value at offset 0 as the ORA FASTQ rule in this file, but
# tested here with the host-order "short" type. The continuation lines use
# negative offsets, i.e. positions counted back from the end of the file:
# the value 0x7C49 is expected again at -261, two counters are printed from
# -125 and -109, and bit 0x02 of the field at -219 flags interleaved
# paired reads.
# NOTE(review): "short" depends on host byte order while the sibling rule
# uses "uleshort" - confirm whether "leshort" is intended here.
4570         short     =0x7C49 DRAGEN ORA file,
458>-261     short     =0x7C49 with metadata:
459>-125     u8        x         NB reads: %llu,
460>-109     u8        x         NB bases: %llu.
461>-219     u4&0x02   2         File contains interleaved paired reads
462