xrootd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
XrdZipCDFH.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // Copyright (c) 2011-2014 by European Organization for Nuclear Research (CERN)
3 // Author: Michal Simon <michal.simon@cern.ch>
4 //------------------------------------------------------------------------------
5 // This file is part of the XRootD software suite.
6 //
7 // XRootD is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // XRootD is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with XRootD. If not, see <http://www.gnu.org/licenses/>.
19 //
20 // In applying this licence, CERN does not waive the privileges and immunities
21 // granted to it by virtue of its status as an Intergovernmental Organization
22 // or submit itself to any jurisdiction.
23 //------------------------------------------------------------------------------
24 
25 #ifndef SRC_XRDZIP_XRDZIPCDFH_HH_
26 #define SRC_XRDZIP_XRDZIPCDFH_HH_
27 
28 #include "XrdZip/XrdZipLFH.hh"
29 #include "XrdZip/XrdZipUtils.hh"
31 
32 #include <string>
33 #include <algorithm>
34 #include <iterator>
35 #include <unordered_map>
36 #include <memory>
37 #include <tuple>
38 
39 namespace XrdZip
40 {
//---------------------------------------------------------------------------
// CDFH is defined further down; the aliases below only need its name
//---------------------------------------------------------------------------
struct CDFH;

//---------------------------------------------------------------------------
// Central Directory records, each one owned through a unique_ptr
//---------------------------------------------------------------------------
using cdvec_t = std::vector<std::unique_ptr<CDFH>>;

//---------------------------------------------------------------------------
// File name -> index of the corresponding CD record
//---------------------------------------------------------------------------
using cdmap_t = std::unordered_map<std::string, size_t>;

//---------------------------------------------------------------------------
// File name -> Central Directory record
//---------------------------------------------------------------------------
using cdrecs_t = std::unordered_map<std::string, std::unique_ptr<CDFH>>;
60 
61  //---------------------------------------------------------------------------
62  // A data structure representing the Central Directory File header record
63  //---------------------------------------------------------------------------
64  struct CDFH
65  {
66  //-------------------------------------------------------------------------
67  // Parse central directory
68  // @param buffer : buffer containing the CD records
69  // @param bufferSize : size of the buffer
70  // @param nbCdRecords : nb of CD records
71  // @return : vector of CD records / file name to index mapping
72  //-------------------------------------------------------------------------
73  inline static std::tuple<cdvec_t, cdmap_t> Parse( const char *buffer, uint32_t bufferSize, uint16_t nbCdRecords )
74  {
75  uint32_t offset = 0;
76  cdvec_t cdvec;
77  cdmap_t cdmap;
78  cdvec.reserve( nbCdRecords );
79 
80  for( size_t i = 0; i < nbCdRecords; ++i )
81  {
82  if( bufferSize < cdfhBaseSize ) break;
83  // check the signature
84  uint32_t signature = to<uint32_t>( buffer + offset );
85  if( signature != cdfhSign ) throw bad_data();
86  // parse the record
87  std::unique_ptr<CDFH> cdfh( new CDFH( buffer + offset ) );
88  offset += cdfh->cdfhSize;
89  bufferSize -= cdfh->cdfhSize;
90  cdmap[cdfh->filename] = i;
91  cdvec.push_back( std::move( cdfh ) );
92  }
93 
94  return std::make_tuple( std::move( cdvec ), std::move( cdmap ) );
95  }
96 
97  //-------------------------------------------------------------------------
98  // Parse central directory
99  // @param buffer : buffer containing the CD records
100  // @param bufferSize : size of the buffer
101  // @return : vector of CD records / file name to index mapping
102  //-------------------------------------------------------------------------
103  inline static std::tuple<cdvec_t, cdmap_t> Parse( const char *&buffer, uint32_t bufferSize )
104  {
105  cdvec_t cdvec;
106  cdmap_t cdmap;
107  size_t i = 0;
108  while( bufferSize > 0 )
109  {
110  if( bufferSize < sizeof( uint32_t ) ) throw bad_data();
111  // check the signature
112  uint32_t signature = to<uint32_t>( buffer );
113  if( signature != cdfhSign )
114  return std::make_tuple( std::move( cdvec ), std::move( cdmap ) );
115  // parse the record
116  std::unique_ptr<CDFH> cdfh( new CDFH( buffer ) );
117  if( bufferSize < cdfh->cdfhSize ) throw bad_data();
118  buffer += cdfh->cdfhSize;
119  bufferSize -= cdfh->cdfhSize;
120  cdmap[cdfh->filename] = i++;
121  cdvec.push_back( std::move( cdfh ) );
122  }
123 
124  return std::make_tuple( std::move( cdvec ), std::move( cdmap ) );
125  }
126 
127  //---------------------------------------------------------------------------
128  // Calculate size of the Central Directory
129  //---------------------------------------------------------------------------
130  inline static size_t CalcSize( const cdvec_t &cdvec, uint32_t orgcdsz, uint32_t orgcdcnt )
131  {
132  size_t size = 0;
133  auto itr = cdvec.begin() + orgcdcnt;
134  for( ; itr != cdvec.end() ; ++itr )
135  {
136  CDFH *cdfh = itr->get();
137  size += cdfh->cdfhSize;
138  }
139  return size + orgcdsz;
140  }
141 
142  inline static void Serialize( uint32_t orgcdcnt,
143  const buffer_t &orgcdbuf,
144  const cdvec_t &cdvec,
145  buffer_t &buffer )
146  {
147  std::copy( orgcdbuf.begin(), orgcdbuf.end(), std::back_inserter( buffer ) );
148  auto itr = cdvec.begin() + orgcdcnt;
149  for( ; itr != cdvec.end() ; ++itr )
150  {
151  CDFH *cdfh = itr->get();
152  cdfh->Serialize( buffer );
153  }
154  }
155 
156  //-------------------------------------------------------------------------
157  // Constructor from Local File Header
158  //-------------------------------------------------------------------------
// Builds a record out of a Local File Header: timestamp, CRC32 and file name
// are copied from lfh, mode is shifted into the upper 16 bits of externAttr
// and the extra field is created from lfh's extra field and lfhOffset.
// NOTE(review): this listing skips some source lines (among them the statement
// guarded by the first `if` below), so not every initialisation is visible.
159  CDFH( LFH *lfh, mode_t mode, uint64_t lfhOffset ):
160  zipVersion( ( 3 << 8 ) | 63 ),
163  timestmp( lfh->timestmp ),
164  ZCRC32( lfh->ZCRC32 ),
168  commentLength( 0 ),
169  nbDisk( 0 ),
170  internAttr( 0 ),
171  externAttr( mode << 16 ),
172  filename( lfh->filename ),
173  extra( new Extra( lfh->extra.get(), lfhOffset ) )
174  {
// an offset below the 32-bit overflow marker is stored directly in the header
175  if ( lfhOffset >= ovrflw<uint32_t>::value )
177  else
178  offset = lfhOffset;
179 
// the extra field reports its own serialized size
180  extraLength = extra->totalSize;
181 
// the minimum version depends on whether the extra field is empty
182  if ( extraLength == 0 )
183  minZipVersion = 10;
184  else
185  minZipVersion = 45;
186 
188  }
189 
190  //-------------------------------------------------------------------------
191  // Constructor from buffer
192  //-------------------------------------------------------------------------
// Reads the header fields from fixed positions relative to buffer, then the
// file name (filenameLength bytes starting at position 46) and the extra data
// that follows the file name.
// NOTE(review): nothing in here checks how many bytes buffer really holds, the
// caller has to validate the record size before constructing.
// NOTE(review): the fields are read through reinterpret_cast at positions that
// may be misaligned for the target type (e.g. buffer + 38 as uint32_t), while
// Parse reads the signature through to<>() - consider using the same helper.
193  CDFH( const char *buffer )
194  {
195  zipVersion = *reinterpret_cast<const uint16_t*>( buffer + 4 );
196  minZipVersion = *reinterpret_cast<const uint16_t*>( buffer + 6 );
197  generalBitFlag = *reinterpret_cast<const uint16_t*>( buffer + 8 );
198  compressionMethod = *reinterpret_cast<const uint16_t*>( buffer + 10 );
199  timestmp.time = *reinterpret_cast<const uint16_t*>( buffer + 12 );
200  timestmp.date = *reinterpret_cast<const uint16_t*>( buffer + 14 );
201  ZCRC32 = *reinterpret_cast<const uint32_t*>( buffer + 16 );
202  compressedSize = *reinterpret_cast<const uint32_t*>( buffer + 20 );
203  uncompressedSize = *reinterpret_cast<const uint32_t*>( buffer + 24 );
204  filenameLength = *reinterpret_cast<const uint16_t*>( buffer + 28 );
205  extraLength = *reinterpret_cast<const uint16_t*>( buffer + 30 );
206  commentLength = *reinterpret_cast<const uint16_t*>( buffer + 32 );
207  nbDisk = *reinterpret_cast<const uint16_t*>( buffer + 34 );
208  internAttr = *reinterpret_cast<const uint16_t*>( buffer + 36 );
209  externAttr = *reinterpret_cast<const uint32_t*>( buffer + 38 );
210  offset = *reinterpret_cast<const uint32_t*>( buffer + 42 );
211 
// the file name starts right after the last fixed-position field
212  filename.assign( buffer + 46, filenameLength );
213 
214  // now parse the 'extra' (may contain the zip64 extension to CDFH)
215  ParseExtra( buffer + 46 + filenameLength, extraLength );
216 
// NOTE(review): no visible line fills `comment` from the buffer
218  }
219 
220  //-------------------------------------------------------------------------
221  // Choose the right offset value from the CDFH record
222  //-------------------------------------------------------------------------
223  inline static uint64_t GetOffset( const CDFH &cdfh )
224  {
225  if( cdfh.offset != ovrflw<uint32_t>::value )
226  return cdfh.offset;
227  return cdfh.extra->offset;
228  }
229 
230  //-------------------------------------------------------------------------
231  // Parse the extensible data fields
232  //-------------------------------------------------------------------------
// Collects which fields need a value from the extra data (ovrflws) and how many
// bytes those values take (exsize); if there is at least one, looks up the
// matching block inside the `length` bytes at `buffer` and loads it into `extra`.
// NOTE(review): the `if` conditions guarding the four blocks below are not
// visible in this listing.
233  void ParseExtra( const char *buffer, uint16_t length)
234  {
235  uint8_t ovrflws = Extra::NONE;
236  uint16_t exsize = 0;
237 
238  // check if compressed size is overflown
240  {
241  ovrflws |= Extra::CPMSIZE;
242  exsize += sizeof( uint64_t );
243  }
244 
245  // check if original size is overflown
247  {
248  ovrflws |= Extra::UCMPSIZE;
249  exsize += sizeof( uint64_t );
250  }
251 
252  // check if offset is overflown
254  {
255  ovrflws |= Extra::OFFSET;
256  exsize += sizeof( uint64_t );
257  }
258 
259  // check if number of disks is overflown
// (this one contributes a 32-bit value, the three above 64-bit ones)
261  {
262  ovrflws |= Extra::NBDISK;
263  exsize += sizeof( uint32_t );
264  }
265 
266  // if the expected size of ZIP64 extension is 0 we
267  // can skip parsing of 'extra'
268  if( exsize == 0 ) return;
269 
270  // Parse the extra part
271  buffer = Extra::Find( buffer, length );
// when Find() yields a null pointer `extra` is left as it was
272  if( buffer )
273  {
274  extra.reset( new Extra() );
275  extra->FromBuffer( buffer, exsize, ovrflws );
276  }
277  }
278 
279  //-------------------------------------------------------------------------
281  //-------------------------------------------------------------------------
282  void Serialize( buffer_t &buffer )
283  {
284  copy_bytes( cdfhSign, buffer );
285  copy_bytes( zipVersion, buffer );
286  copy_bytes( minZipVersion, buffer );
287  copy_bytes( generalBitFlag, buffer );
288  copy_bytes( compressionMethod, buffer );
289  copy_bytes( timestmp.time, buffer );
290  copy_bytes( timestmp.date, buffer );
291  copy_bytes( ZCRC32, buffer );
292  copy_bytes( compressedSize, buffer );
293  copy_bytes( uncompressedSize, buffer );
294  copy_bytes( filenameLength, buffer );
295  copy_bytes( extraLength, buffer );
296  copy_bytes( commentLength, buffer );
297  copy_bytes( nbDisk, buffer );
298  copy_bytes( internAttr, buffer );
299  copy_bytes( externAttr, buffer );
300  copy_bytes( offset, buffer );
301  std::copy( filename.begin(), filename.end(), std::back_inserter( buffer ) );
302  if( extra )
303  extra->Serialize( buffer );
304 
305  if ( commentLength > 0 )
306  std::copy( comment.begin(), comment.end(), std::back_inserter( buffer ) );
307  }
308 
309  //-------------------------------------------------------------------------
311  //-------------------------------------------------------------------------
312  inline bool IsZIP64() const
313  {
314  return extra.get();
315  }
316 
317  //-------------------------------------------------------------------------
319  //-------------------------------------------------------------------------
321  {
323  }
324 
325  uint16_t zipVersion; //< ZIP version
326  uint16_t minZipVersion; //< minimum ZIP version
327  uint16_t generalBitFlag; //< flags
328  uint16_t compressionMethod; //< compression method
329  dos_timestmp timestmp; //< DOS timestamp
330  uint32_t ZCRC32; //< CRC32
331  uint32_t compressedSize; //< compressed size
332  uint32_t uncompressedSize; //< uncompressed size
333  uint16_t filenameLength; //< filename length
334  uint16_t extraLength; //< length of the extra data that follows the file name
335  uint16_t commentLength; //< comment length
336  uint16_t nbDisk; //< number of disks
337  uint16_t internAttr; //< internal attributes
338  uint32_t externAttr; //< external attributes
339  uint32_t offset; //< offset (when it holds the overflow marker the value is taken from 'extra', see GetOffset)
340  std::string filename; //< file name
341  std::unique_ptr<Extra> extra; //< ZIP64 extra field
342  std::string comment; //< user comment
343  uint16_t cdfhSize; //< size of the record
344 
345  //-------------------------------------------------------------------------
346  // the Central Directory File Header signature
347  //-------------------------------------------------------------------------
348  static const uint32_t cdfhSign = 0x02014b50;
349  static const uint16_t cdfhBaseSize = 46;
350  };
351 }
352 
353 #endif /* SRC_XRDZIP_XRDZIPCDFH_HH_ */
uint16_t internAttr
Definition: XrdZipCDFH.hh:337
static const uint16_t cdfhBaseSize
Definition: XrdZipCDFH.hh:349
std::vector< char > buffer_t
Definition: XrdZipUtils.hh:54
uint16_t generalBitFlag
Definition: XrdZipCDFH.hh:327
uint16_t time
Definition: XrdZipUtils.hh:130
CDFH(const char *buffer)
Definition: XrdZipCDFH.hh:193
static uint64_t GetOffset(const CDFH &cdfh)
Definition: XrdZipCDFH.hh:223
static void copy_bytes(const INT value, buffer_t &buffer)
Definition: XrdZipUtils.hh:60
uint16_t commentLength
Definition: XrdZipCDFH.hh:335
static const uint16_t flag
Definition: XrdZipDataDescriptor.hh:41
uint16_t filenameLength
Definition: XrdZipCDFH.hh:333
void Serialize(buffer_t &buffer)
Serialize the object into a buffer.
Definition: XrdZipCDFH.hh:282
static const uint32_t cdfhSign
Definition: XrdZipCDFH.hh:348
static size_t CalcSize(const cdvec_t &cdvec, uint32_t orgcdsz, uint32_t orgcdcnt)
Definition: XrdZipCDFH.hh:130
Definition: XrdZipExtra.hh:163
uint32_t uncompressedSize
Definition: XrdZipCDFH.hh:332
static const char * Find(const char *buffer, uint16_t length)
Definition: XrdZipExtra.hh:98
uint16_t extraLength
Definition: XrdZipCDFH.hh:334
uint32_t offset
Definition: XrdZipCDFH.hh:339
std::unique_ptr< Extra > extra
Definition: XrdZipCDFH.hh:341
uint32_t compressedSize
Definition: XrdZipCDFH.hh:331
uint16_t cdfhSize
Definition: XrdZipCDFH.hh:343
dos_timestmp timestmp
Definition: XrdZipCDFH.hh:329
std::unordered_map< std::string, std::unique_ptr< CDFH > > cdrecs_t
Definition: XrdZipCDFH.hh:59
Definition: XrdZipExtra.hh:159
static void Serialize(uint32_t orgcdcnt, const buffer_t &orgcdbuf, const cdvec_t &cdvec, buffer_t &buffer)
Definition: XrdZipCDFH.hh:142
Definition: XrdZipExtra.hh:162
Definition: XrdZipCDFH.hh:64
std::string comment
Definition: XrdZipCDFH.hh:342
A data structure representing ZIP Local File Header.
Definition: XrdZipLFH.hh:41
auto get(const nlohmann::detail::iteration_proxy_value< IteratorType > &i) -> decltype(i.key())
Definition: XrdOucJson.hh:4497
uint16_t nbDisk
Definition: XrdZipCDFH.hh:336
static std::tuple< cdvec_t, cdmap_t > Parse(const char *buffer, uint32_t bufferSize, uint16_t nbCdRecords)
Definition: XrdZipCDFH.hh:73
uint32_t ZCRC32
Definition: XrdZipCDFH.hh:330
Definition: XrdZipUtils.hh:46
std::unordered_map< std::string, size_t > cdmap_t
Definition: XrdZipCDFH.hh:54
Definition: XrdZipUtils.hh:40
bool IsZIP64() const
Definition: XrdZipCDFH.hh:312
uint16_t compressionMethod
Definition: XrdZipCDFH.hh:328
Definition: XrdZipUtils.hh:92
void ParseExtra(const char *buffer, uint16_t length)
Definition: XrdZipCDFH.hh:233
uint32_t externAttr
Definition: XrdZipCDFH.hh:338
bool HasDataDescriptor()
Definition: XrdZipCDFH.hh:320
std::vector< std::unique_ptr< CDFH > > cdvec_t
Definition: XrdZipCDFH.hh:44
uint16_t date
Definition: XrdZipUtils.hh:143
Definition: XrdZipExtra.hh:160
uint16_t minZipVersion
Definition: XrdZipCDFH.hh:326
Definition: XrdZipExtra.hh:161
Definition: XrdZipExtra.hh:35
uint16_t zipVersion
Definition: XrdZipCDFH.hh:325
CDFH(LFH *lfh, mode_t mode, uint64_t lfhOffset)
Definition: XrdZipCDFH.hh:159
std::string filename
Definition: XrdZipCDFH.hh:340
static std::tuple< cdvec_t, cdmap_t > Parse(const char *&buffer, uint32_t bufferSize)
Definition: XrdZipCDFH.hh:103