xrootd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
XrdZipCDFH.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // Copyright (c) 2011-2014 by European Organization for Nuclear Research (CERN)
3 // Author: Michal Simon <michal.simon@cern.ch>
4 //------------------------------------------------------------------------------
5 // This file is part of the XRootD software suite.
6 //
7 // XRootD is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // XRootD is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with XRootD. If not, see <http://www.gnu.org/licenses/>.
19 //
20 // In applying this licence, CERN does not waive the privileges and immunities
21 // granted to it by virtue of its status as an Intergovernmental Organization
22 // or submit itself to any jurisdiction.
23 //------------------------------------------------------------------------------
24 
25 #ifndef SRC_XRDZIP_XRDZIPCDFH_HH_
26 #define SRC_XRDZIP_XRDZIPCDFH_HH_
27 
28 #include "XrdZip/XrdZipLFH.hh"
29 #include "XrdZip/XrdZipUtils.hh"
30 
31 #include <string>
32 #include <algorithm>
33 #include <iterator>
34 #include <unordered_map>
35 #include <memory>
36 #include <tuple>
37 
38 namespace XrdZip
39 {
//---------------------------------------------------------------------------
// CDFH is defined further down; the aliases below only need its name
//---------------------------------------------------------------------------
struct CDFH;

//---------------------------------------------------------------------------
// Central Directory records, each one uniquely owned, kept in a vector
//---------------------------------------------------------------------------
using cdvec_t = std::vector<std::unique_ptr<CDFH>>;

//---------------------------------------------------------------------------
// File name -> index of the matching CD record
//---------------------------------------------------------------------------
using cdmap_t = std::unordered_map<std::string, size_t>;

//---------------------------------------------------------------------------
// File name -> CD record
//---------------------------------------------------------------------------
using cdrecs_t = std::unordered_map<std::string, std::unique_ptr<CDFH>>;
59 
60  //---------------------------------------------------------------------------
61  // A data structure representing the Central Directory File header record
62  //---------------------------------------------------------------------------
63  struct CDFH
64  {
65  //-------------------------------------------------------------------------
66  // Parse central directory
67  // @param buffer : buffer containing the CD records
68  // @param bufferSize : size of the buffer
69  // @param nbCdRecords : nb of CD records
70  // @return : vector of CD records / file name to index mapping
71  //-------------------------------------------------------------------------
72  inline static std::tuple<cdvec_t, cdmap_t> Parse( const char *buffer, uint32_t bufferSize, uint16_t nbCdRecords )
73  {
74  uint32_t offset = 0;
75  cdvec_t cdvec;
76  cdmap_t cdmap;
77  cdvec.reserve( nbCdRecords );
78 
79  for( size_t i = 0; i < nbCdRecords; ++i )
80  {
81  if( bufferSize < cdfhBaseSize ) break;
82  // check the signature
83  uint32_t signature = to<uint32_t>( buffer + offset );
84  if( signature != cdfhSign ) throw bad_data();
85  // parse the record
86  std::unique_ptr<CDFH> cdfh( new CDFH( buffer + offset ) );
87  offset += cdfh->cdfhSize;
88  bufferSize -= cdfh->cdfhSize;
89  cdmap[cdfh->filename] = i;
90  cdvec.push_back( std::move( cdfh ) );
91  }
92 
93  return std::make_tuple( std::move( cdvec ), std::move( cdmap ) );
94  }
95 
96  //-------------------------------------------------------------------------
97  // Parse central directory
98  // @param buffer : buffer containing the CD records
99  // @param bufferSize : size of the buffer
100  // @return : vector of CD records / file name to index mapping
101  //-------------------------------------------------------------------------
102  inline static std::tuple<cdvec_t, cdmap_t> Parse( const char *&buffer, uint32_t bufferSize )
103  {
104  cdvec_t cdvec;
105  cdmap_t cdmap;
106  size_t i = 0;
107  while( bufferSize > 0 )
108  {
109  if( bufferSize < sizeof( uint32_t ) ) throw bad_data();
110  // check the signature
111  uint32_t signature = to<uint32_t>( buffer );
112  if( signature != cdfhSign )
113  return std::make_tuple( std::move( cdvec ), std::move( cdmap ) );
114  // parse the record
115  std::unique_ptr<CDFH> cdfh( new CDFH( buffer ) );
116  if( bufferSize < cdfh->cdfhSize ) throw bad_data();
117  buffer += cdfh->cdfhSize;
118  bufferSize -= cdfh->cdfhSize;
119  cdmap[cdfh->filename] = i++;
120  cdvec.push_back( std::move( cdfh ) );
121  }
122 
123  return std::make_tuple( std::move( cdvec ), std::move( cdmap ) );
124  }
125 
126  //---------------------------------------------------------------------------
127  // Calculate size of the Central Directory
128  //---------------------------------------------------------------------------
129  inline static size_t CalcSize( const cdvec_t &cdvec, uint32_t orgcdsz, uint32_t orgcdcnt )
130  {
131  size_t size = 0;
132  auto itr = cdvec.begin() + orgcdcnt;
133  for( ; itr != cdvec.end() ; ++itr )
134  {
135  CDFH *cdfh = itr->get();
136  size += cdfh->cdfhSize;
137  }
138  return size + orgcdsz;
139  }
140 
141  inline static void Serialize( uint32_t orgcdcnt,
142  const buffer_t &orgcdbuf,
143  const cdvec_t &cdvec,
144  buffer_t &buffer )
145  {
146  std::copy( orgcdbuf.begin(), orgcdbuf.end(), std::back_inserter( buffer ) );
147  auto itr = cdvec.begin() + orgcdcnt;
148  for( ; itr != cdvec.end() ; ++itr )
149  {
150  CDFH *cdfh = itr->get();
151  cdfh->Serialize( buffer );
152  }
153  }
154 
155  //-------------------------------------------------------------------------
156  // Constructor from Local File Header
// @param lfh       : local file header; timestmp, ZCRC32, filename and
//                    extra are taken from it
// @param mode      : shifted left by 16 bits and stored in externAttr
// @param lfhOffset : offset of the local file header; stored in 'offset'
//                    when below ovrflw<uint32_t>::value and always passed
//                    on to the Extra constructor
//
// NOTE(review): this listing skips its own lines 160-161, 164-166, 175 and
// 186, so some member initialisers, the statement guarded by the first
// 'if' and whatever precedes the closing brace are not visible here.
157  //-------------------------------------------------------------------------
158  CDFH( LFH *lfh, mode_t mode, uint64_t lfhOffset ):
// presumably "version made by": upper byte 3, lower byte 63 - confirm
// the meaning of both values against the ZIP specification
159  zipVersion( ( 3 << 8 ) | 63 ),
162  timestmp( lfh->timestmp ),
163  ZCRC32( lfh->ZCRC32 ),
167  commentLength( 0 ),
168  nbDisk( 0 ),
169  internAttr( 0 ),
170  externAttr( mode << 16 ),
171  filename( lfh->filename ),
172  extra( new Extra( lfh->extra.get(), lfhOffset ) )
173  {
// offsets that do not fit the 32-bit field take the first branch (its
// statement is on a line missing from this listing)
174  if ( lfhOffset >= ovrflw<uint32_t>::value )
176  else
177  offset = lfhOffset;
178 
179  extraLength = extra->totalSize;
180 
// the minimum version depends only on whether an extra field is written
// (presumably 4.5 is what ZIP64 requires - TODO confirm)
181  if ( extraLength == 0 )
182  minZipVersion = 10;
183  else
184  minZipVersion = 45;
185 
187  }
188 
189  //-------------------------------------------------------------------------
190  // Constructor from buffer
// @param buffer : start of a serialized record (signature at offset 0)
//
// No length is passed in: the fixed fields up to offset 46, then
// filenameLength and extraLength further bytes, are read unconditionally,
// so the caller has to guarantee the buffer is large enough.
//
// NOTE(review): the fields are loaded through reinterpret_cast at offsets
// such as +38 and +42, which are not multiples of 4 from 'buffer'; Parse()
// reads its signature with to<uint32_t>() instead - consider the same here.
// NOTE(review): this listing skips its own line 216; nothing visible here
// reads the comment bytes or assigns cdfhSize.
191  //-------------------------------------------------------------------------
192  CDFH( const char *buffer )
193  {
// fixed-size part; offsets are relative to the start of the record
194  zipVersion = *reinterpret_cast<const uint16_t*>( buffer + 4 );
195  minZipVersion = *reinterpret_cast<const uint16_t*>( buffer + 6 );
196  generalBitFlag = *reinterpret_cast<const uint16_t*>( buffer + 8 );
197  compressionMethod = *reinterpret_cast<const uint16_t*>( buffer + 10 );
198  timestmp.time = *reinterpret_cast<const uint16_t*>( buffer + 12 );
199  timestmp.date = *reinterpret_cast<const uint16_t*>( buffer + 14 );
200  ZCRC32 = *reinterpret_cast<const uint32_t*>( buffer + 16 );
201  compressedSize = *reinterpret_cast<const uint32_t*>( buffer + 20 );
202  uncompressedSize = *reinterpret_cast<const uint32_t*>( buffer + 24 );
203  filenameLength = *reinterpret_cast<const uint16_t*>( buffer + 28 );
204  extraLength = *reinterpret_cast<const uint16_t*>( buffer + 30 );
205  commentLength = *reinterpret_cast<const uint16_t*>( buffer + 32 );
206  nbDisk = *reinterpret_cast<const uint16_t*>( buffer + 34 );
207  internAttr = *reinterpret_cast<const uint16_t*>( buffer + 36 );
208  externAttr = *reinterpret_cast<const uint32_t*>( buffer + 38 );
209  offset = *reinterpret_cast<const uint32_t*>( buffer + 42 );
210 
// the file name follows the fixed part directly
211  filename.assign( buffer + 46, filenameLength );
212 
213  // now parse the 'extra' (may contain the zip64 extension to CDFH)
214  ParseExtra( buffer + 46 + filenameLength, extraLength );
215 
217  }
218 
219  //-------------------------------------------------------------------------
220  // Choose the right offset value from the CDFH record
221  //-------------------------------------------------------------------------
222  inline static uint64_t GetOffset( const CDFH &cdfh )
223  {
224  if( cdfh.offset != ovrflw<uint32_t>::value )
225  return cdfh.offset;
226  return cdfh.extra->offset;
227  }
228 
229  //-------------------------------------------------------------------------
230  // Parse the extensible data fields
// @param buffer : start of the 'extra' area of the record
// @param length : size of the 'extra' area
//
// Works out which fields are expected in the extension (ovrflws) and how
// many bytes they take (exsize), then lets Extra locate and read them.
//
// NOTE(review): this listing skips its own lines 238, 245, 252 and 259,
// i.e. the conditions guarding the four braced blocks below are not
// visible here; as listed the blocks look unconditional.
231  //-------------------------------------------------------------------------
232  void ParseExtra( const char *buffer, uint16_t length)
233  {
234  uint8_t ovrflws = Extra::NONE;
235  uint16_t exsize = 0;
236 
237  // check if compressed size is overflown
239  {
240  ovrflws |= Extra::CPMSIZE;
241  exsize += sizeof( uint64_t );
242  }
243 
244  // check if original size is overflown
246  {
247  ovrflws |= Extra::UCMPSIZE;
248  exsize += sizeof( uint64_t );
249  }
250 
251  // check if offset is overflown
253  {
254  ovrflws |= Extra::OFFSET;
255  exsize += sizeof( uint64_t );
256  }
257 
258  // check if number of disks is overflown
// (this one adds 4 bytes, the other three add 8 bytes each)
260  {
261  ovrflws |= Extra::NBDISK;
262  exsize += sizeof( uint32_t );
263  }
264 
265  // if the expected size of ZIP64 extension is 0 we
266  // can skip parsing of 'extra'
267  if( exsize == 0 ) return;
268 
269  extra.reset( new Extra() );
270 
271  // Parse the extra part
// if Find() returns a null pointer 'extra' stays a default-constructed
// Extra and nothing is read into it
272  buffer = Extra::Find( buffer, length );
273  if( buffer )
274  extra->FromBuffer( buffer, exsize, ovrflws );
275  }
276 
277  //-------------------------------------------------------------------------
279  //-------------------------------------------------------------------------
280  void Serialize( buffer_t &buffer )
281  {
282  copy_bytes( cdfhSign, buffer );
283  copy_bytes( zipVersion, buffer );
284  copy_bytes( minZipVersion, buffer );
285  copy_bytes( generalBitFlag, buffer );
286  copy_bytes( compressionMethod, buffer );
287  copy_bytes( timestmp.time, buffer );
288  copy_bytes( timestmp.date, buffer );
289  copy_bytes( ZCRC32, buffer );
290  copy_bytes( compressedSize, buffer );
291  copy_bytes( uncompressedSize, buffer );
292  copy_bytes( filenameLength, buffer );
293  copy_bytes( extraLength, buffer );
294  copy_bytes( commentLength, buffer );
295  copy_bytes( nbDisk, buffer );
296  copy_bytes( internAttr, buffer );
297  copy_bytes( externAttr, buffer );
298  copy_bytes( offset, buffer );
299  std::copy( filename.begin(), filename.end(), std::back_inserter( buffer ) );
300  if( extra )
301  extra->Serialize( buffer );
302 
303  if ( commentLength > 0 )
304  std::copy( comment.begin(), comment.end(), std::back_inserter( buffer ) );
305  }
306 
// Record fields, declared in the order Serialize() writes them; cdfhSize
// is not written by Serialize().
307  uint16_t zipVersion; //< ZIP version
308  uint16_t minZipVersion; //< minimum ZIP version
309  uint16_t generalBitFlag; //< flags
310  uint16_t compressionMethod; //< compression method
311  dos_timestmp timestmp; //< DOS timestamp
312  uint32_t ZCRC32; //< CRC32
313  uint32_t compressedSize; //< compressed size
314  uint32_t uncompressedSize; //< uncompressed size
315  uint16_t filenameLength; //< filename length
316  uint16_t extraLength; //< size of the ZIP64 extra field
317  uint16_t commentLength; //< comment length
318  uint16_t nbDisk; //< number of disks
319  uint16_t internAttr; //< internal attributes
320  uint32_t externAttr; //< external attributes
321  uint32_t offset; //< offset (see GetOffset for the overflow case)
322  std::string filename; //< file name
323  std::unique_ptr<Extra> extra; //< ZIP64 extra field
324  std::string comment; //< user comment
325  uint16_t cdfhSize; //< size of the record
326 
327  //-------------------------------------------------------------------------
328  // the Central Directory File Header signature
329  //-------------------------------------------------------------------------
330  static const uint32_t cdfhSign = 0x02014b50;
// size of the fixed part of a record: the buffer constructor reads the
// file name starting at offset 46
331  static const uint16_t cdfhBaseSize = 46;
332  };
333 }
334 
335 #endif /* SRC_XRDZIP_XRDZIPCDFH_HH_ */
uint16_t internAttr
Definition: XrdZipCDFH.hh:319
static const uint16_t cdfhBaseSize
Definition: XrdZipCDFH.hh:331
std::vector< char > buffer_t
Definition: XrdZipUtils.hh:54
uint16_t generalBitFlag
Definition: XrdZipCDFH.hh:309
uint16_t time
Definition: XrdZipUtils.hh:130
CDFH(const char *buffer)
Definition: XrdZipCDFH.hh:192
static uint64_t GetOffset(const CDFH &cdfh)
Definition: XrdZipCDFH.hh:222
static void copy_bytes(const INT value, buffer_t &buffer)
Definition: XrdZipUtils.hh:60
uint16_t commentLength
Definition: XrdZipCDFH.hh:317
uint16_t filenameLength
Definition: XrdZipCDFH.hh:315
void Serialize(buffer_t &buffer)
Serialize the object into a buffer.
Definition: XrdZipCDFH.hh:280
static const uint32_t cdfhSign
Definition: XrdZipCDFH.hh:330
static size_t CalcSize(const cdvec_t &cdvec, uint32_t orgcdsz, uint32_t orgcdcnt)
Definition: XrdZipCDFH.hh:129
Definition: XrdZipExtra.hh:163
uint32_t uncompressedSize
Definition: XrdZipCDFH.hh:314
static const char * Find(const char *buffer, uint16_t length)
Definition: XrdZipExtra.hh:98
uint16_t extraLength
Definition: XrdZipCDFH.hh:316
uint32_t offset
Definition: XrdZipCDFH.hh:321
std::unique_ptr< Extra > extra
Definition: XrdZipCDFH.hh:323
uint32_t compressedSize
Definition: XrdZipCDFH.hh:313
uint16_t cdfhSize
Definition: XrdZipCDFH.hh:325
dos_timestmp timestmp
Definition: XrdZipCDFH.hh:311
std::unordered_map< std::string, std::unique_ptr< CDFH > > cdrecs_t
Definition: XrdZipCDFH.hh:58
Definition: XrdZipExtra.hh:159
static void Serialize(uint32_t orgcdcnt, const buffer_t &orgcdbuf, const cdvec_t &cdvec, buffer_t &buffer)
Definition: XrdZipCDFH.hh:141
Definition: XrdZipExtra.hh:162
Definition: XrdZipCDFH.hh:63
std::string comment
Definition: XrdZipCDFH.hh:324
A data structure representing ZIP Local File Header.
Definition: XrdZipLFH.hh:41
uint16_t nbDisk
Definition: XrdZipCDFH.hh:318
static std::tuple< cdvec_t, cdmap_t > Parse(const char *buffer, uint32_t bufferSize, uint16_t nbCdRecords)
Definition: XrdZipCDFH.hh:72
uint32_t ZCRC32
Definition: XrdZipCDFH.hh:312
Definition: XrdZipUtils.hh:46
std::unordered_map< std::string, size_t > cdmap_t
Definition: XrdZipCDFH.hh:53
Definition: XrdZipUtils.hh:40
uint16_t compressionMethod
Definition: XrdZipCDFH.hh:310
Definition: XrdZipUtils.hh:92
void ParseExtra(const char *buffer, uint16_t length)
Definition: XrdZipCDFH.hh:232
uint32_t externAttr
Definition: XrdZipCDFH.hh:320
std::vector< std::unique_ptr< CDFH > > cdvec_t
Definition: XrdZipCDFH.hh:43
uint16_t date
Definition: XrdZipUtils.hh:143
Definition: XrdZipExtra.hh:160
uint16_t minZipVersion
Definition: XrdZipCDFH.hh:308
Definition: XrdZipExtra.hh:161
Definition: XrdZipExtra.hh:35
uint16_t zipVersion
Definition: XrdZipCDFH.hh:307
CDFH(LFH *lfh, mode_t mode, uint64_t lfhOffset)
Definition: XrdZipCDFH.hh:158
std::string filename
Definition: XrdZipCDFH.hh:322
static std::tuple< cdvec_t, cdmap_t > Parse(const char *&buffer, uint32_t bufferSize)
Definition: XrdZipCDFH.hh:102