En iOS 4.0, Apple ha rediseñado el proceso de copia de seguridad.
iTunes solía almacenar una lista de nombres de archivo asociados con archivos de respaldo en el archivo Manifest.plist, pero en iOS 4.0 ha movido esta información a Manifest.mbdb.
Puede ver un ejemplo de este archivo haciendo una copia de seguridad de sus dispositivos con iOS 4.0 y buscando en su carpeta ~/Library/Application Support/MobileSync/Backup (busque dentro de la subcarpeta con la fecha más reciente).
Aquí hay una captura de pantalla de cómo se ve el archivo en un editor de texto:
(fuente: supercrazyawesome.com )
¿Cómo puedo analizar esto en una aplicación Cocoa para poder actualizar mi aplicación (gratuita) iPhone Backup Extractor ( http://supercrazyawesome.com ) para iOS 4.0?
Respuestas:
Gracias, user374559 y reneD, ese código y descripción son muy útiles.
Mi prueba con Python para analizar e imprimir la información en un formato similar al de `ls -l` de Unix:
#!/usr/bin/env python
"""Print the contents of an iOS 4 backup manifest in an ``ls -l``-like format.

Expects ``Manifest.mbdb`` and ``Manifest.mbdx`` in the current directory.
Written to run unchanged on Python 2.6+ and Python 3.
"""
import sys

# The five length-prefixed strings that open every MBDB record, in parse order.
_STRING_FIELDS = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')

# The fixed-width big-endian integers that follow: (name, size in bytes).
_INT_FIELDS = (
    ('mode', 2), ('unknown2', 4), ('unknown3', 4), ('userid', 4),
    ('groupid', 4), ('mtime', 4), ('atime', 4), ('ctime', 4),
    ('filelen', 8), ('flag', 1), ('numprops', 1),
)

# Top three bits of the mode field => ls-style type character.
_FILE_TYPES = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def _native_str(raw):
    """Return raw bytes as the interpreter's native ``str`` type."""
    if isinstance(raw, str):  # Python 2: bytes already is str
        return raw
    # Python 3: latin-1 maps each byte to exactly one code point, so the
    # conversion never fails and can be reversed without loss.
    return raw.decode('latin-1')


def _read_binary(filename):
    """Return the whole file as bytes.

    Binary mode matters: text mode may translate newlines or stop early on
    some platforms, and the handle is closed as soon as the read finishes.
    """
    with open(filename, 'rb') as handle:
        return handle.read()


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    Raises IndexError if fewer than ``intsize`` bytes remain at ``offset``.
    """
    # bytearray yields ints when iterated on both Python 2 and Python 3.
    chunk = bytearray(data[offset:offset + intsize])
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d bytes at offset %d"
                         % (intsize, offset))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, offset + intsize


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    The string is preceded by a 2-byte big-endian length; a length field of
    0xFFFF is treated as a blank string.
    """
    if data[offset:offset + 2] == b'\xff\xff':
        return '', offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = bytes(data[offset:offset + length])
    return _native_str(value), offset + length


def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping each record's start offset within the file to a
    dict of that record's fields (plus 'start_offset' and 'properties').
    Raises Exception if the file does not start with the 'mbdb' magic.
    """
    data = _read_binary(filename)
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # magic, then 2 bytes (x05 x00) whose meaning is unknown
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in _STRING_FIELDS:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb


def process_mbdx_file(filename):
    """Parse a Manifest.mbdx file.

    Returns a dict mapping a record offset in the MBDB file to the record's
    file ID as a 40-character hex string.  Raises Exception if the file does
    not start with the 'mbdx' magic.
    """
    data = _read_binary(filename)
    if data[0:4] != b"mbdx":
        raise Exception("This does not look like an MBDX file")
    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    offset = 4 + 2  # magic, then 2 bytes (0x02 0x00) whose meaning is unknown
    _, offset = getint(data, offset, 4)  # 4-byte count of records (unused)
    while offset < len(data):
        # 26 byte record: 20-byte fileID, 4-byte offset, 2-byte mode.
        file_id = bytearray(data[offset:offset + 20])
        offset += 20
        mbdb_offset, offset = getint(data, offset, 4)
        _, offset = getint(data, offset, 2)  # mode field (unused)
        # Stored offsets do not count the 6-byte MBDB prolog; add it back.
        mbdx[mbdb_offset + 6] = ''.join('%02x' % byte for byte in file_id)
    return mbdx


def modestr(val):
    """Render the low nine permission bits of ``val`` as e.g. 'rwxr-xr-x'."""
    def mode(bits):
        return (('r' if bits & 0x4 else '-') +
                ('w' if bits & 0x2 else '-') +
                ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one record; ``f`` must already carry a 'fileID' entry.

    The short form is "(fileID)domain::filename".  The verbose form adds
    type, permissions, owner ids, size and timestamps, the link target for
    symlinks, and every extra property.  An unrecognised file type is
    reported on stderr and shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind = _FILE_TYPES.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True

if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    # Sorted by offset so the listing follows the order of the manifest.
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        # Single-argument print() behaves the same on Python 2 and 3.
        print(fileinfo_str(fileinfo, verbose))
fuente
En iOS 5, se eliminó el archivo Manifest.mbdx. Para el propósito de este artículo, era redundante de todos modos, porque el dominio y la ruta están en Manifest.mbdb y el hash de ID se puede generar con SHA1.
Aquí está mi actualización del código de galloglass para que funcione con copias de seguridad de dispositivos iOS 5. Los únicos cambios son la eliminación de process_mbdx_file() y la adición de algunas líneas en process_mbdb_file().
Probado con copias de seguridad de un iPhone 4S y un iPad 1, ambos con muchas aplicaciones y archivos.
#!/usr/bin/env python
"""Print the contents of an iOS 5 backup manifest in an ``ls -l``-like format.

Expects ``Manifest.mbdb`` in the current directory.  There is no
Manifest.mbdx here; each file ID is computed as the SHA-1 of
"<domain>-<filename>".  Runs unchanged on Python 2.6+ and Python 3.
"""
import sys
import hashlib

# Map offset of a record in the MBDB file => fileID string.
# Filled in as a side effect of process_mbdb_file().
mbdx = {}

# The five length-prefixed strings that open every MBDB record, in parse order.
_STRING_FIELDS = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')

# The fixed-width big-endian integers that follow: (name, size in bytes).
_INT_FIELDS = (
    ('mode', 2), ('unknown2', 4), ('unknown3', 4), ('userid', 4),
    ('groupid', 4), ('mtime', 4), ('atime', 4), ('ctime', 4),
    ('filelen', 8), ('flag', 1), ('numprops', 1),
)

# Top three bits of the mode field => ls-style type character.
_FILE_TYPES = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def _native_str(raw):
    """Return raw bytes as the interpreter's native ``str`` type."""
    if isinstance(raw, str):  # Python 2: bytes already is str
        return raw
    # Python 3: latin-1 maps each byte to exactly one code point, so the
    # conversion never fails and can be reversed without loss.
    return raw.decode('latin-1')


def _raw_bytes(text):
    """Return a native string produced by getstring() as its original bytes."""
    if isinstance(text, bytes):  # Python 2: str already is bytes
        return text
    return text.encode('latin-1')  # exact inverse of _native_str()


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    Raises IndexError if fewer than ``intsize`` bytes remain at ``offset``.
    """
    # bytearray yields ints when iterated on both Python 2 and Python 3.
    chunk = bytearray(data[offset:offset + intsize])
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d bytes at offset %d"
                         % (intsize, offset))
    value = 0
    for byte in chunk:
        value = (value << 8) + byte
    return value, offset + intsize


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    The string is preceded by a 2-byte big-endian length; a length field of
    0xFFFF is treated as a blank string.
    """
    if data[offset:offset + 2] == b'\xff\xff':
        return '', offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = bytes(data[offset:offset + length])
    return _native_str(value), offset + length


def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping each record's start offset within the file to a
    dict of that record's fields (plus 'start_offset' and 'properties').
    As a side effect, stores each record's SHA-1 file ID in the module-level
    ``mbdx`` dict under the same offset.  Raises Exception if the file does
    not start with the 'mbdb' magic.
    """
    # Binary mode: text mode may translate newlines or stop early on some
    # platforms; the with-block closes the handle right after the read.
    with open(filename, 'rb') as handle:
        data = handle.read()
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # magic, then 2 bytes (x05 x00) whose meaning is unknown
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in _STRING_FIELDS:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        # Hash the exact bytes stored in the manifest, not a re-encoding.
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        digest = hashlib.sha1(_raw_bytes(fullpath))
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb


def modestr(val):
    """Render the low nine permission bits of ``val`` as e.g. 'rwxr-xr-x'."""
    def mode(bits):
        return (('r' if bits & 0x4 else '-') +
                ('w' if bits & 0x2 else '-') +
                ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one record; ``f`` must already carry a 'fileID' entry.

    The short form is "(fileID)domain::filename".  The verbose form adds
    type, permissions, owner ids, size and timestamps, the link target for
    symlinks, and every extra property.  An unrecognised file type is
    reported on stderr and shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind = _FILE_TYPES.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True

if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    # Sorted by offset so the listing follows the order of the manifest.
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        # Single-argument print() behaves the same on Python 2 and 3.
        print(fileinfo_str(fileinfo, verbose))
fuente
Terminé mi trabajo en estas cosas, es decir, la actualización de iOS 4 + iTunes 9.2 de mi biblioteca de decodificadores de respaldo para Python - http://www.iki.fi/fingon/iphonebackupdb.py
Hace lo que necesito, poca documentación, pero siéntete libre de copiar ideas desde allí ;-)
(Parece funcionar bien con mis copias de seguridad al menos).
fuente
Puede encontrar información y una pequeña descripción del formato MBDB / MBDX aquí:
http://code.google.com/p/iphonebackupbrowser/
Esta es mi aplicación para buscar archivos de respaldo. Intenté documentar el formato de los nuevos archivos que vienen con iTunes 9.2.
fuente
Esta secuencia de comandos de Python es increíble.
Aquí está mi versión Ruby (con una pequeña mejora) y capacidades de búsqueda. (para iOS 5)
# encoding: utf-8 require 'fileutils' require 'digest/sha1' class ManifestParser def initialize(mbdb_filename, verbose = false) @verbose = verbose process_mbdb_file(mbdb_filename) end # Returns the numbers of records in the Manifest files. def record_number @mbdb.size end # Returns a huge string containing the parsing of the Manifest files. def to_s s = '' @mbdb.each do |v| s += "#{fileinfo_str(v)}\n" end s end def to_file(filename) File.open(filename, 'w') do |f| @mbdb.each do |v| f.puts fileinfo_str(v) end end end # Copy the backup files to their real path/name. # * domain_match Can be a regexp to restrict the files to copy. # * filename_match Can be a regexp to restrict the files to copy. def rename_files(domain_match = nil, filename_match = nil) @mbdb.each do |v| if v[:type] == '-' # Only rename files. if (domain_match.nil? or v[:domain] =~ domain_match) and (filename_match.nil? or v[:filename] =~ filename_match) dst = "#{v[:domain]}/#{v[:filename]}" puts "Creating: #{dst}" FileUtils.mkdir_p(File.dirname(dst)) FileUtils.cp(v[:fileID], dst) end end end end # Return the filename that math the given regexp. def search(regexp) result = Array.new @mbdb.each do |v| if "#{v[:domain]}::#{v[:filename]}" =~ regexp result << v end end result end private # Retrieve an integer (big-endian) and new offset from the current offset def getint(data, offset, intsize) value = 0 while intsize > 0 value = (value<<8) + data[offset].ord offset += 1 intsize -= 1 end return value, offset end # Retrieve a string and new offset from the current offset into the data def getstring(data, offset) return '', offset + 2 if data[offset] == 0xFF.chr and data[offset + 1] == 0xFF.chr # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset...(offset + length)] return value, (offset + length) end def process_mbdb_file(filename) @mbdb = Array.new data = File.open(filename, 'rb') { |f| f.read } puts "MBDB file read. 
Size: #{data.size}" raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb' offset = 4 offset += 2 # value x05 x00, not sure what this is while offset < data.size fileinfo = Hash.new fileinfo[:start_offset] = offset fileinfo[:domain], offset = getstring(data, offset) fileinfo[:filename], offset = getstring(data, offset) fileinfo[:linktarget], offset = getstring(data, offset) fileinfo[:datahash], offset = getstring(data, offset) fileinfo[:unknown1], offset = getstring(data, offset) fileinfo[:mode], offset = getint(data, offset, 2) if (fileinfo[:mode] & 0xE000) == 0xA000 # Symlink fileinfo[:type] = 'l' elsif (fileinfo[:mode] & 0xE000) == 0x8000 # File fileinfo[:type] = '-' elsif (fileinfo[:mode] & 0xE000) == 0x4000 # Dir fileinfo[:type] = 'd' else # $stderr.puts "Unknown file type %04x for #{fileinfo_str(f, false)}" % f['mode'] fileinfo[:type] = '?' end fileinfo[:unknown2], offset = getint(data, offset, 4) fileinfo[:unknown3], offset = getint(data, offset, 4) fileinfo[:userid], offset = getint(data, offset, 4) fileinfo[:groupid], offset = getint(data, offset, 4) fileinfo[:mtime], offset = getint(data, offset, 4) fileinfo[:atime], offset = getint(data, offset, 4) fileinfo[:ctime], offset = getint(data, offset, 4) fileinfo[:filelen], offset = getint(data, offset, 8) fileinfo[:flag], offset = getint(data, offset, 1) fileinfo[:numprops], offset = getint(data, offset, 1) fileinfo[:properties] = Hash.new (0...(fileinfo[:numprops])).each do |ii| propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo[:properties][propname] = propval end # Compute the ID of the file. fullpath = fileinfo[:domain] + '-' + fileinfo[:filename] fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath) # We add the file to the list of files. @mbdb << fileinfo end @mbdb end def modestr(val) def mode(val) r = (val & 0x4) ? 'r' : '-' w = (val & 0x2) ? 'w' : '-' x = (val & 0x1) ? 
'x' : '-' r + w + x end mode(val >> 6) + mode(val >> 3) + mode(val) end def fileinfo_str(f) return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]] info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination f[:properties].each do |k, v| info += " #{k}=#{v.inspect}" end info end end if __FILE__ == $0 mp = ManifestParser.new 'Manifest.mbdb', true mp.to_file 'filenames.txt' end
fuente
Me gustó el código de galloglass y cambié la función principal para que muestre una lista ordenada del tamaño total por aplicación:
verbose = True

if __name__ == '__main__':
    # List every record, then print the total size of the regular files in
    # each domain, smallest domain first.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}  # domain => total bytes of its regular files
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n"
                             % fileinfo_str(fileinfo))
        # Single-argument print() behaves the same on Python 2 and 3.
        print(fileinfo_str(fileinfo, verbose))
        if (fileinfo['mode'] & 0xE000) == 0x8000:  # regular files only
            domain = fileinfo['domain']
            sizes[domain] = sizes.get(domain, 0) + fileinfo['filelen']
    for domain in sorted(sizes, key=sizes.get):
        # Floor division keeps the megabyte figure an exact integer on
        # Python 3, where "/" would go through a float.
        print("%-60s %11d (%dMB)"
              % (domain, sizes[domain], sizes[domain] // (1024 * 1024)))
De esa manera, puede averiguar qué aplicación está consumiendo todo ese espacio.
fuente
Para aquellos que buscan una implementación Java de un lector de archivos MBDB, existen varios:
Proyecto "iPhone Analyzer" (código muy limpio): http://sourceforge.net/p/iphoneanalyzer/code/HEAD/tree/trunk/library/src/main/java/com/crypticbit/ipa/io/parser/manifest/Mbdb.java
Proyecto "iPhone Stalker": https://code.google.com/p/iphonestalker/source/browse/trunk/src/iphonestalker/util/io/MBDBReader.java
fuente
Gracias a la respuesta de galloglass. El código funciona muy bien con Python 2.7. Solo hay una cosa que quiero mencionar. Cuando lea el archivo manifest.mbdb, debe usar el modo binario. De lo contrario, no se leerá todo el contenido.
También hice algunos cambios menores para que el código funcionara con Python 3.4. Aquí está el código.
#!/usr/bin/env python
"""Print the contents of an iOS 5 backup manifest in an ``ls -l``-like format.

Python 3 version.  Expects ``Manifest.mbdb`` in the current directory; each
file ID is computed as the SHA-1 of "<domain>-<filename>".
"""
import sys
import hashlib

# Map offset of a record in the MBDB file => fileID string.
# Filled in as a side effect of process_mbdb_file().
mbdx = {}

# The five length-prefixed strings that open every MBDB record, in parse order.
_STRING_FIELDS = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')

# The fixed-width big-endian integers that follow: (name, size in bytes).
_INT_FIELDS = (
    ('mode', 2), ('unknown2', 4), ('unknown3', 4), ('userid', 4),
    ('groupid', 4), ('mtime', 4), ('atime', 4), ('ctime', 4),
    ('filelen', 8), ('flag', 1), ('numprops', 1),
)

# Top three bits of the mode field => ls-style type character.
_FILE_TYPES = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    Raises IndexError if fewer than ``intsize`` bytes remain at ``offset``.
    """
    chunk = data[offset:offset + intsize]
    if len(chunk) != intsize:
        raise IndexError("truncated data: need %d bytes at offset %d"
                         % (intsize, offset))
    return int.from_bytes(chunk, 'big'), offset + intsize


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    The string is preceded by a 2-byte big-endian length; a length field of
    0xFFFF is treated as a blank string.  The bytes are decoded as latin-1,
    which never fails and can be reversed with ``.encode('latin-1')``.
    """
    if data[offset:offset + 2] == b'\xff\xff':
        return '', offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = data[offset:offset + length]
    return value.decode(encoding='latin-1'), (offset + length)


def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict mapping each record's start offset within the file to a
    dict of that record's fields (plus 'start_offset' and 'properties').
    As a side effect, stores each record's SHA-1 file ID in the module-level
    ``mbdx`` dict under the same offset.  Raises Exception if the file does
    not start with the 'mbdb' magic.
    """
    with open(filename, 'rb') as handle:  # 'b' is needed to read all content
        data = handle.read()
    # Compare as bytes: decoding an arbitrary header could itself fail.
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # magic, then 2 bytes (x05 x00) whose meaning is unknown
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in _STRING_FIELDS:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # getstring() decoded with latin-1, so encode with latin-1 to hash
        # the exact bytes stored in the manifest.  The default UTF-8 would
        # alter every non-ASCII byte and yield the wrong file ID.
        digest = hashlib.sha1(fullpath.encode('latin-1'))
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb


def modestr(val):
    """Render the low nine permission bits of ``val`` as e.g. 'rwxr-xr-x'."""
    def mode(bits):
        return (('r' if bits & 0x4 else '-') +
                ('w' if bits & 0x2 else '-') +
                ('x' if bits & 0x1 else '-'))
    return mode(val >> 6) + mode(val >> 3) + mode(val)


def fileinfo_str(f, verbose=False):
    """Format one record; ``f`` must already carry a 'fileID' entry.

    The short form is "(fileID)domain::filename".  The verbose form adds
    type, permissions, owner ids, size and timestamps, the link target for
    symlinks, and every extra property.  An unrecognised file type is
    reported on stderr and shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind = _FILE_TYPES.get(f['mode'] & 0xE000)
    if kind is None:
        print("Unknown file type %04x for %s"
              % (f['mode'], fileinfo_str(f, False)), file=sys.stderr)
        kind = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
             f['filelen'], f['mtime'], f['atime'], f['ctime'],
             f['fileID'], f['domain'], f['filename']))
    if kind == 'l':
        info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info


verbose = True

if __name__ == '__main__':
    mbdb = process_mbdb_file(r"Manifest.mbdb")
    # Sorted by offset so the listing follows the order of the manifest.
    for offset in sorted(mbdb):
        fileinfo = mbdb[offset]
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print("No fileID found for %s" % fileinfo_str(fileinfo),
                  file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))
fuente
# Big-endian accumulation step: shift the running value up one byte, then add
# the byte found at data[offset] (ord() converts the 1-character string to int).
value = (value<<8) + ord(data[offset])