首页| 论坛| 消息
主题:Office文档修复原理
1直在努力发表于 2010-02-04 17:39
上节介绍的“劳拉”文件格式,对修复损坏的Office文档有什么帮助呢?其实,Office文档的修复过程与硬盘FAT文件系统的数据恢复过程十分相似。以修复一个损坏的Word文件为例,其过程如表7-4所示:

1.
通过文件块头确定目录链根的开始块序号;

2.
提取文件根目录结构;

3.
通过目录链表定位要挽救的OLE 对象的位置;

4.
拷贝文件未损坏的OLE 对象;

5.
对得到的OLE 对象进行组合,重新构造一个新的Word 文档。

表7-4 Word 文档的修复过程

得到的未损坏的OLE对象越多,修复后的Word文档就越完整。如果想自己编写文件修复程序,这里提供一个参考程序ole.c,它能够给出很多有参考价值的信息。

程序:ole.c

.
编程语言: Borland C++

.
功能:列出一个 Word 文件的内部目录结构

#include
#include
#include
#include
#include
#include
#include
#include

#define MIN(a,b) ((a)nsize == 0) return;
if(pps_node->previous != NULL) unravel(pps_node->previous,level);
pps_node->level = level;
printf("PPS %s: %*x: ->%s\n",pps_type,level*3,pps_node->index,pps_node->name);
if(pps_node->directory != NULL) unravel(pps_node->directory,level+1);
if(pps_node->next != NULL) unravel(pps_node->next,level);
}

int main(int argc, char **argv)
{
FILE *input = NULL;
FILE *OLEfile = NULL;
FILE *sbfile = NULL;
FILE *infile = NULL;
char Target[64];
int debug = 0, BlockSize = 0, Offset = 0;
int c, i, j, k, len, bytes;
char *s, *p, *t;
char *Block, *BDepot, *SDepot, *Depot, *Root;
char Name[64];
unsigned long int FilePos=0x00000000;
long int num_bbd_blocks;
long int root_list, sbd_list;
long int pps_size, pps_start = -1;
long int linkto;
int root_entry;
pps_entry **pps_list;

if(argc < 2) {
fprintf(stderr,"No input file name\n");
exit (12);
}
fprintf(stderr,"File given was %s\n",argv[1]);
input = fopen(argv[1], "rb");
if(input==NULL) {
fprintf(stderr,"Error opening file %s\n",argv[1]);
exit (12);
}
if(argc < 3) {
fprintf(stderr,"Listing contents\n");
strncpy(Target,"UnLiKeLy",8);
} else {
strncpy(Target, argv[2], 64);
fprintf(stderr, "Extracting %s...\n", Target);
}

/* peek into file to guess file type */
c = getc (input);
ungetc(c,input);

if(isprint(c)) {
fprintf(stderr,"File looks like a plain text file.\n");
return 8;
/* check for MS OLE wrapper */
} else if(c == 0xd0) {
Block =(char *) malloc(512);
/* read header block */
if(fread(Block,512,1,input) != 1 ) {
fprintf(stderr,"1 =========> Input file has faulty OLE format\n");
exit (5);
}
num_bbd_blocks=LongInt(Block+0x2c);
BDepot =(char *) malloc(512*num_bbd_blocks);
s = BDepot;
root_list[0]= LongInt(Block+0x30);
sbd_list[0] = LongInt(Block+0x3c);
if(debug) fprintf(stderr,"num_bbd_blocks %ld, root start %ld, sbd start %ld\n",num_bbd_blocks,root_list[0],sbd_list[0]);

/* read big block Depot */
for(i=0; i Input file has faulty bbd\n");
exit (5);
}
s += 0x200;
}

/* Extract the sbd block list */
for(len = 1; len < MAXBLOCKS; len++){
sbd_list = LongInt(BDepot+(sbd_list*4));
if(sbd_list == -2) break;
}
if(len >= MAXBLOCKS) fprintf(stderr,"Help too many sbd blocks\n");
SDepot =(char *) malloc(512*len);
s = SDepot;

/* Read in Small Block Depot */
for(i=0; i Input file has faulty OLE format\n");
return 5;
}
s += 0x200;
}
/* Extract the root block list */
for(len = 1; len < MAXBLOCKS; len++){
root_list = LongInt(BDepot+(root_list*4));
fprintf(stderr,"root block %d\n",len
下一页 (1/4)
回帖(39):
39楼:Office文档是平常最重要、最普遍的需要修复文件
38楼:谢谢老师谢谢老师谢谢老师
37楼:学习下,看看是否有用先!

全部回帖(39)»
最新回帖
收藏本帖
发新帖