使用C#程序处理PowerPoint文件中的字符串
最近, 有同事偶然發現Microsoft® Office PowerPoint文件可以被看作是一個壓縮包,然后通過WinRAR解壓出來一組XML文件。解壓出來的文件包括:
一個索引文件名稱為:[Content_Types].xml,
一個名為ppt的文件夾,在其內有兩個重要的子文件夾:slides 和notesSlides
其中, [Content_Types].xml記錄了每一張Slide的相對路徑,每一個Slide note的相對路徑。其內容如下圖:
我們發現PPT中的所有內容都被記錄在XML的<a:t></a:t>節點中, 所以,我們通過把所有a:t節點的內容導出,然后對內容進行修改,修改后再替換回原文件,這樣將這一組文件進行壓縮,生成了修改后的PowerPoint文件,該過程為PowerPoint的內容本地化提供了便捷途徑。
這種做法相比較于調用Microsoft.Office.Interop.PowerPoint中的API的做法來說, 保留了原文的100%的格式,不需要后期PPT刷格式的操作。
以下是我們寫的C#代碼, 思路是將每張Slide的字符串導出到一個txt文件,通過Trados翻譯txt文件中的字符串,然后將修改后內容導入到PPT包內相應的XML文件中。
PPTZIPCommon
/// <summary>
/// Helpers shared by the export and import steps that work on an unpacked
/// PowerPoint package (a folder containing [Content_Types].xml).
/// </summary>
class PPTZIPCommon
{
    private const string ContentTypesFileName = "[Content_Types].xml";
    private const string SlideContentType = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
    private const string NotesContentType = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";

    // Both separator styles are accepted when picking the last path segment.
    private static readonly char[] PathSeparators = new char[] { '\\', '/' };

    /// <summary>
    /// Reads [Content_Types].xml under <paramref name="root"/> and appends the
    /// full path of every slide part and every notes-slide part it declares.
    /// Shows a message box and returns without touching the lists when the
    /// index file does not exist.
    /// </summary>
    /// <param name="root">Folder the package was unpacked into.</param>
    /// <param name="SlideFiles">Receives slide paths; created when null.</param>
    /// <param name="NotesFiles">Receives notes-slide paths; created when null.</param>
    internal static void ReadContentTypes(string root, ref List<string> SlideFiles, ref List<string> NotesFiles)
    {
        string ct_fullName = Path.Combine(root, ContentTypesFileName);
        if (!File.Exists(ct_fullName))
        {
            MessageBox.Show(string.Format("the [Content_Types].xml not exist in {0}", root));
            return;
        }

        if (SlideFiles == null)
        {
            SlideFiles = new List<string>();
        }
        if (NotesFiles == null)
        {
            NotesFiles = new List<string>();
        }

        XmlDocument xml_doc = new XmlDocument();
        xml_doc.Load(ct_fullName);

        foreach (XmlNode child in xml_doc.DocumentElement.ChildNodes)
        {
            // Comments and processing instructions are not elements; casting
            // them to XmlElement would throw.
            XmlElement element = child as XmlElement;
            if (element == null)
            {
                continue;
            }

            // GetAttribute returns "" for a missing attribute, so entries
            // without a PartName are skipped instead of causing a
            // NullReferenceException.
            string partName = element.GetAttribute("PartName");
            if (partName.Length == 0)
            {
                continue;
            }

            string contentType = element.GetAttribute("ContentType");
            if (contentType == SlideContentType)
            {
                SlideFiles.Add(ToLocalPath(root, partName));
            }
            else if (contentType == NotesContentType)
            {
                NotesFiles.Add(ToLocalPath(root, partName));
            }
        }
    }

    /// <summary>
    /// Returns the last path segment of <paramref name="scanFolder"/> cut at
    /// its first dot, e.g. "C:\work\deck.pptx\" gives "deck". Returns an empty
    /// string when no name can be derived (null, empty, or dots only).
    /// </summary>
    /// <param name="scanFolder">Path of the unpacked package folder; trailing separators are ignored.</param>
    internal static string GetPPTNameFromFullPath(string scanFolder)
    {
        if (string.IsNullOrEmpty(scanFolder))
        {
            return string.Empty;
        }

        string trimmed = scanFolder.TrimEnd(PathSeparators);
        string lastSegment = trimmed.Substring(trimmed.LastIndexOfAny(PathSeparators) + 1);
        string[] names = lastSegment.Split(new char[] { '.' }, StringSplitOptions.RemoveEmptyEntries);
        return names.Length > 0 ? names[0] : string.Empty;
    }

    // Turns a package part name such as "/ppt/slides/slide1.xml" into a path below root.
    private static string ToLocalPath(string root, string partName)
    {
        string relativePath = partName.TrimStart('/').Replace('/', Path.DirectorySeparatorChar);
        return Path.Combine(root, relativePath);
    }
}
// Article section label for the listing that follows: PPTZIP
/// <summary>
/// Exports the text runs of every slide and notes slide of an unpacked
/// PowerPoint package into one txt file per XML part, ready for translation.
/// </summary>
class PPTZIP
{
    private static List<string> SlideFiles = new List<string>();
    private static List<string> NotesFiles = new List<string>();

    // One or more characters between the tags, so single-character runs are
    // exported too. The previous pattern ".[^<>]+" needed at least two.
    private static readonly Regex TextRunPattern = new Regex(@"<a:t>([^<>]+)</a:t>");

    /// <summary>
    /// Collects every &lt;a:t&gt;...&lt;/a:t&gt; text run of one XML part and
    /// writes them, one per line and each terminated by '^', to
    /// output\&lt;original PPT name&gt;_&lt;fileName&gt;.txt.
    /// Whitespace-only runs are skipped.
    /// </summary>
    /// <param name="file">XML file that contains &lt;a:t&gt;...&lt;/a:t&gt; runs.</param>
    /// <param name="output">Folder the txt file is saved to.</param>
    /// <param name="pptName">Original PowerPoint file name.</param>
    private static void ReadATContent2TXT(string file, string output, string pptName)
    {
        string content;
        using (StreamReader reader = new StreamReader(file))
        {
            content = reader.ReadToEnd();
        }

        // NOTE(review): '^' is the entry terminator, so a run that itself
        // contains '^' cannot round-trip through this format.
        List<string> entries = new List<string>();
        foreach (Match match in TextRunPattern.Matches(content))
        {
            string text = match.Groups[1].Value;
            if (text.Trim().Length == 0)
            {
                continue;
            }
            entries.Add(text + "^");
        }

        // Joining (instead of AppendLine followed by Trim) leaves no trailing
        // line break and keeps leading whitespace of the first entry intact.
        string txtFile = Path.Combine(output, pptName + "_" + Path.GetFileName(file) + ".txt");
        using (StreamWriter writer = new StreamWriter(txtFile))
        {
            writer.Write(string.Join(Environment.NewLine, entries.ToArray()));
        }
    }

    /// <summary>
    /// Exports all slides to scanFolder\SlideTXTs and all notes slides to
    /// scanFolder\NotesTXTs, and creates the matching empty "_Trans" folders
    /// that the translated files are expected in.
    /// </summary>
    /// <param name="scanFolder">Folder the PowerPoint package was unpacked into.</param>
    public static void Export2TXTs(string scanFolder)
    {
        string ppt_name = PPTZIPCommon.GetPPTNameFromFullPath(scanFolder);

        // The lists are static; start from empty ones so a second export in
        // the same process does not handle every file twice.
        SlideFiles = new List<string>();
        NotesFiles = new List<string>();
        PPTZIPCommon.ReadContentTypes(scanFolder, ref SlideFiles, ref NotesFiles);

        ExportGroup(scanFolder, ppt_name, SlideFiles, "SlideTXTs", "SlideTXTs_Trans");
        ExportGroup(scanFolder, ppt_name, NotesFiles, "NotesTXTs", "NotesTXTs_Trans");
    }

    // Exports one group of XML parts; the folders are only created when there is something to export.
    private static void ExportGroup(string scanFolder, string pptName, List<string> files, string outputName, string transName)
    {
        if (files == null || files.Count == 0)
        {
            return;
        }

        // CreateDirectory is a no-op for an existing folder.
        string outputFolder = Path.Combine(scanFolder, outputName);
        Directory.CreateDirectory(outputFolder);
        Directory.CreateDirectory(Path.Combine(scanFolder, transName));

        foreach (string file in files)
        {
            ReadATContent2TXT(file, outputFolder, pptName);
        }
    }
}
// Article section label for the listing that follows: PPTZIPWriter
/// <summary>
/// Writes translated text runs back into the slide and notes-slide XML parts
/// of an unpacked PowerPoint package.
/// </summary>
class PPTZIPWriter
{
    private static List<string> SlideFiles = new List<string>();
    private static List<string> NotesFiles = new List<string>();

    // Fully qualified because the rest of this class does not otherwise need the Regex namespace.
    private static readonly System.Text.RegularExpressions.Regex TextRunPattern =
        new System.Text.RegularExpressions.Regex(@"<a:t>([^<>]*)</a:t>");

    /// <summary>
    /// Replaces, in one pass over <paramref name="file"/>, every
    /// &lt;a:t&gt;original[i]&lt;/a:t&gt; with &lt;a:t&gt;translated[i]&lt;/a:t&gt;
    /// and writes the result back to the same file.
    /// </summary>
    /// <param name="file">XML part to update in place.</param>
    /// <param name="original">Source strings, in export order.</param>
    /// <param name="translated">Translations; same count and order as <paramref name="original"/>.</param>
    private static void Replace(string file, List<string> original, List<string> translated)
    {
        // When a source string occurs more than once its first translation
        // wins, which is what sequential string.Replace calls produced.
        Dictionary<string, string> translations = new Dictionary<string, string>(StringComparer.Ordinal);
        for (int i = 0; i < original.Count; i++)
        {
            if (!translations.ContainsKey(original[i]))
            {
                translations.Add(original[i], translated[i]);
            }
        }

        string content;
        using (StreamReader reader = new StreamReader(file))
        {
            content = reader.ReadToEnd();
        }

        // A single pass over the text runs: output that was already translated
        // is never matched again, so a translation that happens to equal a
        // later source string is not translated a second time.
        string updated = TextRunPattern.Replace(content, match =>
        {
            string replacement;
            return translations.TryGetValue(match.Groups[1].Value, out replacement)
                ? "<a:t>" + replacement + "</a:t>"
                : match.Value;
        });

        using (StreamWriter writer = new StreamWriter(file))
        {
            writer.Write(updated);
        }
    }

    /// <summary>
    /// Imports the translations for every slide (SlideTXTs / SlideTXTs_Trans)
    /// and every notes slide (NotesTXTs / NotesTXTs_Trans) found under
    /// <paramref name="scanFolder"/>.
    /// </summary>
    /// <param name="scanFolder">Folder the PowerPoint package was unpacked into.</param>
    /// <param name="lan">Language suffix of the translated txt files; null or empty when they carry no suffix.</param>
    public static void Import2PPT(string scanFolder, string lan)
    {
        string ppt_name = PPTZIPCommon.GetPPTNameFromFullPath(scanFolder);

        // The lists are static; start from empty ones so a second import in
        // the same process does not handle every file twice.
        SlideFiles = new List<string>();
        NotesFiles = new List<string>();
        PPTZIPCommon.ReadContentTypes(scanFolder, ref SlideFiles, ref NotesFiles);

        string slideSrc = Path.Combine(scanFolder, "SlideTXTs");
        string slideTrg = Path.Combine(scanFolder, "SlideTXTs_Trans");
        foreach (string file in SlideFiles)
        {
            ReplaceATContent(file, slideSrc, slideTrg, ppt_name, lan);
        }

        string notesSrc = Path.Combine(scanFolder, "NotesTXTs");
        string notesTrg = Path.Combine(scanFolder, "NotesTXTs_Trans");
        foreach (string file in NotesFiles)
        {
            ReplaceATContent(file, notesSrc, notesTrg, ppt_name, lan);
        }
    }

    /// <summary>
    /// Loads the source and translated txt files that belong to
    /// <paramref name="file"/> and applies the translations to it. Shows a
    /// message box and leaves the file untouched when a folder or txt file is
    /// missing or the two files hold a different number of strings.
    /// </summary>
    /// <param name="file">XML part to update.</param>
    /// <param name="srcFolder">Folder with the exported source txt files.</param>
    /// <param name="trgFolder">Folder with the translated txt files.</param>
    /// <param name="pptName">Original PowerPoint file name (prefix of the txt file names).</param>
    /// <param name="lan">Language suffix of the translated file name; null or empty for none.</param>
    private static void ReplaceATContent(string file, string srcFolder, string trgFolder, string pptName, string lan)
    {
        if (!(Directory.Exists(srcFolder) && Directory.Exists(trgFolder)))
        {
            MessageBox.Show("SlideTXTs/NotesTXTs or SlideTXTs_Trans/NotesTXTs_Trans not exist");
            return;
        }

        string partName = Path.GetFileName(file);
        string srcFileName = string.Format("{0}_{1}.txt", pptName, partName);
        // A null suffix is treated like an empty one rather than producing "name_.txt".
        string trgFileName = string.IsNullOrEmpty(lan)
            ? srcFileName
            : string.Format("{0}_{1}_{2}.txt", pptName, partName, lan);

        string srcFileFullPath = Path.Combine(srcFolder, srcFileName);
        string trgFileFullPath = Path.Combine(trgFolder, trgFileName);
        if (!(File.Exists(srcFileFullPath) && File.Exists(trgFileFullPath)))
        {
            MessageBox.Show(string.Format(@"File {0} not replaced", file));
            return;
        }

        List<string> originalString = ReadEntries(srcFileFullPath);
        List<string> translatedString = ReadEntries(trgFileFullPath);
        if (originalString.Count != translatedString.Count)
        {
            MessageBox.Show(string.Format(@"translation string count not match:{0}", file));
            return;
        }

        Replace(file, originalString, translatedString);
    }

    // Reads a '^'-terminated txt file into its entries.
    private static List<string> ReadEntries(string txtFile)
    {
        string content;
        using (StreamReader reader = new StreamReader(txtFile))
        {
            content = reader.ReadToEnd();
        }

        List<string> entries = new List<string>();
        foreach (string piece in content.Split(new string[] { "^" }, StringSplitOptions.RemoveEmptyEntries))
        {
            // Each entry after the first starts with the line break that
            // followed the previous '^'. Strip CRLF and bare LF/CR alike so
            // files re-saved with other line endings still match.
            string entry = piece.TrimStart('\r', '\n');
            if (entry.Trim().Length == 0)
            {
                // Nothing but the line break(s) after the last '^'.
                continue;
            }
            entries.Add(entry);
        }
        return entries;
    }
}
轉載于:https://www.cnblogs.com/qixue/p/4497691.html
總結
以上是生活随笔為你收集整理的使用C#程序处理PowerPoint文件中的字符串的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: XML、集合、JSP综合练习
- 下一篇: c# char unsigned_dll