Is it possible to have a feature that sanitizes a PDF file of malicious embedded scripts?
Reference: Hacking With PDF
Is it possible to have a feature that sanitizes a PDF file of malicious embedded scripts?
Reference: Hacking With PDF
Hi,
GemBox.Pdf currently doesn’t have a built-in feature for sanitizing a PDF file of malicious embedded scripts.
But based on the link you provided, it seems that all JavaScript and URI actions contained in the PDF document should be inspected and modified if considered malicious.
The following extension method does exactly that:
/// <summary>
/// Extension methods that walk a PDF object graph and let a caller inspect and optionally
/// replace the payload of every URI action and JavaScript action found in it.
/// </summary>
static class GemBoxPdfExtensions
{
    private static readonly PdfName
        // Entries common to all action dictionaries: https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf#page=422
        SName = PdfName.Create("S"),
        // Additional entries specific to a URI action: https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf#page=432
        URIName = PdfName.Create("URI"),
        // Additional entries specific to a JavaScript action: https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf#page=438
        JavaScriptName = PdfName.Create("JavaScript"),
        JSName = PdfName.Create("JS");

    /// <summary>
    /// Visits <paramref name="dictionary"/> and everything reachable from it (nested dictionaries,
    /// arrays, stream dictionaries and indirect objects) and calls <paramref name="evaluator"/>
    /// for every URI action and JavaScript action dictionary encountered.
    /// </summary>
    /// <param name="dictionary">The dictionary at which the traversal starts.</param>
    /// <param name="evaluator">
    /// Callback receiving the action dictionary, the action type (<c>"URI"</c> or <c>"JavaScript"</c>)
    /// and the current value. Returning <c>null</c> keeps the value; returning a non-null string
    /// replaces it.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dictionary"/> or <paramref name="evaluator"/> is <c>null</c>.
    /// </exception>
    public static void ModifyJavaScriptAndURIActions(this PdfDictionary dictionary, Func<PdfDictionary, string, string, string?> evaluator)
    {
        if (dictionary is null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        if (evaluator is null)
        {
            throw new ArgumentNullException(nameof(evaluator));
        }

        // Indirect objects already visited; prevents endless loops on cyclic references.
        var visitedIndirectObjects = new HashSet<PdfIndirectObject>();

        // An explicit stack replaces recursion so that long reference chains or deeply nested
        // containers in an untrusted file cannot exhaust the call stack.
        var pending = new Stack<PdfBasicObject>();
        pending.Push(dictionary);

        while (pending.Count > 0)
        {
            var value = pending.Pop();

            if (value.ObjectType == PdfBasicObjectType.IndirectObject)
            {
                var indirectObject = (PdfIndirectObject)value;
                if (!visitedIndirectObjects.Add(indirectObject))
                {
                    continue;
                }

                value = indirectObject.Value;
            }

            // For a stream only its dictionary is traversed.
            if (value.ObjectType == PdfBasicObjectType.Stream)
            {
                value = ((PdfStream)value).Dictionary;
            }

            switch (value.ObjectType)
            {
                case PdfBasicObjectType.Dictionary:
                {
                    var current = (PdfDictionary)value;
                    ModifyAction(current, evaluator);
                    PushChildren(current, pending);
                    break;
                }

                case PdfBasicObjectType.Array:
                {
                    var array = (PdfArray)value;

                    // Pushed back-to-front so that elements are popped in their original order.
                    for (int i = array.Count - 1; i >= 0; --i)
                    {
                        pending.Push(array[i]);
                    }

                    break;
                }
            }
        }
    }

    // Calls the evaluator if the dictionary is a URI or JavaScript action and stores the replacement, if any.
    private static void ModifyAction(PdfDictionary dictionary, Func<PdfDictionary, string, string, string?> evaluator)
    {
        if (!dictionary.TryGetDirectValue(SName, out PdfBasicObject? subtype))
        {
            return;
        }

        if (URIName.Equals(subtype))
        {
            if (dictionary.TryGetDirectValue(URIName, out PdfBasicObject? uriValue)
                && uriValue is not null
                && uriValue.ObjectType == PdfBasicObjectType.String)
            {
                var replacement = evaluator(dictionary, "URI", ((PdfString)uriValue).ToString(PdfEncoding.ASCII));
                if (replacement is not null)
                {
                    dictionary[URIName] = replacement.Length == 0
                        ? PdfString.Create(string.Empty)
                        : PdfString.Create(replacement, PdfEncoding.ASCII, PdfStringForm.Literal);
                }
            }
        }
        else if (JavaScriptName.Equals(subtype)
            && dictionary.TryGetDirectValue(JSName, out PdfBasicObject? jsValue)
            && jsValue is not null
            && jsValue.ObjectType is (PdfBasicObjectType.String or PdfBasicObjectType.Stream))
        {
            if (jsValue.ObjectType == PdfBasicObjectType.Stream)
            {
                // The script is stored in a stream: read the decoded bytes and wrap them in a
                // string object (one char per byte) so both storage forms are handled alike below.
                using (var buffer = new MemoryStream())
                {
                    using (var jsStream = ((PdfStream)jsValue).Open(PdfStreamDataMode.Read, PdfStreamDataState.Decoded))
                    {
                        jsStream.CopyTo(buffer);
                    }

                    jsValue = PdfString.Create(new string(Array.ConvertAll(buffer.ToArray(), b => (char)b)), PdfEncoding.Byte, PdfStringForm.Literal);
                }
            }

            var replacement = evaluator(dictionary, "JavaScript", jsValue.ToString());
            if (replacement is not null)
            {
                dictionary[JSName] = PdfString.Create(replacement);
            }
        }
    }

    // Schedules all values of the dictionary for traversal, preserving their enumeration order.
    private static void PushChildren(PdfDictionary dictionary, Stack<PdfBasicObject> pending)
    {
        // Snapshot first: the dictionary is not being enumerated while its descendants are processed.
        var children = new List<PdfBasicObject>();
        foreach (var entry in dictionary)
        {
            children.Add(entry.Value);
        }

        for (int i = children.Count - 1; i >= 0; --i)
        {
            pending.Push(children[i]);
        }
    }

    // Looks up a key and, if the stored value is an indirect object, resolves it to the referenced value.
    private static bool TryGetDirectValue(this PdfDictionary dictionary, PdfName key, out PdfBasicObject? value)
    {
        if (dictionary.TryGetValue(key, out value))
        {
            if (value.ObjectType == PdfBasicObjectType.IndirectObject)
            {
                value = ((PdfIndirectObject)value).Value;
            }

            return true;
        }

        value = null;
        return false;
    }
}
The following helper methods create a PDF document with some JavaScript and URI actions and save it to a PDF file on your Desktop:
// Builds a single-page test document containing three links (two JavaScript actions and one
// web-link action) and saves it as "MaliciousFile.pdf" on the current user's Desktop.
private static void CreatePdfDocumentWithJavaScriptAndURIActions()
{
    const string alertScript = "app.alert('XSS');";
    const string webLink = "https://twitter.com/0xCyberY";
    const string launchScript = "app.launchURL(\"https://twitter.com/0xCyberY\", true);";

    using var pdf = new PdfDocument();
    var firstPage = pdf.Pages.Add();

    // Each link shows the payload as its visible text and carries the same payload as its action.
    var alertLink = DrawTextAndAddLink(firstPage, alertScript, new PdfPoint(100, 100));
    alertLink.Actions.AddRunJavaScript(alertScript);

    var uriLink = DrawTextAndAddLink(firstPage, webLink, new PdfPoint(100, 200));
    uriLink.Actions.AddOpenWebLink(webLink);

    var launchLink = DrawTextAndAddLink(firstPage, launchScript, new PdfPoint(100, 300));
    launchLink.Actions.AddRunJavaScript(launchScript);

    var desktopFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
    pdf.Save(Path.Combine(desktopFolder, "MaliciousFile.pdf"));
}
// Draws the given text (blue, underlined) on the page and returns a link annotation covering
// exactly the area of the drawn text. The location is an offset from the top-left corner of
// the page's crop box.
private static PdfLinkAnnotation DrawTextAndAddLink(PdfPage page, string text, PdfPoint location)
{
    using var styledText = new PdfFormattedText();
    styledText.Color = PdfColors.Blue;
    styledText.Underline = new PdfTextDecoration();
    styledText.Append(text);

    var box = page.CropBox;
    double textWidth = styledText.Width;
    double textHeight = styledText.Height;

    // Subtract the offset and the text height from the top edge to get the bottom-left corner of the text.
    double left = box.Left + location.X;
    double bottom = box.Top - location.Y - textHeight;
    var anchor = new PdfPoint(left, bottom);

    page.Content.DrawText(styledText, anchor);
    return page.Annotations.AddLink(anchor.X, anchor.Y, textWidth, textHeight);
}
The following is the Main method that uses a helper method to create a test PDF file on your Desktop and then sanitize it by replacing JavaScript and URI values with an empty string:
// Creates the test file on the Desktop, reloads it, blanks every URI and JavaScript action
// value, and saves the result as "MaliciousFile-sanitized.pdf".
static void Main()
{
    CreatePdfDocumentWithJavaScriptAndURIActions();

    var desktopFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);

    using var document = PdfDocument.Load(Path.Combine(desktopFolder, "MaliciousFile.pdf"));

    document.GetDictionary().ModifyJavaScriptAndURIActions(Sanitize);
    document.Save(Path.Combine(desktopFolder, "MaliciousFile-sanitized.pdf"));

    // Evaluator: return null to keep the inspected value, or a non-null string to replace it.
    static string? Sanitize(PdfDictionary dictionary, string actionType, string value)
    {
        if (actionType == "URI")
        {
            Console.WriteLine($"URI action with URI '{value}' was found.");
            return string.Empty;
        }

        if (actionType == "JavaScript")
        {
            Console.WriteLine($"JavaScript action with JS '{value}' was found.");
            return string.Empty;
        }

        return null;
    }
}
I hope this helps!
Regards,
Stipo