1
+ using Azure ;
2
+ using Azure . AI . DocumentIntelligence ;
3
+ using System ;
4
+ using System . Text ;
5
+ using System . IO ;
6
+ using System . Threading . Tasks ;
7
+ using Build5Nines . SharpVector ;
8
+
9
// Sample: analyze a document with the Azure Document Intelligence "prebuilt-read"
// model and index the extracted text in a SharpVector database.

// Connection settings and input document (replace the placeholders before running).
var endpoint = "https://<resource-name>.cognitiveservices.azure.com/";
var apiKey = "<your-key>";
var filePath = "document.pdf"; // Can be .pdf, .docx, .jpg, etc.

// Two stopwatches: stepTimer is reused for each individual step,
// overallTimer runs from here until the end of the program.
var stepTimer = new System.Diagnostics.Stopwatch();
var overallTimer = System.Diagnostics.Stopwatch.StartNew();

// Document Intelligence client authenticated with the API key.
DocumentIntelligenceClient client = new(new Uri(endpoint), new AzureKeyCredential(apiKey));

// SharpVector database that receives the extracted page text.
BasicMemoryVectorDatabase vdb = new();
25
+
26
+
27
+
28
+
29
+
30
// Read the whole file into memory and wrap it in a BinaryData object.
Console.WriteLine("Reading file...");
stepTimer.Start();

// File.ReadAllBytesAsync reads until end of file. A single Stream.ReadAsync call
// is allowed to return fewer bytes than requested, which would leave the tail of
// a manually sized buffer zero-filled and corrupt the uploaded document.
// It also closes the file handle as soon as the read finishes instead of keeping
// it open for the rest of the program.
byte[] fileBytes = await File.ReadAllBytesAsync(filePath);
var binaryData = BinaryData.FromBytes(fileBytes);

stepTimer.Stop();
Console.WriteLine($"File loaded into memory: {stepTimer.ElapsedMilliseconds} ms");
41
+
42
Console.WriteLine("Analyzing document with Azure Document Intelligence...");
stepTimer.Restart();

// Submit the document bytes to the prebuilt-read model and take the analysis
// result from the returned operation. WaitUntil.Completed is passed so the
// awaited call is expected to return only once the operation has finished.
const string modelId = "prebuilt-read";
var docResult = (await client.AnalyzeDocumentAsync(WaitUntil.Completed, modelId, binaryData)).Value;

stepTimer.Stop();
Console.WriteLine($"Document analysis completed: {stepTimer.ElapsedMilliseconds} ms");
55
+
56
stepTimer.Restart();
Console.WriteLine("Loading SharpVector database...");

// One database entry per page: the page's lines joined with newlines,
// stored with the page number (as a string) as its metadata.
// Note: In a real-world scenario, you might want to use more meaningful metadata.
foreach (var documentPage in docResult.Pages)
{
    var pageText = new StringBuilder();
    foreach (var textLine in documentPage.Lines)
    {
        // Each line is followed by a line terminator, including the last one.
        pageText.Append(textLine.Content).Append(Environment.NewLine);
    }

    vdb.AddText(pageText.ToString(), documentPage.PageNumber.ToString());
}

stepTimer.Stop();
Console.WriteLine($"SharpVector database loaded: {stepTimer.ElapsedMilliseconds} ms");
76
+
77
+
78
+
79
+
80
+
81
+ // Console.WriteLine("");
82
+ // Console.WriteLine("Loading PDF File into vector database...");
83
+ // stepTimer.Restart();
84
+ // // read pdf file with PdfPig locally
85
+ // var vdb2 = new BasicMemoryVectorDatabase();
86
+ // using (var pdfDocument = UglyToad.PdfPig.PdfDocument.Open(filePath))
87
+ // {
88
+ // foreach (var page in pdfDocument.GetPages())
89
+ // {
90
+ // // Add the text to the vector database
91
+ // // Let's use the Page Number as the metadata
92
+ // // Note: In a real-world scenario, you might want to use more meaningful metadata
93
+ // var metadata = page.Number.ToString();
94
+ // vdb2.AddText(page.Text, metadata);
95
+ // }
96
+ // }
97
+ // stepTimer.Stop();
98
+ // Console.WriteLine($"Vector database loaded: {stepTimer.ElapsedMilliseconds} ms");
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
Console.WriteLine();
Console.WriteLine("Searching in SharpVector database for \" Azure ML\" with similarity score > 0.5...");
stepTimer.Restart();

// First query: keep only matches whose similarity score is above the threshold.
var query = "Azure ML";
var semanticResults = vdb.Search(query, threshold: 0.5f);

stepTimer.Stop();
Console.WriteLine($"Search completed: {stepTimer.ElapsedMilliseconds} ms");

// Print the page number (stored as metadata) and similarity of every match.
Console.WriteLine("Top Matching Results:");
foreach (var match in semanticResults.Texts)
{
    Console.WriteLine($" - Page: {match.Metadata} - Similarity: {match.VectorComparison} ");
}
129
+
130
+
131
Console.WriteLine();
Console.WriteLine("Searching in SharpVector database for \" Why use a Cloud Adoption Framework strategy\" , top 3 results...");
stepTimer.Restart();

// Second query: reuse the query/result variables and limit the result count to 3.
query = "Why use a Cloud Adoption Framework strategy";
semanticResults = vdb.Search(query, pageCount: 3);

stepTimer.Stop();
Console.WriteLine($"Search completed: {stepTimer.ElapsedMilliseconds} ms");

// Print the page number (stored as metadata) and similarity of every match.
Console.WriteLine("Top Matching Results:");
foreach (var match in semanticResults.Texts)
{
    Console.WriteLine($" - Page: {match.Metadata} - Similarity: {match.VectorComparison} ");
}
153
+
154
// Stop the overall stopwatch and report the total elapsed time after a blank line.
overallTimer.Stop();
Console.WriteLine();
Console.WriteLine($"Overall processing time: {overallTimer.ElapsedMilliseconds} ms");
0 commit comments