mirror of
https://github.com/mukunku/ParquetViewer.git
synced 2026-06-21 12:48:09 +00:00
Compare commits
100 commits
main
...
v4.0.0-rel
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
273666202c | ||
|
|
58da66fec6 | ||
|
|
58237ae76f | ||
|
|
2f2767a7ac | ||
|
|
e0f4ffbd15 | ||
|
|
42688c8ea7 | ||
|
|
d9da431090 | ||
|
|
f21b342530 | ||
|
|
afc1716411 | ||
|
|
3c8990d9c8 | ||
|
|
491d949625 | ||
|
|
66e65efdb0 | ||
|
|
d6280f25cf | ||
|
|
9523ef3730 | ||
|
|
498d01b481 | ||
|
|
a6d74d4f54 | ||
|
|
e3eb7d1ed0 | ||
|
|
cbc5195820 | ||
|
|
4cdd0a63a6 | ||
|
|
b22374120f | ||
|
|
8405eab596 | ||
|
|
775e1c8d6e | ||
|
|
a882b13f3c | ||
|
|
d17b84b4ae | ||
|
|
6f99e53d16 | ||
|
|
823ca7c4a7 | ||
|
|
8af083d0b8 | ||
|
|
898bdc0e6b | ||
|
|
eb2c595b1e | ||
|
|
3ac6830186 | ||
|
|
eb16181c97 | ||
|
|
93084d405d | ||
|
|
83022a780b | ||
|
|
256fbe3ba4 | ||
|
|
cfdb9f4ad6 | ||
|
|
0ce5aea68c | ||
|
|
32def4efcd | ||
|
|
d651996fe1 | ||
|
|
ab60c83fac | ||
|
|
5598829b91 | ||
|
|
7ca9a26bbb | ||
|
|
5684fbd153 | ||
|
|
98e65f843e | ||
|
|
2dad1993b0 | ||
|
|
5c2107c6e4 | ||
|
|
8228a8344f | ||
|
|
c42d37e6e3 | ||
|
|
76b5ecd4ce | ||
|
|
a903a96e85 | ||
|
|
2a348133f8 | ||
|
|
86c431fcd6 | ||
|
|
1e07a838c2 | ||
|
|
006f63d02b | ||
|
|
b4af8c13da | ||
|
|
1216238c54 | ||
|
|
c2bd64939f | ||
|
|
2ac3ddbded | ||
|
|
ff298b8db6 | ||
|
|
fb94ce4cf4 | ||
|
|
333c5e36d2 | ||
|
|
f6994de30b |
||
|
|
d64e338a48 | ||
|
|
4517f61083 | ||
|
|
4ddd292fa5 | ||
|
|
d9d88d0380 | ||
|
|
d81faf32e4 | ||
|
|
43dc91a12f | ||
|
|
f139165091 | ||
|
|
19f3922eb6 | ||
|
|
7a7b1ea686 | ||
|
|
468751d9a7 | ||
|
|
0e2992a3b7 | ||
|
|
7477130a66 | ||
|
|
9b9e88cb84 | ||
|
|
c5ef222246 | ||
|
|
573be69574 | ||
|
|
098aab64a1 | ||
|
|
8c029a7b8b | ||
|
|
19aff50a99 | ||
|
|
13c6442469 | ||
|
|
150fad330a | ||
|
|
c63a9c581b | ||
|
|
0ce20851b6 | ||
|
|
e78b6fb99c | ||
|
|
6037e8818a | ||
|
|
f8916dda84 | ||
|
|
0e5bdc70cb | ||
|
|
076bcdee0d | ||
|
|
dd5f389ff3 | ||
|
|
1c30767d2e | ||
|
|
c48e7ceedc | ||
|
|
c1706b638c | ||
|
|
5df24f8fc7 | ||
|
|
8c7661cae9 | ||
|
|
7114689c3f | ||
|
|
1df9edc6f6 | ||
|
|
9593df5d34 | ||
|
|
02ab78410a | ||
|
|
aaf363ac61 | ||
|
|
27fbf32c4d |
119 changed files with 7076 additions and 2436 deletions
23
.github/ISSUE_TEMPLATE/translation_template.csv
vendored
23
.github/ISSUE_TEMPLATE/translation_template.csv
vendored
|
|
@ -59,6 +59,7 @@
|
|||
"MainForm.resx","newToolStripMenuItem.Text",,"&New","Yeni",""
|
||||
"MainForm.resx","openFolderToolStripMenuItem.Text",,"&Open Folder","Klasör Aç",""
|
||||
"MainForm.resx","openFolderToolStripMenuItem.ToolTipText",,"All parquet files in the folder must have the same schema","Klasördeki tüm parquet dosyaları aynı veri şemasına sahip olmalıdır",""
|
||||
"MainForm.resx","openQueryEditorToolToolStripMenuItem.Text",,"Query Editor (Beta)","Sorgu Editörü (Beta)",""
|
||||
"MainForm.resx","openToolStripMenuItem.Text",,"&Open File","Dosya Aç",""
|
||||
"MainForm.resx","outOfStatusBarLabel.Text",,"Out of:","Toplam:",""
|
||||
"MainForm.resx","recordsTextStatusBarLabel.Text",,"Results","Sonuç",""
|
||||
|
|
@ -75,9 +76,16 @@
|
|||
"MainForm.resx","userGuideToolStripMenuItem.Text",,"User Guide","Kullanıcı Kılavuzu",""
|
||||
"MetadataViewer.resx","$this.Text",,"Parquet Metadata Viewer","Parquet Metadata Önizleyicisi",""
|
||||
"MetadataViewer.resx","closeButton.Text",,"Close","Kapat",""
|
||||
"MetadataViewer.resx","copyRawThriftMetadataButton.Text",,"Copy Raw Metadata","Ham Metadatayı Kopyala",""
|
||||
"MetadataViewer.resx","copyRawThriftMetadataButton.ToolTip",,"Exports full, raw Thrift metadata to the clipboard.","Ham Thrift metadatasını panoya kopyalar",""
|
||||
"MetadataViewer.resx","loadingTab.Text",,"Loading...","Yükleniyor...",""
|
||||
"QueryEditor.resx","$this.Text",,"ParquetViewer - Query Editor (Powered by DuckDB)","ParquetViewer - Sorgu Editörü (DuckDB Desteğiyle)",""
|
||||
"QueryEditor.resx","copyTextMenuItem.Text",,"Copy","Kopyala",""
|
||||
"QueryEditor.resx","executeQueryButton.Text",,"Execute","İşle",""
|
||||
"QueryEditor.resx","pasteTextMenuItem.Text",,"Paste","Yapıştır",""
|
||||
"QueryEditor.resx","queryExecutionStatusLabel.Text",,"Running:","İşleniyor:",""
|
||||
"QueryEditor.resx","querySyntaxDocsButton.Text",,"Query Syntax","Sorgu Söz Dizimi",""
|
||||
"QueryEditor.resx","toolStripStatusLabel1.Text",,"Showing:","Gösterilen:",""
|
||||
"QueryEditor.resx","toolStripStatusLabel2.Text",,"Results","Sonuç",""
|
||||
"QueryEditor.resx","zoomPercentageDropDown.Text",,"Query Zoom: 100%","Sorgu Zumu: 100%",""
|
||||
"QuickPeekForm.resx","$this.Text",,"Quick Peek","Hızlı Önizleme",""
|
||||
"QuickPeekForm.resx","copyToClipboardToolStripMenuItem.Text",,"Copy to clipboard","Panoya kopyala",""
|
||||
"QuickPeekForm.resx","saveImageToFileButton.Text",,"Save as PNG","PNG olarak Kaydet",""
|
||||
|
|
@ -90,6 +98,7 @@
|
|||
"Errors.resx","CopyToClipboardErrorTitle","Shown when copying to the clipboard fails","Copy to clipboard failed","Panoya kopyalama başarısız oldu",""
|
||||
"Errors.resx","DecimalValueTooLargeErrorMessageFormat","Shown when a field contains decimal values outside the supported range for .NET","Field `{0}` with type DECIMAL({1}, {2}) contains values outside ParquetViewer's supported range between DECIMAL({3}, {4}) and DECIMAL({3}, 0)","DECIMAL({1}, {2}) tipli alan `{0}` ParquetViewer'ın desteklediği aralığın dışında bulunuyor: DECIMAL({3}, {4}) ve DECIMAL({3}, 0)",""
|
||||
"Errors.resx","DecimalValueTooLargeErrorTitle","Shown when a field contains decimal values outside the supported range for .NET","Decimal value too large","Decimal değeri çok büyük",""
|
||||
"Errors.resx","DecimalValueUnknownSizeTooLargeErrorMessageFormat","Shown when a field contains decimal values outside the supported range for .NET AND we don't know it's name, scale, or precision","Encountered DECIMAL data outside ParquetViewer's supported range between DECIMAL({3}, {4}) and DECIMAL({3}, 0)","ParquetViewer'ın desteklediği aralığın, DECIMAL({3}, {4}) ve DECIMAL({3}, 0), dışında bir DECIMAL değerine rastlanıldı",""
|
||||
"Errors.resx","ExportFailedErrorTitle","Shown when an exception is encountered during a file export","File export failed","Dosyaya aktarım başarısız oldu",""
|
||||
"Errors.resx","FieldListGenerationError","Generic error shown when the field list in the field selection dialog cannot be rendered for some reason","Something went wrong while generating the field list.","Alan listesi oluşturulurken bir hata oldu.",""
|
||||
"Errors.resx","FileAssociationFailedErrorMessageFormat","Shown when file association fails in the Help page","Something went wrong (Error code: {0}). Try running ParquetViewer as administrator and try again. ","Birşey ters gitti (Hata kodu: {0}). ParquetViewer'ı yönetici olarak çaliştırıp tekrar deneyin.",""
|
||||
|
|
@ -99,6 +108,8 @@
|
|||
"Errors.resx","InvalidDateFormatErrorTitle","Shown in the custom date format dialog when the user tries to save an invalid date","Invalid Date Format","Geçersiz Tarih Formatı",""
|
||||
"Errors.resx","InvalidQueryErrorMessage","Thrown when an invalid query is executed","The query doesn't seem to be valid. Please try again.","Sorgu geçerli gözükmüyor.",""
|
||||
"Errors.resx","InvalidQueryErrorTitle","Shown when the user inputs an invalid filter query and clicks Execute","Invalid Query","Geçersiz Sorgu",""
|
||||
"Errors.resx","ListsWithNullsErrorMessage","Shown when using the Query Editor and a List field in the result contains nulls","Lists with null values are not supported. Relevant rows have been removed.","null içeren Listeler desteklenmemektedir.",""
|
||||
"Errors.resx","ListsWithNullsErrorTitle","Shown when using the Query Editor and a List field in the result contains nulls","Could not read all records","Tüm kayıtlar okunamadı",""
|
||||
"Errors.resx","MalformedFieldErrorMessageFormat","Shown to users when ParquetViewer fails to read a specific field. It should prompt the user to open a bug ticket if they believe the file is valid","{0}
|
||||
|
||||
If you think the file is valid please consider opening an issue in the GitHub repo. See: Help → About","{0}
|
||||
|
|
@ -115,6 +126,8 @@ Eğer dosyanın aslında geçerli olduğunu düşünüyorsanız lutfen projenin
|
|||
"Errors.resx","NoValidParquetFilesFoundErrorMessage","Shown when the user tries to open a folder and all found parquet files are invalid","No valid parquet files found in folder. Invalid parquet files:","Geçerli parquet dosyası bulunamadı. Bulunan geçersiz dosyalar:",""
|
||||
"Errors.resx","OpenFileNoLongerExistsErrorMessageFormat","Shown in the unlikely event where the open file(s) were somehow deleted","The specified file/folder no longer exists: {0}Please try opening a new file or folder.","Belirtilen dosya/klasör artık bulunamıyor: {0}Lütfen başka bir dosya veya klasör açın.",""
|
||||
"Errors.resx","ParquetSchemaReadErrorMessage","Generic error shown when ParquetViewer is unable to read the schema of a parquet file","Could not read parquet schema.","Parquet şeması okunamadı.",""
|
||||
"Errors.resx","QueryExecutionErrorTitle","Shown in the Query Editor when the query fails to run due to an unexpected exception","Error executing query","Sorgu işlenirken bir hata oluştu",""
|
||||
"Errors.resx","RenderResultsErrorTitle","Shown when values can't be converted/shown in the Query Editor page","Error rendering results","Sonuçlar gösterilirken bir hata oluştu",""
|
||||
"Errors.resx","SelectAtLeastOneFieldErrorMessage","Shown when the user tries to close the field selection dialog without selecting any fields","Please select at least one field","En az bir alan seçin",""
|
||||
"Errors.resx","SelectAtLeastOneFieldErrorTitle","Shown when the user tries to close the field selection dialog without selecting any fields","Error","Hata",""
|
||||
"Errors.resx","SomeInvalidParquetFilesFoundErrorMessage","Shown when the user tries to open a folder and some of the found parquet files are invalid","Some files could not be loaded. Invalid Parquet files:","Bazı parquet dosyaları yüklenemedi. Bulunan geçersiz dosyalar:",""
|
||||
|
|
@ -148,6 +161,8 @@ You can always change this setting later from the Help menu.","Anonim kullanım
|
|||
|
||||
Tercihinizi Yardım menüsunden her zaman değiştirebilirsiniz.",""
|
||||
"Strings.resx","AnalyticsConsentPromptTitle","Message box title that is shown when asking the user if we can gather analytics data","Share Anonymous Usage Data?","Anonim Kullanım Verisi Toplansın mı?",""
|
||||
"Strings.resx","ByteArraysNotSupportedErrorMessage","Shown when a byte[] type is in the results in the Query Editor window. As these types are currently not supported by DuckDB.","Unfortunately byte[] types are currently unsupported in query results. Values will be shown as null.","Ne yazık ki byte[] alanlar sorgu sonuçlarında gösterilememektedir. Değerleri null olarak gösterilecektir.",""
|
||||
"Strings.resx","ByteArraysNotSupportedErrorTitle","Shown when a byte[] type is in the results in the Query Editor window. As these types are currently not supported by DuckDB.","Byte arrays not supported","Byte[] tipli sonuçlar desteklenmemektedir",""
|
||||
"Strings.resx","CancelButtonText","Text to be shown on cancel buttons","Cancel","İptal Et",""
|
||||
"Strings.resx","CancelInitiatedLabelText","Text to be shown between the time a user initiates a cancel operation and when it completes.","Cancelling...","İptal ediliyor...",""
|
||||
"Strings.resx","CantGoBackLinkButtonText","Text shown when the quick peek window's link to go back to the source cell is clicked but the source cell no longer exists","can't go back","geri gidilemiyor",""
|
||||
|
|
@ -209,6 +224,8 @@ Bu ayarı Yardim → Hakkinda sayfasında da yapabilirsiniz.",""
|
|||
"Strings.resx","MetadataSuccessfullyExportedToFileMessageFormat","Shown when raw thrift metadata is successfully exported to a file","Metadata successfully exported to: {0}","Metadata başarıyla kaydedildi: {0}",""
|
||||
"Strings.resx","MetadataSuccessfullyExportedToFileMessageTitle","Shown when raw thrift metadata is successfully exported to a file","Export complete","Kayıt başarılı",""
|
||||
"Strings.resx","PrivacyPolicyLabelText","Title to be shown before the privacy policy","Privacy policy","Gizlilik politikası",""
|
||||
"Strings.resx","QueryFinishedStatusText","Shown in the Query Editor status bar when the query finishes executing","Finished in:","Tamamlandı:",""
|
||||
"Strings.resx","QueryRunningStatusText","Shown in the Query Editor tool when the query is running","Running:","İşleniyor:",""
|
||||
"Strings.resx","QuerySyntaxHelpText","Shown when the user clicks on the Filter Query (?): link button","NULL CHECK:
|
||||
WHERE field_name IS NULL
|
||||
WHERE field_name IS NOT NULL
|
||||
|
|
@ -249,6 +266,7 @@ BIRDEN FAZLA KOŞUL:
|
|||
|
||||
Daha fazla bilgi için: 'Hakkında → Kullanıcı Kılavuzu'",""
|
||||
"Strings.resx","QuerySyntaxHelpTitle","Shown when the user clicks on the Filter Query (?): link button","Filter Query Syntax Examples","Sorgu Filtresi Söz Dizim Örnekleri",""
|
||||
"Strings.resx","QueryZoomStatusTextFormat","Text format used in the Query Editor tool to display the current zoom level","Query Zoom: {0}%","Sorgu Zumu: {0}%",""
|
||||
"Strings.resx","RecordsToBeExportedTitleFormat","Title shown on the save file dialog when the user is exporting data","{0} records will be exported","{0} kayıt kaydedilecektir",""
|
||||
"Strings.resx","SaveImageAsButtonText","Shown on the save image to file button in the quick peek form","Save image as {0}","Resmi {0} olarak kaydet",""
|
||||
"Strings.resx","SaveImageToFileButtonTextFormat","Text template for the button to save an image preview to disk","Save as {0}","{0} olarak Kaydet",""
|
||||
|
|
@ -264,4 +282,3 @@ Onun yerine sonuçlari {2} dosyasına aktarmak ister misiniz?",""
|
|||
"Strings.resx","TooManyFieldsErrorFormat","Shown on the Field Selection Dialog when there are too many fields to filter by","Too many fields: {0}","Desteklenmeyen sayıda alan: {0}",""
|
||||
"Strings.resx","TypeText","Shown in the title bar of quick peek windows for image previews","Type","Format",""
|
||||
"Strings.resx","UnsupportedFieldCountTextFormat","Shown in the field selection dialog to indicate how many fields are unsupported by ParquetViewer","Unsupported: {0}","Desteklenmeyen: {0}",""
|
||||
"Strings.resx","UnsupportedFieldText","Shown in the field selection dialog next to field names to indicate the field is not supported for viewing with ParquetViewer","(Unsupported)","(Desteklenmiyor)",""
|
||||
|
|
|
|||
|
22
.github/workflows/build-test-publish.yaml
vendored
22
.github/workflows/build-test-publish.yaml
vendored
|
|
@ -108,20 +108,22 @@ jobs:
|
|||
search-text: 'private const string AMPLITUDE_API_KEY = "";'
|
||||
replacement-text: 'private const string AMPLITUDE_API_KEY = "${{ secrets.AMPLITUDE_API_KEY }}";'
|
||||
|
||||
- name: Build & Publish Regular Release
|
||||
run: |
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release -f net8.0-windows --nologo -o publish -r win-x64 --no-self-contained
|
||||
Get-Item "./publish/ParquetViewer.exe" | Select-Object Name, Length
|
||||
# With DuckDB, the regular release is close to 40MB which defeats the purpose of having this version of the release.
|
||||
# Starting with v4.0.0 we're switching to only publishing the self-contained version.
|
||||
#- name: Build & Publish Regular Release
|
||||
# run: |
|
||||
# dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release -f net10.0-windows --nologo -o publish -r win-x64 --no-self-contained
|
||||
# Get-Item "./publish/ParquetViewer.exe" | Select-Object Name, Length
|
||||
|
||||
- name: Build & Publish SelfContained Release
|
||||
run: |
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release_SelfContained -f net8.0-windows --nologo -o publish_selfcontained -r win-x64 --self-contained
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release_SelfContained -f net10.0-windows --nologo -o publish_selfcontained -r win-x64 --self-contained
|
||||
Get-Item "./publish_selfcontained/ParquetViewer.exe" | Select-Object Name, Length
|
||||
|
||||
- name: Prepare executables for upload
|
||||
run: |
|
||||
Move-Item -Path "publish/ParquetViewer.exe" -Destination "./ParquetViewer.exe"
|
||||
Move-Item -Path "publish_selfcontained/ParquetViewer.exe" -Destination "./ParquetViewer_SelfContained.exe"
|
||||
#Move-Item -Path "publish/ParquetViewer.exe" -Destination "./ParquetViewer.exe"
|
||||
Move-Item -Path "publish_selfcontained/ParquetViewer.exe" -Destination "./ParquetViewer.exe"
|
||||
|
||||
- name: Upload unsigned artifact for signing
|
||||
id: upload-unsigned-artifact
|
||||
|
|
@ -129,12 +131,12 @@ jobs:
|
|||
with:
|
||||
path: |
|
||||
ParquetViewer.exe
|
||||
ParquetViewer_SelfContained.exe
|
||||
#ParquetViewer_SelfContained.exe
|
||||
|
||||
- name: Remove unsigned exe's for safety
|
||||
run: |
|
||||
Remove-Item -Path "ParquetViewer.exe"
|
||||
Remove-Item -Path "ParquetViewer_SelfContained.exe"
|
||||
#Remove-Item -Path "ParquetViewer_SelfContained.exe"
|
||||
|
||||
# Documentation: https://about.signpath.io/documentation/trusted-build-systems/github
|
||||
- name: Submit signing request to SignPath.io
|
||||
|
|
@ -150,7 +152,7 @@ jobs:
|
|||
|
||||
- uses: ncipollo/release-action@v1
|
||||
with:
|
||||
artifacts: "signed-package/ParquetViewer.exe,signed-package/ParquetViewer_SelfContained.exe"
|
||||
artifacts: "signed-package/ParquetViewer.exe" #",signed-package/ParquetViewer_SelfContained.exe"
|
||||
body: "PR: #${{ env.PR_NUMBER }}"
|
||||
allowUpdates: ${{ env.BRANCH_NAME != 'main' }}
|
||||
omitBodyDuringUpdate: true
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ jobs:
|
|||
Write-Host "Found English file: $englishFilePath"
|
||||
[xml]$englishContent = Get-Content -Path $englishFilePath
|
||||
$englishContent.root.data | ForEach-Object {
|
||||
if (-not $_.HasAttribute('type')) {
|
||||
if ((-not $_.HasAttribute('type')) -and (-not $_.HasAttribute('mimetype'))) {
|
||||
$englishData[$_.name] = $_.value
|
||||
$englishComments[$_.name] = $_.comment
|
||||
}
|
||||
|
|
|
|||
230
src/.editorconfig
Normal file
230
src/.editorconfig
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
root = true
|
||||
|
||||
# C# files
|
||||
[*.cs]
|
||||
|
||||
#### Core EditorConfig Options ####
|
||||
|
||||
# Indentation and spacing
|
||||
indent_size = 4
|
||||
indent_style = space
|
||||
tab_width = 4
|
||||
|
||||
# New line preferences
|
||||
end_of_line = crlf
|
||||
insert_final_newline = false
|
||||
|
||||
#### .NET Coding Conventions ####
|
||||
|
||||
# Organize usings
|
||||
dotnet_separate_import_directive_groups = false
|
||||
dotnet_sort_system_directives_first = false
|
||||
file_header_template = unset
|
||||
|
||||
# this. and Me. preferences
|
||||
dotnet_style_qualification_for_event = false
|
||||
dotnet_style_qualification_for_field = false
|
||||
dotnet_style_qualification_for_method = false
|
||||
dotnet_style_qualification_for_property = false
|
||||
|
||||
# Language keywords vs BCL types preferences
|
||||
dotnet_style_predefined_type_for_locals_parameters_members = true
|
||||
dotnet_style_predefined_type_for_member_access = true
|
||||
|
||||
# Parentheses preferences
|
||||
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity
|
||||
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity
|
||||
dotnet_style_parentheses_in_other_operators = never_if_unnecessary
|
||||
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity
|
||||
|
||||
# Modifier preferences
|
||||
dotnet_style_require_accessibility_modifiers = for_non_interface_members
|
||||
|
||||
# Expression-level preferences
|
||||
dotnet_style_coalesce_expression = true
|
||||
dotnet_style_collection_initializer = true
|
||||
dotnet_style_explicit_tuple_names = true
|
||||
dotnet_style_namespace_match_folder = true
|
||||
dotnet_style_null_propagation = true
|
||||
dotnet_style_object_initializer = true
|
||||
dotnet_style_operator_placement_when_wrapping = beginning_of_line
|
||||
dotnet_style_prefer_auto_properties = true
|
||||
dotnet_style_prefer_collection_expression = when_types_loosely_match
|
||||
dotnet_style_prefer_compound_assignment = true
|
||||
dotnet_style_prefer_conditional_expression_over_assignment = true
|
||||
dotnet_style_prefer_conditional_expression_over_return = true
|
||||
dotnet_style_prefer_foreach_explicit_cast_in_source = when_strongly_typed
|
||||
dotnet_style_prefer_inferred_anonymous_type_member_names = true
|
||||
dotnet_style_prefer_inferred_tuple_names = true
|
||||
dotnet_style_prefer_is_null_check_over_reference_equality_method = true
|
||||
dotnet_style_prefer_simplified_boolean_expressions = true
|
||||
dotnet_style_prefer_simplified_interpolation = true
|
||||
|
||||
# Field preferences
|
||||
dotnet_style_readonly_field = true
|
||||
|
||||
# Parameter preferences
|
||||
dotnet_code_quality_unused_parameters = all:silent
|
||||
|
||||
# Suppression preferences
|
||||
dotnet_remove_unnecessary_suppression_exclusions = none
|
||||
|
||||
# New line preferences
|
||||
dotnet_style_allow_multiple_blank_lines_experimental = true
|
||||
dotnet_style_allow_statement_immediately_after_block_experimental = true
|
||||
|
||||
#### C# Coding Conventions ####
|
||||
|
||||
# var preferences
|
||||
csharp_style_var_elsewhere = false
|
||||
csharp_style_var_for_built_in_types = false
|
||||
csharp_style_var_when_type_is_apparent = false
|
||||
|
||||
# Expression-bodied members
|
||||
csharp_style_expression_bodied_accessors = true
|
||||
csharp_style_expression_bodied_constructors = false
|
||||
csharp_style_expression_bodied_indexers = true
|
||||
csharp_style_expression_bodied_lambdas = true
|
||||
csharp_style_expression_bodied_local_functions = false
|
||||
csharp_style_expression_bodied_methods = false
|
||||
csharp_style_expression_bodied_operators = false
|
||||
csharp_style_expression_bodied_properties = true
|
||||
|
||||
# Pattern matching preferences
|
||||
csharp_style_pattern_matching_over_as_with_null_check = true
|
||||
csharp_style_pattern_matching_over_is_with_cast_check = true
|
||||
csharp_style_prefer_extended_property_pattern = true
|
||||
csharp_style_prefer_not_pattern = true
|
||||
csharp_style_prefer_pattern_matching = true
|
||||
csharp_style_prefer_switch_expression = true
|
||||
|
||||
# Null-checking preferences
|
||||
csharp_style_conditional_delegate_call = true
|
||||
|
||||
# Modifier preferences
|
||||
csharp_prefer_static_local_function = true
|
||||
csharp_preferred_modifier_order = public,private,protected,internal,file,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,required,volatile,async
|
||||
csharp_style_prefer_readonly_struct = true
|
||||
csharp_style_prefer_readonly_struct_member = true
|
||||
|
||||
# Code-block preferences
|
||||
csharp_prefer_braces = true
|
||||
csharp_prefer_simple_using_statement = true
|
||||
csharp_style_namespace_declarations = block_scoped
|
||||
csharp_style_prefer_method_group_conversion = true
|
||||
csharp_style_prefer_primary_constructors = true
|
||||
csharp_style_prefer_top_level_statements = true
|
||||
|
||||
# Expression-level preferences
|
||||
csharp_prefer_simple_default_expression = true
|
||||
csharp_style_deconstructed_variable_declaration = true
|
||||
csharp_style_implicit_object_creation_when_type_is_apparent = true
|
||||
csharp_style_inlined_variable_declaration = true
|
||||
csharp_style_prefer_index_operator = true
|
||||
csharp_style_prefer_local_over_anonymous_function = true
|
||||
csharp_style_prefer_null_check_over_type_check = true
|
||||
csharp_style_prefer_range_operator = true
|
||||
csharp_style_prefer_tuple_swap = true
|
||||
csharp_style_prefer_utf8_string_literals = true
|
||||
csharp_style_throw_expression = true
|
||||
csharp_style_unused_value_assignment_preference = discard_variable
|
||||
csharp_style_unused_value_expression_statement_preference = discard_variable
|
||||
|
||||
# 'using' directive preferences
|
||||
csharp_using_directive_placement = outside_namespace
|
||||
|
||||
# New line preferences
|
||||
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true
|
||||
csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true
|
||||
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true
|
||||
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true
|
||||
csharp_style_allow_embedded_statements_on_same_line_experimental = true
|
||||
|
||||
#### C# Formatting Rules ####
|
||||
|
||||
# New line preferences
|
||||
csharp_new_line_before_catch = true
|
||||
csharp_new_line_before_else = true
|
||||
csharp_new_line_before_finally = true
|
||||
csharp_new_line_before_members_in_anonymous_types = true
|
||||
csharp_new_line_before_members_in_object_initializers = true
|
||||
csharp_new_line_before_open_brace = all
|
||||
csharp_new_line_between_query_expression_clauses = true
|
||||
|
||||
# Indentation preferences
|
||||
csharp_indent_block_contents = true
|
||||
csharp_indent_braces = false
|
||||
csharp_indent_case_contents = true
|
||||
csharp_indent_case_contents_when_block = true
|
||||
csharp_indent_labels = one_less_than_current
|
||||
csharp_indent_switch_labels = true
|
||||
|
||||
# Space preferences
|
||||
csharp_space_after_cast = false
|
||||
csharp_space_after_colon_in_inheritance_clause = true
|
||||
csharp_space_after_comma = true
|
||||
csharp_space_after_dot = false
|
||||
csharp_space_after_keywords_in_control_flow_statements = true
|
||||
csharp_space_after_semicolon_in_for_statement = true
|
||||
csharp_space_around_binary_operators = before_and_after
|
||||
csharp_space_around_declaration_statements = false
|
||||
csharp_space_before_colon_in_inheritance_clause = true
|
||||
csharp_space_before_comma = false
|
||||
csharp_space_before_dot = false
|
||||
csharp_space_before_open_square_brackets = false
|
||||
csharp_space_before_semicolon_in_for_statement = false
|
||||
csharp_space_between_empty_square_brackets = false
|
||||
csharp_space_between_method_call_empty_parameter_list_parentheses = false
|
||||
csharp_space_between_method_call_name_and_opening_parenthesis = false
|
||||
csharp_space_between_method_call_parameter_list_parentheses = false
|
||||
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
|
||||
csharp_space_between_method_declaration_name_and_open_parenthesis = false
|
||||
csharp_space_between_method_declaration_parameter_list_parentheses = false
|
||||
csharp_space_between_parentheses = false
|
||||
csharp_space_between_square_brackets = false
|
||||
|
||||
# Wrapping preferences
|
||||
csharp_preserve_single_line_blocks = true
|
||||
csharp_preserve_single_line_statements = true
|
||||
|
||||
#### Naming styles ####
|
||||
|
||||
# Naming rules
|
||||
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i
|
||||
|
||||
dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion
|
||||
dotnet_naming_rule.types_should_be_pascal_case.symbols = types
|
||||
dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case
|
||||
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case
|
||||
|
||||
# Symbol specifications
|
||||
|
||||
dotnet_naming_symbols.interface.applicable_kinds = interface
|
||||
dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.interface.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
|
||||
dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.types.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
|
||||
dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.non_field_members.required_modifiers =
|
||||
|
||||
# Naming styles
|
||||
|
||||
dotnet_naming_style.pascal_case.required_prefix =
|
||||
dotnet_naming_style.pascal_case.required_suffix =
|
||||
dotnet_naming_style.pascal_case.word_separator =
|
||||
dotnet_naming_style.pascal_case.capitalization = pascal_case
|
||||
|
||||
dotnet_naming_style.begins_with_i.required_prefix = I
|
||||
dotnet_naming_style.begins_with_i.required_suffix =
|
||||
dotnet_naming_style.begins_with_i.word_separator =
|
||||
dotnet_naming_style.begins_with_i.capitalization = pascal_case
|
||||
|
|
@ -5,6 +5,8 @@
|
|||
<ItemGroup>
|
||||
<PackageVersion Include="Apache.Arrow" Version="22.1.0" />
|
||||
<PackageVersion Include="dotnet-file-associator" Version="0.1.4" />
|
||||
<PackageVersion Include="DuckDB.NET.Data.Full" Version="1.4.3" />
|
||||
<PackageVersion Include="FCTB" Version="2.16.24" />
|
||||
<PackageVersion Include="MiniExcel" Version="2.0.0-preview.2" />
|
||||
<PackageVersion Include="MSTest.TestAdapter" Version="4.0.2" />
|
||||
<PackageVersion Include="MSTest.TestFramework" Version="4.0.2" />
|
||||
|
|
|
|||
43
src/ParquetViewer.Engine.DuckDB/DuckDBHandle.cs
Normal file
43
src/ParquetViewer.Engine.DuckDB/DuckDBHandle.cs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
using DuckDB.NET.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
{
    /// <summary>
    /// Couples an open DuckDB connection with the path of the parquet file it is meant to query.
    /// Instances are created through <see cref="OpenAsync(string)"/>; disposing the handle disposes the connection.
    /// </summary>
    public class DuckDBHandle : IDisposable
    {
        /// <summary>Path of the parquet file this handle was opened for.</summary>
        public string ParquetFilePath { get; }

        /// <summary>The DuckDB connection owned by this handle.</summary>
        public DuckDBConnection Connection { get; }

        // Private so that callers can only obtain a handle whose connection was opened by OpenAsync.
        private DuckDBHandle(DuckDBConnection connection, string parquetPath)
        {
            ParquetFilePath = parquetPath;
            Connection = connection;
        }

        /// <summary>
        /// Opens an in-memory DuckDB connection and wraps it together with <paramref name="parquetPath"/>.
        /// </summary>
        /// <param name="parquetPath">Path to an existing parquet file.</param>
        /// <returns>A handle that owns the opened connection.</returns>
        /// <exception cref="FileNotFoundException">
        /// Thrown when <paramref name="parquetPath"/> is null or does not point to an existing file.
        /// </exception>
        public static async Task<DuckDBHandle> OpenAsync(string parquetPath)
        {
            if (!File.Exists(parquetPath)) //handles null
            {
                // Use the (message, fileName) overload: the single-argument constructor treats its
                // argument as the message and leaves FileNotFoundException.FileName unset.
                throw new FileNotFoundException($"Parquet file not found: {parquetPath}", parquetPath);
            }

            var connection = new DuckDBConnection("Data Source=:memory:");
            try
            {
                await connection.OpenAsync();
                return new DuckDBHandle(connection, parquetPath);
            }
            catch
            {
                // The handle never took ownership, so release the connection before propagating.
                connection.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Disposes the underlying connection. Any exception raised while doing so is ignored.
        /// </summary>
        public void Dispose()
        {
            try
            {
                Connection.Dispose();
            }
            catch
            {
                // Deliberate best effort: a failure while tearing down the connection is not surfaced to callers.
            }
        }
    }
}
|
||||
135
src/ParquetViewer.Engine.DuckDB/DuckDBHelper.cs
Normal file
135
src/ParquetViewer.Engine.DuckDB/DuckDBHelper.cs
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
using DuckDB.NET.Data;
|
||||
using DuckDB.NET.Native;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Numerics;
|
||||
using System.Text;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
internal static class DuckDBHelper
|
||||
{
|
||||
internal record DuckDBField(string Name, DuckDBType DuckDBType, Type Type);
|
||||
|
||||
/// <summary>
/// Runs a DESCRIBE statement against the parquet file referenced by <paramref name="db"/> and
/// builds one <see cref="DuckDBField"/> per returned row.
/// </summary>
/// <param name="db">Handle providing the open connection and the parquet file path.</param>
/// <returns>The fields in the order the DESCRIBE result yields them.</returns>
public static async Task<List<DuckDBField>> GetFields(DuckDBHandle db)
{
    var fields = new List<DuckDBField>();

    // The path is embedded in a single-quoted SQL string literal, so any single quote inside it
    // must be doubled; otherwise a path such as "C:\it's.parquet" terminates the literal early.
    var escapedPath = db.ParquetFilePath.Replace("'", "''");

    using var result = await db.Connection.QueryAsync($"DESCRIBE TABLE '{escapedPath}';");
    await foreach (var row in result)
    {
        var columnName = row.GetString(0);
        var columnTypeName = row.GetString(1);

        // NOTE(review): the same value is passed as both the DuckDB type name and the fallback
        // column type name, so the "NULL" fallback in ParseDuckDBType cannot resolve to a
        // different name here - confirm this is intended.
        var (duckDBType, clrType) = ParseDuckDBType(columnTypeName, columnTypeName);

        fields.Add(new(columnName, duckDBType, clrType));
    }
    return fields;
}
|
||||
|
||||
/// <summary>
/// Maps a DuckDB type name to a <see cref="DuckDBType"/> and the CLR type used to represent it.
/// </summary>
/// <param name="duckDBTypeName">Type name as reported by DuckDB (e.g. <c>BIGINT</c>, <c>DECIMAL(18,2)</c>, <c>INTEGER[]</c>).</param>
/// <param name="columnTypeName">Fallback type name used when <paramref name="duckDBTypeName"/> is <c>NULL</c>.</param>
/// <returns>The DuckDB type and the matching CLR type.</returns>
/// <exception cref="ArgumentOutOfRangeException">The type name isn't one of the supported values.</exception>
public static (DuckDBType DuckDBType, Type Type) ParseDuckDBType(string duckDBTypeName, string? columnTypeName)
{
    //Sometimes the duckdb type is reported as NULL, in which case we need to fall back to the column type name.
    //Values here seem to match the Parquet format's supported types: https://parquet.apache.org/docs/file-format/types/
    if (duckDBTypeName.Trim('"') == "NULL" && columnTypeName is not null)
    {
        return columnTypeName switch
        {
            "BOOLEAN" => (DuckDBType.Boolean, typeof(bool)),
            "INT32" => (DuckDBType.Integer, typeof(int)),
            "INT64" => (DuckDBType.BigInt, typeof(long)),
            "INT96" => (DuckDBType.HugeInt, typeof(BigInteger)),
            "FLOAT" => (DuckDBType.Float, typeof(float)),
            "DOUBLE" => (DuckDBType.Double, typeof(double)),
            "FIXED_LEN_BYTE_ARRAY" => (DuckDBType.Blob, typeof(ByteArrayValue)),
            "BYTE_ARRAY" => (DuckDBType.Blob, typeof(ByteArrayValue)),
            _ => throw new ArgumentOutOfRangeException(nameof(columnTypeName), $"Unsupported Parquet column type: {columnTypeName}"),
        };
    }

    // This mapping is based on https://duckdb.net/docs/type-mapping.html
    // It handles simple types and parameterized types by checking the start of the string.
    // Type names are fixed identifiers, so compare ordinally instead of with the current culture.
    if (duckDBTypeName.EndsWith("[]", StringComparison.Ordinal)) return (DuckDBType.List, typeof(List<>));

    if (duckDBTypeName.StartsWith("DECIMAL", StringComparison.Ordinal)) return (DuckDBType.Decimal, typeof(decimal));
    if (duckDBTypeName.StartsWith("VARCHAR", StringComparison.Ordinal)) return (DuckDBType.Varchar, typeof(string));
    if (duckDBTypeName.StartsWith("LIST", StringComparison.Ordinal)) return (DuckDBType.List, typeof(List<>));
    if (duckDBTypeName.StartsWith("MAP", StringComparison.Ordinal)) return (DuckDBType.Map, typeof(Dictionary<,>));
    if (duckDBTypeName.StartsWith("STRUCT", StringComparison.Ordinal)) return (DuckDBType.Struct, typeof(ValueTuple));
    if (duckDBTypeName.StartsWith("ENUM", StringComparison.Ordinal)) return (DuckDBType.Enum, typeof(string));
    if (duckDBTypeName.StartsWith("TIMESTAMP", StringComparison.Ordinal)) return (DuckDBType.Timestamp, typeof(DateTime));

    return duckDBTypeName switch
    {
        "BOOLEAN" => (DuckDBType.Boolean, typeof(bool)),
        "TINYINT" => (DuckDBType.TinyInt, typeof(sbyte)),
        "SMALLINT" => (DuckDBType.SmallInt, typeof(short)),
        "INTEGER" => (DuckDBType.Integer, typeof(int)),
        "BIGINT" => (DuckDBType.BigInt, typeof(long)),
        //128-bit integer: HugeInt (not BigInt), matching the INT96 fallback above.
        "HUGEINT" => (DuckDBType.HugeInt, typeof(BigInteger)),
        "UTINYINT" => (DuckDBType.UnsignedTinyInt, typeof(byte)),
        "USMALLINT" => (DuckDBType.UnsignedSmallInt, typeof(ushort)),
        "UINTEGER" => (DuckDBType.UnsignedInteger, typeof(uint)),
        "UBIGINT" => (DuckDBType.UnsignedBigInt, typeof(ulong)),
        //NOTE(review): reported with the signed HugeInt id - confirm whether an unsigned id should be used here.
        "UHUGEINT" => (DuckDBType.HugeInt, typeof(BigInteger)),
        "DOUBLE" => (DuckDBType.Double, typeof(double)),
        "FLOAT" or "REAL" => (DuckDBType.Float, typeof(float)),
        "BLOB" => (DuckDBType.Blob, typeof(ByteArrayValue)),
        "DATE" => (DuckDBType.Date, typeof(DateOnly)),
        "TIME" => (DuckDBType.Time, typeof(TimeSpan)),
        "INTERVAL" => (DuckDBType.Interval, typeof(TimeSpan)),
        "UUID" => (DuckDBType.Uuid, typeof(Guid)),
        _ => throw new ArgumentOutOfRangeException(nameof(duckDBTypeName), $"Unsupported DuckDB type: {duckDBTypeName}({columnTypeName})")
    };
}
|
||||
|
||||
/// <summary>
/// Reads the rows of <c>parquet_schema</c> for the handle's file and rebuilds them into a tree.
/// DuckDB flattens the schema so we need to rebuild it into a tree structure.
/// </summary>
/// <param name="db">Handle whose connection and file path are used for the query.</param>
/// <returns>The root schema element with its descendants attached.</returns>
/// <exception cref="InvalidDataException">The query returned no rows, or fewer rows than the declared child counts require.</exception>
public static async Task<ParquetSchemaElement> GetParquetSchemaTreeAsync(DuckDBHandle db)
{
    //Double single quotes so the path can't terminate the SQL string literal early.
    var escapedPath = db.ParquetFilePath.Replace("'", "''");

    //Dispose the result and its enumerator like the other query helpers do, so the reader isn't left open.
    using var result = await db.Connection.QueryAsync($"SELECT * FROM parquet_schema('{escapedPath}');");
    await using IAsyncEnumerator<DuckDBDataReader> enumerator = result.GetAsyncEnumerator();

    if (!await enumerator.MoveNextAsync())
    {
        throw new InvalidDataException("Failed to retrieve Parquet schema.");
    }

    var rootNode = ParquetSchemaElement.FromRow(enumerator.Current);
    await ReadChildrenAsync(rootNode, enumerator);
    return rootNode;

    //Consumes the next NumChildren rows as children of 'parent', descending into each child before its siblings.
    async Task ReadChildrenAsync(ParquetSchemaElement parent, IAsyncEnumerator<DuckDBDataReader> enumerator)
    {
        for (int i = 0; i < parent.NumChildren; i++)
        {
            if (!await enumerator.MoveNextAsync())
            {
                throw new InvalidDataException($"Premature end to parquet schema for field `{parent.Path}`.");
            }

            var childNode = ParquetSchemaElement.FromRow(enumerator.Current);
            parent.Children.Add(childNode);
            await ReadChildrenAsync(childNode, enumerator);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads the key/value metadata of the handle's parquet file via <c>parquet_kv_metadata</c>.
/// </summary>
/// <param name="db">Handle whose connection and file path are used for the query.</param>
/// <returns>Metadata keys mapped to their values; a key returned more than once keeps its last value.</returns>
public static async Task<Dictionary<string, string>> GetCustomMetadataAsync(DuckDBHandle db)
{
    //Double single quotes so the path can't terminate the SQL string literal early.
    var escapedPath = db.ParquetFilePath.Replace("'", "''");
    var query = $"SELECT * FROM parquet_kv_metadata('{escapedPath}');";

    var metadata = new Dictionary<string, string>();
    using var result = await db.Connection.QueryAsync(query);
    await foreach (var row in result)
    {
        //Columns 1 and 2 are read as streams and decoded as UTF-8 text.
        var keyStream = await row.GetFieldValueAsync<Stream>(1);
        var valueStream = await row.GetFieldValueAsync<Stream>(2);

        using var keyReader = new StreamReader(keyStream, Encoding.UTF8);
        string key = await keyReader.ReadToEndAsync();

        using var valueReader = new StreamReader(valueStream, Encoding.UTF8);
        string value = await valueReader.ReadToEndAsync();

        metadata[key] = value;
    }
    return metadata;
}
|
||||
}
|
||||
}
|
||||
24
src/ParquetViewer.Engine.DuckDB/ExtensionMethods.cs
Normal file
24
src/ParquetViewer.Engine.DuckDB/ExtensionMethods.cs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
using DuckDB.NET.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// Extension helpers for <see cref="DuckDBConnection"/>.
/// </summary>
internal static class ExtensionMethods
{
    /// <summary>
    /// Executes <paramref name="sql"/> on the connection and wraps the resulting reader in a <see cref="QueryResult"/>.
    /// The connection is opened first if it is currently closed.
    /// </summary>
    /// <param name="db">Connection to run the query on.</param>
    /// <param name="sql">SQL text to execute.</param>
    /// <exception cref="ArgumentNullException"><paramref name="db"/> or <paramref name="sql"/> is null.</exception>
    public static async Task<QueryResult> QueryAsync(this DuckDBConnection db, string sql)
    {
        ArgumentNullException.ThrowIfNull(db);
        ArgumentNullException.ThrowIfNull(sql);

        var isClosed = db.State == System.Data.ConnectionState.Closed;
        if (isClosed)
            await db.OpenAsync();

        using var cmd = db.CreateCommand();
        cmd.CommandText = sql;
        return new QueryResult(cmd.ExecuteReader());
    }
}
|
||||
}
|
||||
463
src/ParquetViewer.Engine.DuckDB/ParquetEngine.cs
Normal file
463
src/ParquetViewer.Engine.DuckDB/ParquetEngine.cs
Normal file
|
|
@ -0,0 +1,463 @@
|
|||
using DuckDB.NET.Data;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
using System.Data;
|
||||
using static ParquetViewer.Engine.DuckDB.DuckDBHelper;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
public class ParquetEngine : IParquetEngine
|
||||
{
|
||||
private readonly List<DuckDBHandle> _dbs;
|
||||
private readonly List<ParquetMetadata> _metadatas;
|
||||
|
||||
public string Path { get; set; }
|
||||
|
||||
public List<string> Fields => this._fields.Select(f => f.Name).ToList();
|
||||
|
||||
public long RecordCount { get; }
|
||||
|
||||
public int NumberOfPartitions => this._dbs.Count;
|
||||
|
||||
public Dictionary<string, string> CustomMetadata { get; }
|
||||
|
||||
public IParquetMetadata Metadata => this._metadatas.First();
|
||||
|
||||
private List<DuckDBField> _fields;
|
||||
|
||||
/// <summary>
/// Combines the name, CLR type and DuckDB type of every field, in list order, into a single hash code.
/// </summary>
private static int GetFieldsHashCode(List<DuckDBField> fields)
{
    var combined = new HashCode();
    foreach (var (name, duckDBType, clrType) in fields)
    {
        //Keep the contribution order: name, CLR type, DuckDB type.
        combined.Add(name);
        combined.Add(clrType);
        combined.Add(duckDBType);
    }
    return combined.ToHashCode();
}
|
||||
|
||||
/// <summary>
/// Creates an engine over a single parquet file by delegating to the multi-file constructor with one-element lists.
/// </summary>
private ParquetEngine(string filePath, DuckDBHandle db, ParquetMetadata metadata, List<DuckDBField> fields, long recordCount, Dictionary<string, string> customMetadata)
    : this(filePath, new List<DuckDBHandle> { db }, new List<ParquetMetadata> { metadata }, fields, recordCount, customMetadata)
{
}

/// <summary>
/// Creates an engine over one or more parquet files. Fields are filtered against the schema of the first file's metadata.
/// </summary>
private ParquetEngine(string folderPath, List<DuckDBHandle> dbs, List<ParquetMetadata> metadatas, List<DuckDBField> fields, long recordCount, Dictionary<string, string> customMetadata)
{
    _dbs = dbs;
    _metadatas = metadatas;
    Path = folderPath;
    RecordCount = recordCount;
    CustomMetadata = customMetadata;
    _fields = FilterOutFieldsThatDontExist(fields, metadatas.First());
}
|
||||
|
||||
/// <summary>
/// DuckDB sometimes returns fields from the DESCRIBE TABLE query that don't actually exist in the Parquet file.
/// Keeps only the fields whose name matches the path of a direct child of the schema root.
/// </summary>
/// <returns>Returns a new list with fields that actually exist in the parquet file.</returns>
/// <remarks>Fixes PARTITIONED_PARQUET_FILE_TEST</remarks>
private static List<DuckDBField> FilterOutFieldsThatDontExist(List<DuckDBField> fields, ParquetMetadata metadata)
{
    bool ExistsInSchema(DuckDBField candidate) =>
        metadata.SchemaTree.Children.Cast<IParquetSchemaElement>().Any(child => child.Path == candidate.Name);

    return fields.Where(ExistsInSchema).ToList();
}
|
||||
|
||||
/// <summary>
/// Opens the given path as a single parquet file if it is a file, or as a folder of parquet files if it is a directory.
/// </summary>
/// <exception cref="FileNotFoundException">The path is neither an existing file nor an existing directory.</exception>
public static Task<ParquetEngine> OpenFileOrFolderAsync(string parquetFilePath, CancellationToken cancellationToken)
{
    if (File.Exists(parquetFilePath)) //Handles null
        return OpenFileAsync(parquetFilePath, cancellationToken);

    if (Directory.Exists(parquetFilePath)) //Handles null
        return OpenFolderAsync(parquetFilePath, cancellationToken);

    throw new FileNotFoundException(parquetFilePath);
}
|
||||
|
||||
/// <summary>
/// Opens a single parquet file: reads its metadata, field list and custom key/value metadata.
/// </summary>
/// <param name="parquetFilePath">Path of an existing parquet file.</param>
/// <param name="cancellationToken">Checked before opening the file and between the individual read steps.</param>
/// <returns>An engine that owns the opened DuckDB handle.</returns>
/// <exception cref="FileNotFoundException">The file doesn't exist (or the path is null).</exception>
/// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
public static async Task<ParquetEngine> OpenFileAsync(string parquetFilePath, CancellationToken cancellationToken)
{
    if (!File.Exists(parquetFilePath)) //Handles null
    {
        throw new FileNotFoundException($"Could not find parquet file at: {parquetFilePath}");
    }

    //Honor the token like OpenFolderAsync does instead of ignoring it.
    cancellationToken.ThrowIfCancellationRequested();

    var db = await DuckDBHandle.OpenAsync(parquetFilePath);
    try
    {
        cancellationToken.ThrowIfCancellationRequested();
        var parquetMetadata = await ParquetMetadata.FromDuckDBAsync(db);

        cancellationToken.ThrowIfCancellationRequested();
        var fields = await DuckDBHelper.GetFields(db);

        cancellationToken.ThrowIfCancellationRequested();
        var customMetadata = await DuckDBHelper.GetCustomMetadataAsync(db);

        return new ParquetEngine(parquetFilePath, db, parquetMetadata, fields, parquetMetadata.RowCount, customMetadata);
    }
    catch (Exception)
    {
        //The engine never took ownership, so release the handle here (also on cancellation).
        db.Dispose();
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Opens every parquet file found under a folder and exposes them as one data set.
/// Files are grouped by the hash of their field list; the folder is only usable when exactly one group exists
/// and no file had to be skipped.
/// </summary>
/// <param name="folderPath">Path of an existing directory.</param>
/// <param name="cancellationToken">Checked before each file is opened and once before metadata is read.</param>
/// <returns>An engine that owns one DuckDB handle per file.</returns>
/// <exception cref="DirectoryNotFoundException">The directory doesn't exist (or the path is null).</exception>
/// <exception cref="FileNotFoundException">No parquet files were found.</exception>
/// <exception cref="AllFilesSkippedException">Every file failed to open.</exception>
/// <exception cref="MultipleSchemasFoundException">The files don't all share the same field list.</exception>
/// <exception cref="SomeFilesSkippedException">The files share one field list but some failed to open.</exception>
/// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
public static async Task<ParquetEngine> OpenFolderAsync(string folderPath, CancellationToken cancellationToken)
{
    if (!Directory.Exists(folderPath)) //Handles null
    {
        throw new DirectoryNotFoundException($"Directory doesn't exist: {folderPath}");
    }

    var skippedFiles = new Dictionary<string, Exception>();
    var fileGroups = new Dictionary<int, List<DuckDBHandle>>();
    try
    {
        foreach (var file in Helpers.ListParquetFiles(folderPath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            DuckDBHandle? db = null;
            try
            {
                db = await DuckDBHandle.OpenAsync(file);
                var fileFields = await DuckDBHelper.GetFields(db);
                var fieldsHashCode = GetFieldsHashCode(fileFields);
                if (!fileGroups.TryGetValue(fieldsHashCode, out var group))
                {
                    group = new List<DuckDBHandle>();
                    fileGroups.Add(fieldsHashCode, group);
                }

                group.Add(db);
            }
            catch (Exception ex)
            {
                //The handle was never added to a group, so nothing else would release it.
                db?.Dispose();
                skippedFiles.Add(System.IO.Path.GetRelativePath(folderPath, file), ex);
            }
        }

        if (fileGroups.Count == 0)
        {
            if (skippedFiles.Count == 0)
            {
                throw new FileNotFoundException("Directory is empty");
            }
            else
            {
                throw new AllFilesSkippedException(skippedFiles);
            }
        }
        else if (fileGroups.Count > 1)
        {
            //We found more than one type of schema.
            //Collect the field names while the handles are still open; they are disposed by the catch block below.
            var fieldsByFile = new List<List<string>>();
            foreach (var group in fileGroups.Values)
            {
                var groupFields = await DuckDBHelper.GetFields(group.First());
                fieldsByFile.Add(groupFields.Select(f => f.Name).ToList());
            }

            throw new MultipleSchemasFoundException(fieldsByFile);
        }
        else if (skippedFiles.Count > 0)
        {
            //We found one schema but some files couldn't be read
            throw new SomeFilesSkippedException(skippedFiles);
        }

        cancellationToken.ThrowIfCancellationRequested();

        //We have only one schema across all files and are good to go
        List<DuckDBHandle> dbs = fileGroups.Values.First();

        var metadatas = new List<ParquetMetadata>();
        foreach (var db in dbs)
        {
            var metadata = await ParquetMetadata.FromDuckDBAsync(db);
            metadatas.Add(metadata);
        }

        var totalRecordCount = metadatas.Sum(m => m.RowCount);
        var fields = await DuckDBHelper.GetFields(dbs.First());
        var customMetadata = await DuckDBHelper.GetCustomMetadataAsync(dbs.First());

        return new ParquetEngine(folderPath, dbs, metadatas, fields, totalRecordCount, customMetadata);
    }
    catch
    {
        //No engine took ownership of the handles (error, schema mismatch or cancellation): release all of them.
        foreach (var group in fileGroups.Values)
        {
            Helpers.EZDispose(group);
        }
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Disposes every DuckDB handle held by this engine.
/// </summary>
public void Dispose() => Helpers.EZDispose(this._dbs);
|
||||
|
||||
/// <summary>
/// Streams rows for the selected fields across all files, treating them as one continuous row range.
/// </summary>
/// <param name="selectedFields">Column names to select; each is quoted with <see cref="MakeColumnSafe"/>.</param>
/// <param name="offset">Number of rows to skip, counted across files in order.</param>
/// <param name="recordCount">Maximum number of rows to yield in total.</param>
/// <exception cref="FileNotFoundException">A file no longer exists when it is about to be queried.</exception>
private async IAsyncEnumerable<DuckDBDataReader> QueryDataAsync(List<string> selectedFields, int offset, int recordCount)
{
    var fields = string.Join(", ", selectedFields.Select(MakeColumnSafe));
    foreach ((DuckDBHandle db, ParquetMetadata metadata) in Helpers.PairEnumerables(this._dbs, this._metadatas))
    {
        EnsureFileExists(db.ParquetFilePath);

        if (recordCount <= 0)
            yield break;

        if (offset >= metadata.RowCount)
        {
            //The whole file lies before the requested range: consume its rows from the offset and move on.
            offset -= metadata.RowCount;
            continue;
        }

        //Double single quotes so a path like "O'Brien.parquet" can't terminate the SQL string literal early.
        var escapedPath = db.ParquetFilePath.Replace("'", "''");
        var query = $"SELECT {fields} " +
            $"FROM read_parquet('{escapedPath}') " +
            $"LIMIT {recordCount} " +
            $"OFFSET {offset};";

        //Only the first file that is read needs an offset; later files start at their first row.
        offset = 0;

        using var result = await db.Connection.QueryAsync(query);
        await foreach (var row in result)
        {
            yield return row;
            recordCount--;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads up to <paramref name="recordCount"/> rows starting at <paramref name="offset"/> into a <see cref="DataTable"/>,
/// converting list, struct, map and byte array values into the engine's own value types.
/// </summary>
/// <param name="selectedFields">Names of the top level fields to read.</param>
/// <param name="offset">Number of rows to skip. Must not be negative.</param>
/// <param name="recordCount">Maximum number of rows to read. Must be positive.</param>
/// <param name="cancellationToken">Checked once per row and once per cell.</param>
/// <param name="progress">Receives the field count of every row as it is loaded.</param>
/// <returns>
/// A function that returns the loaded table. When called with <c>true</c> it first reports
/// rows * columns to <paramref name="progress"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="recordCount"/> or <paramref name="offset"/> is out of range.</exception>
/// <exception cref="DecimalOverflowException">A value didn't fit into a <see cref="decimal"/>.</exception>
/// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
public async Task<Func<bool, DataTable>> ReadRowsAsync(List<string> selectedFields, int offset, int recordCount, CancellationToken cancellationToken, IProgress<int>? progress = null)
{
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(recordCount, nameof(recordCount));
    ArgumentOutOfRangeException.ThrowIfNegative(offset, nameof(offset));

    var result = CreateEmptyDataTable(selectedFields);

    //The schema element behind each result column is the same for every row, so it is resolved once
    //(when the first row arrives) instead of being searched for again for every single cell.
    ParquetSchemaElement[]? columnSchemaElements = null;

    result.BeginLoadData();
    await foreach (var row in this.QueryDataAsync(selectedFields, offset, recordCount))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var values = new object[row.FieldCount];
        try
        {
            row.GetValues(values);
        }
        catch (OverflowException ex) when (ex.Message == "Value was either too large or too small for a Decimal.")
        {
            throw new DecimalOverflowException(ex);
        }

        if (columnSchemaElements is null || columnSchemaElements.Length != row.FieldCount)
        {
            var topLevelFields = this._metadatas.First().SchemaTree.Children;
            columnSchemaElements = new ParquetSchemaElement[row.FieldCount];
            for (var i = 0; i < columnSchemaElements.Length; i++)
            {
                var fieldName = selectedFields[i];
                columnSchemaElements[i] = (ParquetSchemaElement)topLevelFields.First(f => f.Path == fieldName);
            }
        }

        //Convert values to our types
        for (var columnIndex = 0; columnIndex < row.FieldCount; columnIndex++)
        {
            cancellationToken.ThrowIfCancellationRequested();
            values[columnIndex] = ConvertValueTypeIfNeeded(values[columnIndex], columnSchemaElements[columnIndex]);
        }

        //supposedly this is the fastest way to load data into a datatable https://stackoverflow.com/a/17123914/1458738
        result.LoadDataRow(values, false);

        progress?.Report(row.FieldCount);
    }
    result.EndLoadData();

    return (bool shouldLogProgress) =>
    {
        if (shouldLogProgress)
        {
            //We don't have any post-processing. So just report the total.
            progress?.Report(result.Rows.Count * result.Columns.Count);
        }
        return result;
    };

    //Recursively converts a raw DuckDB value into the engine's representation for the given schema element.
    //Null values, DBNull and a missing schema element all yield DBNull.Value.
    object ConvertValueTypeIfNeeded(object? value, ParquetSchemaElement? parquetSchemaElement)
    {
        if (value is null || value == DBNull.Value || parquetSchemaElement is null)
            return DBNull.Value;

        if (parquetSchemaElement.FieldType == FieldTypeId.List)
        {
            var list = (IList)value;
            ParquetSchemaElement? listItemField = null;
            if (parquetSchemaElement.Children.Count > 0)
            {
                var listField = parquetSchemaElement.GetListField();
                if (listField.Children.Count == 0) //Assume 2-tier list variation (fixes: TWO_TIER_TEPEATED_LIST_FIELDS_TEST)
                {
                    listItemField = listField;
                }
                else
                {
                    listItemField = listField.GetListItemField();
                }
            }
            else if (parquetSchemaElement.IsPrimitive) //2-tier list (fixes: TWO_TIER_TEPEATED_LIST_FIELDS_TEST)
            {
                //Items are copied as-is, without per-item conversion.
                var newestList = new ArrayList(list.Count);
                foreach (var item in list)
                {
                    newestList.Add(item);
                }
                return new ListValue(newestList, parquetSchemaElement.ClrType);
            }

            var newList = new ArrayList(list.Count);
            foreach (var item in list)
            {
                newList.Add(ConvertValueTypeIfNeeded(item, listItemField));
            }

            //NOTE(review): listItemField is still null here when the element has no children and isn't primitive.
            return new ListValue(newList, listItemField!.ClrType!);
        }
        else if (parquetSchemaElement.FieldType == FieldTypeId.Struct)
        {
            var @struct = (Dictionary<string, object?>)value;
            var dataTable = new DataTableLite(1);

            //First pass: declare one column per struct member, typed by the member's field kind.
            foreach (var fieldName in @struct.Keys)
            {
                var field = parquetSchemaElement.GetSingleOrByName(fieldName);
                if (field.FieldType == FieldTypeId.List)
                {
                    dataTable.AddColumn(fieldName, typeof(ListValue), field);
                }
                else if (field.FieldType == FieldTypeId.Struct)
                {
                    dataTable.AddColumn(fieldName, typeof(StructValue), field);
                }
                else if (field.FieldType == FieldTypeId.Map)
                {
                    dataTable.AddColumn(fieldName, typeof(MapValue), field);
                }
                else //Primitive
                {
                    dataTable.AddColumn(fieldName, field.ClrType, field);
                }
            }

            //Second pass: fill the single row with the converted member values, in the same order.
            dataTable.NewRow();
            var fieldIndex = 0;
            foreach (var keyValuePair in @struct)
            {
                var field = parquetSchemaElement.GetSingleOrByName(keyValuePair.Key);
                dataTable.Rows[0][fieldIndex] = ConvertValueTypeIfNeeded(keyValuePair.Value ?? DBNull.Value, field);
                fieldIndex++;
            }

            return new StructValue(dataTable.GetRowAt(0));
        }
        else if (parquetSchemaElement.FieldType == FieldTypeId.Map)
        {
            var map = (IDictionary)value;
            var mapField = parquetSchemaElement.GetMapKeyValueField();
            var mapKeyField = mapField.GetMapKeyField();
            var mapValueField = mapField.GetMapValueField();

            var count = Math.Max(map.Keys.Count, map.Values.Count);
            var keys = new ArrayList(count);
            var values = new ArrayList(count);
            foreach ((object? key, object? value) pair in
                Helpers.PairEnumerables(map.Keys.Cast<object?>(), map.Values.Cast<object?>(), DBNull.Value))
            {
                keys.Add(ConvertValueTypeIfNeeded(pair.key, mapKeyField));
                values.Add(ConvertValueTypeIfNeeded(pair.value, mapValueField));
            }

            return new MapValue(keys, mapKeyField.ClrType,
                values, mapValueField.ClrType);
        }
        else if (parquetSchemaElement.FieldType == FieldTypeId.Primitive //2-tier list
            && parquetSchemaElement.RepetitionType == RepetitionTypeId.Repeated)
        {
            var list = (IList)value;

            var newList = new ArrayList(list.Count);
            foreach (var item in list)
            {
                //NOTE(review): a null schema element makes every item convert to DBNull.Value - confirm this is intended.
                newList.Add(ConvertValueTypeIfNeeded(item, null));
            }

            return new ListValue(newList, parquetSchemaElement.ClrType);
        }
        else if (parquetSchemaElement.IsByteArrayType)
        {
            //Byte arrays arrive as a stream; copy them fully into memory.
            using var ms = new MemoryStream();
            ((Stream)value).CopyTo(ms);
            return new ByteArrayValue(ms.ToArray());
        }
        else //primitive value
        {
            return value;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="DataTable"/> without rows that has one column per selected field.
/// Columns follow the order of the engine's field list; struct, list, map and byte array fields
/// get the engine's wrapper types, everything else uses the field's CLR type.
/// </summary>
private DataTable CreateEmptyDataTable(List<string> selectedFields)
{
    var table = new DataTable();
    foreach (var field in this._fields.Where(f => selectedFields.Contains(f.Name)))
    {
        var schemaField = (ParquetSchemaElement)this.Metadata.SchemaTree.GetChild(field.Name);
        Type columnType = schemaField.FieldType switch
        {
            FieldTypeId.Struct => typeof(StructValue),
            FieldTypeId.List => typeof(ListValue),
            FieldTypeId.Map => typeof(MapValue),
            _ => schemaField.IsByteArrayType
                ? typeof(ByteArrayValue)
                : field.Type, //Primitive type
        };
        table.Columns.Add(new DataColumn(field.Name, columnType));
    }
    return table;
}
|
||||
|
||||
/// <summary>
/// Throws if the given parquet file has disappeared since it was opened.
/// </summary>
/// <param name="filePath">Path of the file that is about to be queried.</param>
/// <exception cref="FileNotFoundException">The file doesn't exist.</exception>
private void EnsureFileExists(string filePath)
{
    if (!File.Exists(filePath))
    {
        //Report the file that was checked. this.Path is the folder when a folder was opened.
        throw new FileNotFoundException($"Parquet file no longer exists at: {filePath}");
    }
}
|
||||
|
||||
/// <summary>
/// Turns a column name into a quoted SQL identifier: embedded double quotes are doubled
/// and the whole name is wrapped in double quotes.
/// </summary>
private static string MakeColumnSafe(string columnName)
    => "\"" + columnName.Replace("\"", "\"\"") + "\"";
|
||||
|
||||
/// <summary>
/// Not implemented for this engine; always fails with <see cref="NotImplementedException"/>.
/// </summary>
public async Task WriteDataToParquetFileAsync(DataTable dataTable, string path, CancellationToken cancellationToken,
    IProgress<int> progress, Dictionary<string, string>? customMetadata)
{
    throw new NotImplementedException();
}
|
||||
}
|
||||
}
|
||||
250
src/ParquetViewer.Engine.DuckDB/ParquetMetadata.cs
Normal file
250
src/ParquetViewer.Engine.DuckDB/ParquetMetadata.cs
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
public class ParquetMetadata : IParquetMetadata
|
||||
{
|
||||
public int ParquetVersion { get; }
|
||||
|
||||
public int RowGroupCount { get; }
|
||||
|
||||
public int RowCount { get; }
|
||||
|
||||
public string CreatedBy { get; }
|
||||
|
||||
public ICollection<IRowGroupMetadata> RowGroups { get; }
|
||||
|
||||
public IParquetSchemaElement SchemaTree { get; }
|
||||
|
||||
/// <summary>
/// Stores the already-resolved metadata values; use <c>FromDuckDBAsync</c> to create instances.
/// </summary>
private ParquetMetadata(IParquetSchemaElement schemaTree, ICollection<IRowGroupMetadata> rowGroups,
    int recordCount, int parquetVersion, string createdBy, int rowGroupCount)
{
    (SchemaTree, RowGroups) = (schemaTree, rowGroups);
    (RowCount, RowGroupCount) = (recordCount, rowGroupCount);
    (ParquetVersion, CreatedBy) = (parquetVersion, createdBy);
}
|
||||
|
||||
/// <summary>
/// Builds the metadata of the handle's parquet file from three DuckDB queries:
/// the schema tree, <c>parquet_metadata</c> (one row per row group column) and <c>parquet_file_metadata</c>.
/// </summary>
/// <param name="db">Handle whose connection and file path are used for the queries.</param>
/// <returns>The file's metadata including its row groups and schema tree.</returns>
/// <exception cref="OverflowException">The file's row count doesn't fit into an <see cref="int"/>.</exception>
public static async Task<ParquetMetadata> FromDuckDBAsync(DuckDBHandle db)
{
    var schemaTree = await DuckDBHelper.GetParquetSchemaTreeAsync(db);

    //Double single quotes so a path like "O'Brien.parquet" can't terminate the SQL string literal early.
    var escapedPath = db.ParquetFilePath.Replace("'", "''");

    #region RowGroups
    var rowGroupColumns = new List<(RowGroupMetadataResult RowGroup, RowGroupColumnMetadata Column)>();
    using var result = await db.Connection.QueryAsync($"SELECT * FROM parquet_metadata('{escapedPath}');");
    await foreach (var row in result)
    {
        //Values are read by column position from the parquet_metadata result.
        long rowGroupId = row.GetInt64(1);
        long rowGroupNumRows = row.GetInt64(2);
        long rowGroupNumColumns = row.GetInt64(3);
        long rowGroupBytes = row.GetInt64(4);
        long? rowGroupCompressedBytes = row.IsDBNull(28) ? null : row.GetInt64(28);
        long? fileOffset = row.IsDBNull(6) ? null : row.GetInt64(6);
        var rowGroupMetadataResult = new RowGroupMetadataResult(rowGroupId, rowGroupNumRows, rowGroupNumColumns, rowGroupBytes, rowGroupCompressedBytes ?? -1, fileOffset ?? -1);

        long columnId = row.GetInt64(5);
        long numValues = row.GetInt64(7);

        string pathInSchema = row.GetString(8);
        string type = row.GetString(9);

        string? statsMin = row.IsDBNull(10) ? null : row.GetString(10);
        string? statsMax = row.IsDBNull(11) ? null : row.GetString(11);

        long? statsNullCount = row.IsDBNull(12) ? null : row.GetInt64(12);
        long? statsDistinctCount = row.IsDBNull(13) ? null : row.GetInt64(13);

        string? statsMinValue = row.IsDBNull(14) ? null : row.GetString(14);
        string? statsMaxValue = row.IsDBNull(15) ? null : row.GetString(15);

        //Columns 16 and 17 (compression, encodings) are still read, as before, but not stored.
        string compression = row.GetString(16);
        string encodings = row.GetString(17);

        long? indexPageOffset = row.IsDBNull(18) ? null : row.GetInt64(18);
        long? dictionaryPageOffset = row.IsDBNull(19) ? null : row.GetInt64(19);
        long dataPageOffset = row.GetInt64(20);

        long totalCompressedSize = row.GetInt64(21);
        long totalUncompressedSize = row.GetInt64(22);

        long? bloomFilterOffset = row.IsDBNull(24) ? null : row.GetInt64(24);
        long? bloomFilterLength = row.IsDBNull(25) ? null : row.GetInt64(25);

        bool? minIsExact = row.IsDBNull(26) ? null : row.GetBoolean(26);
        bool? maxIsExact = row.IsDBNull(27) ? null : row.GetBoolean(27);

        var rowGroupColumnMetadata = new RowGroupColumnMetadata(
            (int)columnId,
            pathInSchema,
            type,
            (int)numValues,
            totalUncompressedSize,
            totalCompressedSize,
            dataPageOffset,
            indexPageOffset,
            dictionaryPageOffset,
            new RowGroupColumnStatistics(
                statsMin,
                statsMax,
                statsNullCount,
                statsDistinctCount,
                statsMinValue,
                statsMaxValue,
                minIsExact,
                maxIsExact),
            bloomFilterOffset,
            bloomFilterLength);

        rowGroupColumns.Add((rowGroupMetadataResult, rowGroupColumnMetadata));
    }

    List<IRowGroupMetadata> rowGroups = rowGroupColumns
        .GroupBy(rgc => rgc.RowGroup.rowGroupId)
        .Select(group =>
        {
            //A grouping always holds at least one entry, so no empty-group handling is needed.
            var entries = group.ToList();
            var firstFileOffset = entries[0].RowGroup.fileOffset;
            var rowGroup = entries[^1].RowGroup;
            var columnMetadatas = entries.Select(entry => entry.Column).ToList();

            return new RowGroupMetadata(
                (int)group.Key,
                (int)rowGroup.rowGroupNumRows,
                (int)rowGroup.rowGroupNumColumns,
                firstFileOffset,
                rowGroup.rowGroupBytes,
                columnMetadatas.Sum(cm => cm.TotalCompressedSize ?? 0),
                columnMetadatas);
        })
        .ToList<IRowGroupMetadata>();
    #endregion

    #region File Metadata
    using var metadataResult = await db.Connection.QueryAsync($"SELECT * FROM parquet_file_metadata('{escapedPath}');");
    var fileMetadata = await metadataResult.GetSingleAsync();
    var createdBy = fileMetadata.IsDBNull(1) ? null : fileMetadata.GetString(1);
    var numRows = fileMetadata.GetInt64(2);
    var numRowGroups = fileMetadata.GetInt64(3);
    var parquetVersion = fileMetadata.GetInt64(4);
    #endregion

    //RowCount drives offset arithmetic when reading rows, so fail loudly instead of silently wrapping around.
    var rowCount = checked((int)numRows);

    var metadata = new ParquetMetadata(schemaTree, rowGroups, rowCount, (int)parquetVersion, createdBy ?? string.Empty, (int)numRowGroups);
    return metadata;
}
|
||||
|
||||
/// <summary>
/// Row group level values read from one row of the <c>parquet_metadata</c> query result.
/// </summary>
private record RowGroupMetadataResult(long rowGroupId, long rowGroupNumRows, long rowGroupNumColumns, long rowGroupBytes, long rowGroupCompressedBytes, long fileOffset);
|
||||
}
|
||||
|
||||
/// <summary>
/// Immutable description of a single row group and the column chunks it contains.
/// </summary>
public class RowGroupMetadata(int ordinal, int rowCount, int columnCount, long fileOffset, long totalByteSize, long totalCompressedSize, List<RowGroupColumnMetadata> columnMetadatas)
    : IRowGroupMetadata
{
    public int Ordinal { get; } = ordinal;
    public int RowCount { get; } = rowCount;
    public int ColumnCount { get; } = columnCount;

    //DuckDB doesn't seem to have info on this
    public ICollection<ISortingColumnMetadata>? SortingColumns { get; } = null;

    //Stored as a copy of the list that was passed in.
    public ICollection<IRowGroupColumnMetadata>? Columns { get; } = columnMetadatas.ToList<IRowGroupColumnMetadata>();

    public long FileOffset { get; } = fileOffset;
    public long TotalByteSize { get; } = totalByteSize;
    public long TotalCompressedSize { get; } = totalCompressedSize;
}
|
||||
|
||||
/// <summary>
/// Immutable description of one column chunk inside a row group. Every value is optional.
/// </summary>
public class RowGroupColumnMetadata(
    int? columnId,
    string? pathInSchema,
    string? type,
    int? numValues,
    long? totalUncompressedSize,
    long? totalCompressedSize,
    long? dataPageOffset,
    long? indexPageOffset,
    long? dictionaryPageOffset,
    RowGroupColumnStatistics? statistics,
    long? bloomFilterOffset,
    long? bloomFilterLength) : IRowGroupColumnMetadata
{
    public int? ColumnId { get; } = columnId;

    public string? PathInSchema { get; } = pathInSchema;

    public string? Type { get; } = type;

    public int? NumValues { get; } = numValues;

    public long? TotalUncompressedSize { get; } = totalUncompressedSize;

    public long? TotalCompressedSize { get; } = totalCompressedSize;

    public long? DataPageOffset { get; } = dataPageOffset;

    public long? IndexPageOffset { get; } = indexPageOffset;

    public long? DictionaryPageOffset { get; } = dictionaryPageOffset;

    public IRowGroupColumnStatistics? Statistics { get; } = statistics;

    public long? BloomFilterOffset { get; } = bloomFilterOffset;

    public long? BloomFilterLength { get; } = bloomFilterLength;
}
|
||||
|
||||
/// <summary>
/// Immutable statistics of one column chunk. Every value is optional.
/// </summary>
public class RowGroupColumnStatistics(object? min, object? max, long? nullCount, long? distinctCount, object? minValue, object? maxValue, bool? isMinValueExact, bool? isMaxValueExact)
    : IRowGroupColumnStatistics
{
    public object? Min { get; } = min;
    public object? Max { get; } = max;
    public long? NullCount { get; } = nullCount;
    public long? DistinctCount { get; } = distinctCount;
    public object? MinValue { get; } = minValue;
    public object? MaxValue { get; } = maxValue;
    public bool? IsMinValueExact { get; } = isMinValueExact;
    public bool? IsMaxValueExact { get; } = isMaxValueExact;
}
|
||||
}
|
||||
250
src/ParquetViewer.Engine.DuckDB/ParquetSchemaElement.cs
Normal file
250
src/ParquetViewer.Engine.DuckDB/ParquetSchemaElement.cs
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
using DuckDB.NET.Data;
|
||||
using DuckDB.NET.Native;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// One node of the parquet schema tree as described by the rows DuckDB returns for a file's schema.
/// A node either carries a CLR type (primitive) or has children (list / map / struct).
/// </summary>
public class ParquetSchemaElement : IParquetSchemaElement<ParquetSchemaElement>
{
    /// <summary>Name of the intermediate group that holds the key/value pair of a map field.</summary>
    private const string MapKeyValueFieldName = "key_value";

    public string Path { get; }

    public ICollection<ParquetSchemaElement> Children { get; }

    /// <summary>True when a CLR type was resolved for this element at construction time.</summary>
    public bool IsPrimitive => this._clrType is not null;

    /// <summary>
    /// The CLR type used to represent values of this field. Falls back to the wrapper type that
    /// matches <see cref="FieldType"/> when no CLR type was supplied.
    /// </summary>
    /// <exception cref="InvalidOperationException">The field is primitive but no CLR type is known.</exception>
    public Type ClrType => this._clrType ?? this.FieldType switch
    {
        FieldTypeId.List => typeof(ListValue),
        FieldTypeId.Map => typeof(MapValue),
        FieldTypeId.Struct => typeof(StructValue),
        _ => throw new InvalidOperationException("Cannot determine CLR type for primitive field without ClrType information."),
    };

    /// <summary>
    /// Field category. Taken from <see cref="ConvertedType"/> when it names a complex type,
    /// otherwise inferred by <see cref="GuessFieldType"/>.
    /// </summary>
    public FieldTypeId FieldType => this.ConvertedType switch
    {
        "LIST" => FieldTypeId.List,
        "MAP" => FieldTypeId.Map,
        "STRUCT" => FieldTypeId.Struct,
        _ => GuessFieldType(),
    };

    /// <summary>
    /// DuckDB isn't good with metadata resolution it seems. So we have to guess the field type based on available metadata.
    /// </summary>
    private FieldTypeId GuessFieldType()
    {
        // Leaf element (has a CLR type or declares no children): repeated leaves are lists.
        if (this._clrType is not null || (this.NumChildren ?? 0) <= 0)
        {
            return this._repetitionType == RepetitionTypeId.Repeated
                ? FieldTypeId.List
                : FieldTypeId.Primitive;
        }

        if (this.NumChildren == 2 && this.HasMapKeyValueField())
            return FieldTypeId.Map;

        if (this.NumChildren == 1 && this._repetitionType == RepetitionTypeId.Repeated)
            return FieldTypeId.List;

        return FieldTypeId.Struct;
    }

    /// <summary>
    /// Non-throwing equivalent of "would <see cref="GetMapKeyValueField"/> succeed?":
    /// true when there is exactly one child, or when one of several children is named <c>key_value</c>.
    /// Used instead of try/catch so that probing the field type never relies on exceptions.
    /// </summary>
    private bool HasMapKeyValueField()
        => this.Children.Count == 1
            || this.Children.Any(child => child.Path == MapKeyValueFieldName);

    public RepetitionTypeId? RepetitionType => this._repetitionType;

    public bool IsByteArrayType => _clrType == typeof(ByteArrayValue);

    // Note: builds a fresh list on every access.
    ICollection<IParquetSchemaElement> IParquetSchemaElement.Children => this.Children.ToList<IParquetSchemaElement>();

    public string? Type => this._underlyingType;

    private string? _underlyingType;
    public int? TypeLength { get; }
    private RepetitionTypeId? _repetitionType;
    public int? NumChildren { get; }
    public string? ConvertedType { get; }
    public int? Scale { get; }
    public int? Precision { get; }
    private string? _fieldId;
    public object? LogicalType { get; }
    private DuckDBType? _duckDbType;
    private Type? _clrType;

    /// <summary>
    /// Creates a schema element with an empty child collection.
    /// <paramref name="numChildren"/>, <paramref name="scale"/> and <paramref name="precision"/>
    /// are narrowed from <c>long?</c> to <c>int?</c>.
    /// </summary>
    public ParquetSchemaElement(
        string path,
        string? underlyingType,
        int? typeLength,
        RepetitionTypeId? repetitionType,
        long? numChildren,
        string? convertedType,
        long? scale,
        long? precision,
        string? fieldId,
        string? logicalType,
        DuckDBType? duckDbType,
        Type? ClrType)
    {
        this.Children = new List<ParquetSchemaElement>();
        this.Path = path;
        this._underlyingType = underlyingType;
        this.TypeLength = typeLength;
        this._repetitionType = repetitionType;
        this.NumChildren = (int?)numChildren;
        this.ConvertedType = convertedType;
        this.Scale = (int?)scale;
        this.Precision = (int?)precision;
        this._fieldId = fieldId;
        this.LogicalType = logicalType;
        this._duckDbType = duckDbType;
        this._clrType = ClrType;
    }

    /// <summary>
    /// Builds an element from the reader's current row. Columns are read by ordinal:
    /// 1 = name, 2 = type, 3 = type length, 4 = repetition type, 5 = child count, 6 = converted type,
    /// 7 = scale, 8 = precision, 9 = field id, 10 = logical type, 11 = DuckDB type name.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The repetition type is not REQUIRED, OPTIONAL or REPEATED.</exception>
    public static ParquetSchemaElement FromRow(DuckDBDataReader row)
    {
        string columnName = row.GetString(1);
        string? columnTypeName = row.IsDBNull(2) ? null : row.GetString(2);
        string? typeLengthString = row.IsDBNull(3) ? null : row.GetString(3);
        string? repetitionTypeName = row.IsDBNull(4) ? null : row.GetString(4);
        long? numChildren = row.IsDBNull(5) ? null : row.GetInt64(5);
        string? convertedType = row.IsDBNull(6) ? null : row.GetString(6);
        long? scale = row.IsDBNull(7) ? null : row.GetInt64(7);
        long? precision = row.IsDBNull(8) ? null : row.GetInt64(8);
        string? fieldId = row.IsDBNull(9) ? null : row.GetString(9);
        string? logicalType = row.IsDBNull(10) ? null : row.GetString(10);
        string? duckDbTypeName = row.IsDBNull(11) ? null : row.GetString(11); //Note: This field isn't returned for complex types like LIST, MAP, STRUCT unfortunately

        // A missing or non-numeric type length is treated as "unknown".
        int? typeLength = int.TryParse(typeLengthString, out var typeLengthValue) ? typeLengthValue : null;

        DuckDBType? duckDBType = null;
        Type? clrType = null;
        if (duckDbTypeName is not null)
        {
            (duckDBType, clrType) = DuckDBHelper.ParseDuckDBType(duckDbTypeName, columnTypeName);
        }

        RepetitionTypeId? repetitionType = null;
        if (repetitionTypeName is not null)
        {
            repetitionType = repetitionTypeName.ToUpperInvariant() switch
            {
                "REQUIRED" => RepetitionTypeId.Required,
                "OPTIONAL" => RepetitionTypeId.Optional,
                "REPEATED" => RepetitionTypeId.Repeated,
                _ => throw new ArgumentOutOfRangeException(nameof(repetitionTypeName), $"Unsupported repetition type: {repetitionTypeName}")
            };
        }

        return new ParquetSchemaElement(
            columnName,
            columnTypeName,
            typeLength,
            repetitionType,
            numChildren,
            convertedType,
            scale,
            precision,
            fieldId,
            logicalType,
            duckDBType,
            clrType);
    }

    /// <summary>
    /// Returns the only child when there is exactly one, otherwise the child whose path equals <paramref name="name"/>.
    /// </summary>
    /// <exception cref="MalformedFieldException">There are no children, or none of several children matches.</exception>
    public ParquetSchemaElement GetSingleOrByName(string name)
    {
        if (this.Children.Count == 0)
        {
            throw new MalformedFieldException($"Field `{Path}` has no children. Expected '{name}'.");
        }

        if (this.Children.Count == 1)
        {
            return this.Children.First();
        }

        return this.Children.FirstOrDefault(c => c.Path == name)
            ?? throw new MalformedFieldException($"Field `{Path}` has no child named '{name}'");
    }

    public ParquetSchemaElement GetListField()
        => this.GetSingleOrByName("list");

    /// <summary>
    /// Returns the element that describes the items of a list. An element without children is
    /// returned as-is (treated as a 2-tier list).
    /// </summary>
    /// <exception cref="UnsupportedFieldException">The item field could not be located.</exception>
    public ParquetSchemaElement GetListItemField()
    {
        try
        {
            if (this.Children.Count == 0)
            {
                //Assume this is a 2-tier list...
                return this;
            }

            return this.GetSingleOrByName("item");
        }
        catch (Exception ex)
        {
            throw new UnsupportedFieldException($"Cannot load field `{this.Path}`. Invalid List type.", ex);
        }
    }

    public ParquetSchemaElement GetMapKeyValueField()
        => this.GetSingleOrByName(MapKeyValueFieldName);

    public ParquetSchemaElement GetMapKeyField()
        => this.GetChildCI("key");

    public ParquetSchemaElement GetMapValueField()
        => this.GetChildCI("value");

    /// <summary>Finds a direct child by name, ignoring case (invariant culture).</summary>
    /// <exception cref="InvalidOperationException">No child has that name.</exception>
    public ParquetSchemaElement GetChildCI(string name)
        => Children.FirstOrDefault((f) => f.Path.Equals(name, StringComparison.InvariantCultureIgnoreCase))
            ?? throw new InvalidOperationException($"Field `{Path}` has no child named '{name}'.");

    /// <summary>Finds a direct child by exact name.</summary>
    /// <exception cref="InvalidOperationException">No child has that name.</exception>
    public ParquetSchemaElement GetChild(string name)
        => Children.FirstOrDefault((f) => f.Path.Equals(name))
            ?? throw new InvalidOperationException($"Field `{Path}` has no child named '{name}'.");

    // Explicit interface implementations: forward to the strongly typed members above.

    IParquetSchemaElement IParquetSchemaElement.GetChildCI(string name)
        => GetChildCI(name);

    IParquetSchemaElement IParquetSchemaElement.GetChild(string name)
        => GetChild(name);

    IParquetSchemaElement IParquetSchemaElement.GetListField()
        => GetListField();

    IParquetSchemaElement IParquetSchemaElement.GetListItemField()
        => GetListItemField();

    IParquetSchemaElement IParquetSchemaElement.GetSingleOrByName(string name)
        => GetSingleOrByName(name);

    IParquetSchemaElement IParquetSchemaElement.GetMapKeyValueField()
        => GetMapKeyValueField();

    IParquetSchemaElement IParquetSchemaElement.GetMapKeyField()
        => GetMapKeyField();

    IParquetSchemaElement IParquetSchemaElement.GetMapValueField()
        => GetMapValueField();
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
<Configurations>Debug;Release;Release_SelfContained</Configurations>
|
||||
<ProduceReferenceAssembly>False</ProduceReferenceAssembly>
|
||||
<EnforceCodeStyleInBuild>True</EnforceCodeStyleInBuild>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_SelfContained|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="DuckDB.NET.Data.Full" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
49
src/ParquetViewer.Engine.DuckDB/QueryResult.cs
Normal file
49
src/ParquetViewer.Engine.DuckDB/QueryResult.cs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
using DuckDB.NET.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// Thin wrapper around a <see cref="DuckDBDataReader"/> that exposes its rows as an async stream.
/// The same reader instance is yielded for every row; callers must consume the current row
/// before advancing the enumeration.
/// </summary>
internal class QueryResult : IAsyncEnumerable<DuckDBDataReader>, IDisposable
{
    private readonly DuckDBDataReader _reader;

    /// <param name="reader">The reader to wrap; disposed together with this instance.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public QueryResult(DuckDBDataReader reader)
    {
        ArgumentNullException.ThrowIfNull(reader);
        _reader = reader;
    }

    /// <summary>
    /// Disposes the underlying reader on a best-effort basis; failures are intentionally ignored.
    /// </summary>
    public void Dispose()
    {
        try
        {
            // Use the synchronous Dispose: the previous DisposeAsync() call discarded its ValueTask,
            // so the surrounding try/catch could never observe an asynchronous failure.
            _reader.Dispose();
        }
        catch
        {
            // Best effort: nothing useful can be done if releasing the reader fails.
        }
    }

    /// <summary>Advances to the next row and returns the reader positioned on it.</summary>
    /// <exception cref="InvalidOperationException">The reader has no more rows.</exception>
    public async Task<DuckDBDataReader> GetSingleAsync()
    {
        if (await _reader.ReadAsync())
        {
            return _reader;
        }
        throw new InvalidOperationException("No rows found.");
    }

    /// <summary>
    /// Yields the reader once per remaining row. Cancellation is honoured both while reading
    /// and before each row (including the first) is handed to the caller.
    /// </summary>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    public async IAsyncEnumerator<DuckDBDataReader> GetAsyncEnumerator(CancellationToken cancellationToken = default)
    {
        while (await _reader.ReadAsync(cancellationToken))
        {
            cancellationToken.ThrowIfCancellationRequested();

            yield return _reader;
        }
    }
}
|
||||
}
|
||||
60
src/ParquetViewer.Engine.ParquetNET/Helpers.cs
Normal file
60
src/ParquetViewer.Engine.ParquetNET/Helpers.cs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
internal static class Helpers
{
    #region Dubious Functions
    //This logic is messy right now. It blends https://www.aloneguid.uk/posts/2023/04/parquet-empty-vs-null
    //with some of my understanding of how the dremel algorithm works. No way will it work for all cases.

    /// <summary>
    /// True when a definition level exists at <paramref name="index"/> and it is at most one
    /// below the field's current definition level.
    /// </summary>
    public static bool IsNull(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
    {
        var levels = dataColumn.DefinitionLevels;
        if (!(levels?.Length > index))
        {
            return false;
        }

        return levels[index] <= field.CurrentDefinitionLevel - 1;
    }

    /// <summary>
    /// True when a definition level exists at <paramref name="index"/>, it equals the field's current
    /// definition level, and it differs from the data field's maximum definition level.
    /// </summary>
    public static bool IsEmpty(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
    {
        var levels = dataColumn.DefinitionLevels;
        if (!(levels?.Length > index))
        {
            return false;
        }

        var level = levels[index];
        return level == field.CurrentDefinitionLevel
            && field.DataField?.MaxDefinitionLevel != level /*Fixes STRUCT_TYPE_TEST*/;
    }
    #endregion

    /// <summary>
    /// Some parquet writers don't write null entries into the data array for empty and null lists.
    /// To keep the data aligned with the repetition/definition levels, a <see cref="DBNull"/> entry is
    /// emitted for every position that <see cref="IsEmpty"/> or <see cref="IsNull"/> flags.
    /// When there are no more levels than data values, the data is returned as-is (nulls mapped to DBNull).
    /// </summary>
    /// <param name="dataColumn">The parquet data column</param>
    /// <param name="field">Schema element used to interpret the definition levels</param>
    public static IEnumerable<object> GetDataWithPaddedNulls(this Parquet.Data.DataColumn dataColumn, ParquetSchemaElement field)
    {
        IEnumerable<object> unpadded = dataColumn.Data.Cast<object?>().Select(value => value ?? DBNull.Value);

        int levelCount = dataColumn.DefinitionLevels?.Length ?? 0;
        return levelCount > dataColumn.Data.Length
            ? PadWithNulls()
            : unpadded;

        IEnumerable<object> PadWithNulls()
        {
            // Position in the definition-level array of the next entry to be produced.
            var position = 0;
            foreach (var value in dataColumn.Data)
            {
                // Emit placeholders for every empty/null slot that precedes this value.
                while (dataColumn.IsEmpty(position, field) || dataColumn.IsNull(position, field))
                {
                    yield return DBNull.Value;
                    position++;
                }

                yield return value ?? DBNull.Value;
                position++;
            }

            //Need to handle case where last N rows are null/empty
            while (levelCount > position)
            {
                yield return DBNull.Value;
                position++;
            }
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
using ParquetViewer.Engine.Types;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
internal class ListValueBuilder
|
||||
public class ListValueBuilder
|
||||
{
|
||||
private int[] _repetitionLevels;
|
||||
private int[] _definitionLevels;
|
||||
|
|
@ -17,7 +18,10 @@ namespace ParquetViewer.Engine
|
|||
ArgumentNullException.ThrowIfNull(data);
|
||||
ArgumentNullException.ThrowIfNull(type);
|
||||
|
||||
_type = type;
|
||||
if (type == typeof(byte[]))
|
||||
_type = typeof(ByteArrayValue);
|
||||
else
|
||||
_type = type;
|
||||
|
||||
//We assume they all have the same length
|
||||
_definitionLevels = definitionLevels;
|
||||
|
|
@ -58,7 +62,11 @@ namespace ParquetViewer.Engine
|
|||
var listValue = ReadListValue(rowRange, numberOfListParents, () =>
|
||||
{
|
||||
//TODO: optimize to avoid skipping all rows every time
|
||||
return _data.Skip(rowRange.Start.Value).Take(rowRange.End.Value - rowRange.Start.Value).ToArray();
|
||||
return _data
|
||||
.Select(data => data is byte[] bytes ? new ByteArrayValue(bytes) : data) //Need to handle byte array type separately
|
||||
.Skip(rowRange.Start.Value)
|
||||
.Take(rowRange.End.Value - rowRange.Start.Value)
|
||||
.ToArray();
|
||||
},
|
||||
(int index) =>
|
||||
{
|
||||
|
|
@ -213,4 +221,4 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,17 +1,20 @@
|
|||
using Parquet;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public partial class ParquetEngine
|
||||
{
|
||||
public static readonly string TotalRecordCountExtendedPropertyKey = "TOTAL_RECORD_COUNT";
|
||||
|
||||
public async Task<Func<bool, DataTable>> ReadRowsAsync(List<string> selectedFields, int offset, int recordCount, CancellationToken cancellationToken, IProgress<int>? progress = null)
|
||||
{
|
||||
ArgumentOutOfRangeException.ThrowIfNegativeOrZero(recordCount, nameof(recordCount));
|
||||
ArgumentOutOfRangeException.ThrowIfNegative(offset, nameof(offset));
|
||||
|
||||
long recordsLeftToRead = recordCount;
|
||||
DataTableLite result = BuildDataTable(null, selectedFields, Math.Min(recordCount, (int)this.RecordCount));
|
||||
|
||||
|
|
@ -30,7 +33,6 @@ namespace ParquetViewer.Engine
|
|||
return (logProgress) =>
|
||||
{
|
||||
var datatable = result.ToDataTable(cancellationToken, logProgress ? progress : null);
|
||||
datatable.ExtendedProperties[TotalRecordCountExtendedPropertyKey] = result.DataSetSize;
|
||||
return datatable;
|
||||
};
|
||||
}
|
||||
|
|
@ -82,28 +84,30 @@ namespace ParquetViewer.Engine
|
|||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var field = column.ParentSchema.GetChild(column.Name);
|
||||
switch (field.FieldType)
|
||||
var field = column.ParentSchema.Children.FirstOrDefault(c => c.Path == column.Name) as ParquetSchemaElement;
|
||||
switch (field?.FieldType)
|
||||
{
|
||||
case ParquetSchemaElement.FieldTypeId.Primitive:
|
||||
case FieldTypeId.Primitive:
|
||||
await ReadPrimitiveField(dataTable, groupReader, rowBeginIndex, field, skipRecords,
|
||||
readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
case ParquetSchemaElement.FieldTypeId.List:
|
||||
case FieldTypeId.List:
|
||||
var listField = field.GetListField();
|
||||
var itemField = listField.GetListItemField();
|
||||
var fieldIndex = dataTable.Columns[field.Path]!.Ordinal;
|
||||
await ReadListField(dataTable, groupReader, rowBeginIndex, itemField, fieldIndex,
|
||||
skipRecords, readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
case ParquetSchemaElement.FieldTypeId.Map:
|
||||
case FieldTypeId.Map:
|
||||
await ReadMapField(dataTable, groupReader, rowBeginIndex, field, skipRecords,
|
||||
readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
case ParquetSchemaElement.FieldTypeId.Struct:
|
||||
case FieldTypeId.Struct:
|
||||
await ReadStructField(dataTable, groupReader, rowBeginIndex, field, skipRecords,
|
||||
readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
default:
|
||||
throw new InvalidDataException($"`{column.Name}`");
|
||||
}
|
||||
|
||||
isFirstColumn = false;
|
||||
|
|
@ -151,7 +155,7 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
else if (fieldType == typeof(ByteArrayValue))
|
||||
{
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new ByteArrayValue(field.Path, (byte[])value);
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new ByteArrayValue((byte[])value);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -170,7 +174,7 @@ namespace ParquetViewer.Engine
|
|||
var lastMilestone = "Start";
|
||||
try
|
||||
{
|
||||
if (itemField.FieldType == ParquetSchemaElement.FieldTypeId.List)
|
||||
if (itemField.FieldType == FieldTypeId.List)
|
||||
{
|
||||
var nestedListField = itemField.GetListField();
|
||||
var nestedItemField = nestedListField.GetListItemField();
|
||||
|
|
@ -179,7 +183,7 @@ namespace ParquetViewer.Engine
|
|||
await ReadListField(dataTable, groupReader, rowBeginIndex, nestedItemField, fieldIndex: 0,
|
||||
skipRecords, readRecords, isFirstColumn, cancellationToken, progress);
|
||||
}
|
||||
else if (itemField.FieldType == ParquetSchemaElement.FieldTypeId.Primitive)
|
||||
else if (itemField.FieldType == FieldTypeId.Primitive)
|
||||
{
|
||||
int rowIndex = rowBeginIndex;
|
||||
|
||||
|
|
@ -212,7 +216,7 @@ namespace ParquetViewer.Engine
|
|||
progress?.Report(1);
|
||||
}
|
||||
}
|
||||
else if (itemField.FieldType == ParquetSchemaElement.FieldTypeId.Struct)
|
||||
else if (itemField.FieldType == FieldTypeId.Struct)
|
||||
{
|
||||
//Read struct data as a new datatable
|
||||
DataTableLite structFieldTable = BuildDataTable(itemField, itemField.Children.Select(f => f.Path).ToList(), (int)readRecords);
|
||||
|
|
@ -243,13 +247,24 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
|
||||
var columnValues = (ListValue)valueArray[columnOrdinal];
|
||||
for (var rowValueIndex = 0; rowValueIndex < columnValues.Length; rowValueIndex++)
|
||||
|
||||
if (columnValues.Data.Count == 0 && columnOrdinal != 0) //All values are null
|
||||
{
|
||||
for (var i = 0; i < newStructFieldTable.Rows.Count; i++)
|
||||
{
|
||||
newStructFieldTable.Rows[i][columnOrdinal] = DBNull.Value;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (var rowValueIndex = 0; rowValueIndex < columnValues.Data.Count; rowValueIndex++)
|
||||
{
|
||||
lastMilestone = $"#{rowIndex}-{columnOrdinal}-{rowValueIndex}";
|
||||
|
||||
var columnValue = columnValues.Data[rowValueIndex] ?? throw new SystemException("Column value missing during pivot");
|
||||
#region Hack for LIST_OF_STRUCT_OF_LIST_OF_STRUCT test
|
||||
if (columnValue is StructValue structValue && structValue.IsList)
|
||||
if (columnValue is StructValueExt structValue && structValue.IsList)
|
||||
{
|
||||
//We need to convert `columnValue` from struct to a list of structs as it was a nested structure
|
||||
var areTypesAsExpected = newStructFieldTable.Columns.Values.ElementAt(columnOrdinal).Type == typeof(ListValue);
|
||||
|
|
@ -258,14 +273,19 @@ namespace ParquetViewer.Engine
|
|||
throw new UnsupportedFieldException("Failed to pivot list of structs.");
|
||||
}
|
||||
|
||||
var nestedStructFieldTable = PivotTable(structValue.Data.Row, structValue.Data.Table.Clone());
|
||||
if (structValue.Data is not DataRowLite dataRowLite)
|
||||
{
|
||||
throw new InvalidDataException("Struct data wasn't the expected type.");
|
||||
}
|
||||
|
||||
var nestedStructFieldTable = PivotTable(structValue.Data.Row, dataRowLite.Table.Clone());
|
||||
var listValues = new ArrayList(nestedStructFieldTable.Rows.Count);
|
||||
for (var i = 0; i < nestedStructFieldTable.Rows.Count; i++)
|
||||
{
|
||||
var row = nestedStructFieldTable.GetRowAt(i);
|
||||
listValues.Add(new StructValue(itemField.Path, row));
|
||||
listValues.Add(new StructValueExt(row));
|
||||
}
|
||||
columnValue = new ListValue(listValues, typeof(StructValue));
|
||||
columnValue = new ListValue(listValues, typeof(StructValueExt));
|
||||
}
|
||||
#endregion
|
||||
|
||||
|
|
@ -296,7 +316,7 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
else
|
||||
{
|
||||
listValues.Add(new StructValue(itemField.Path, dataRow) { IsList = itemField.NumberOfListParents > 1 });
|
||||
listValues.Add(new StructValueExt(dataRow) { IsList = itemField.NumberOfListParents > 1 });
|
||||
}
|
||||
}
|
||||
return listValues;
|
||||
|
|
@ -307,7 +327,7 @@ namespace ParquetViewer.Engine
|
|||
if (isFirstColumn)
|
||||
dataTable.NewRow();
|
||||
|
||||
dataTable.Rows[rowIndex][fieldIndex] = new ListValue(listValues, typeof(StructValue));
|
||||
dataTable.Rows[rowIndex][fieldIndex] = new ListValue(listValues, typeof(StructValueExt));
|
||||
rowIndex++;
|
||||
}
|
||||
}
|
||||
|
|
@ -342,7 +362,7 @@ namespace ParquetViewer.Engine
|
|||
var keyDataEnumerable = keyDataColumn.GetDataWithPaddedNulls(keyField);
|
||||
var valueDataEnumerable = valueDataColumn.GetDataWithPaddedNulls(valueField);
|
||||
|
||||
var dataEnumerable = Helpers.PairEnumerables(keyDataEnumerable, valueDataEnumerable, DBNull.Value);
|
||||
var dataEnumerable = Engine.Helpers.PairEnumerables(keyDataEnumerable, valueDataEnumerable, DBNull.Value);
|
||||
|
||||
var levelCount = Math.Max(keyDataColumn.RepetitionLevels?.Length ?? 0, valueDataColumn.RepetitionLevels?.Length ?? 0);
|
||||
var fieldIndex = dataTable.Columns[field.Path]!.Ordinal;
|
||||
|
|
@ -447,7 +467,7 @@ namespace ParquetViewer.Engine
|
|||
else
|
||||
{
|
||||
var dataRow = structFieldTable.GetRowAt(i);
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new StructValue(field.Path, dataRow);
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new StructValueExt(dataRow);
|
||||
}
|
||||
rowIndex++;
|
||||
}
|
||||
|
|
@ -477,23 +497,23 @@ namespace ParquetViewer.Engine
|
|||
|
||||
private DataTableLite BuildDataTable(ParquetSchemaElement? parent, List<string> fields, int expectedRecordCount)
|
||||
{
|
||||
parent ??= this.ParquetSchemaTree;
|
||||
parent ??= (ParquetSchemaElement)this.Metadata.SchemaTree;
|
||||
DataTableLite dataTable = new(expectedRecordCount);
|
||||
foreach (var field in fields)
|
||||
{
|
||||
var schema = parent.GetChild(field);
|
||||
if (schema.FieldType == ParquetSchemaElement.FieldTypeId.List
|
||||
if (schema.FieldType == FieldTypeId.List
|
||||
|| schema.DataField?.IsArray == true)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(ListValue), parent);
|
||||
}
|
||||
else if (schema.FieldType == ParquetSchemaElement.FieldTypeId.Map)
|
||||
else if (schema.FieldType == FieldTypeId.Map)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(MapValue), parent);
|
||||
}
|
||||
else if (schema.FieldType == ParquetSchemaElement.FieldTypeId.Struct)
|
||||
else if (schema.FieldType == FieldTypeId.Struct)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(StructValue), parent);
|
||||
dataTable.AddColumn(field, typeof(StructValueExt), parent);
|
||||
}
|
||||
else if (schema.SchemaElement.Type == Parquet.Meta.Type.BYTE_ARRAY
|
||||
&& schema.SchemaElement.LogicalType is null
|
||||
|
|
@ -501,9 +521,20 @@ namespace ParquetViewer.Engine
|
|||
{
|
||||
dataTable.AddColumn(field, typeof(ByteArrayValue), parent);
|
||||
}
|
||||
else if (schema.DataField is DateTimeDataField dateField)
|
||||
{
|
||||
if (dateField.DateTimeFormat == DateTimeFormat.Date)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(DateOnly), parent);
|
||||
}
|
||||
else
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(DateTime), parent);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
var clrType = schema.DataField?.ClrType ?? throw new MalformedFieldException($"`{(parent is not null ? parent + "/" : string.Empty)}/{field}` has no data field");
|
||||
var clrType = schema.ClrType ?? throw new MalformedFieldException($"`{(parent is not null ? parent + "/" : string.Empty)}/{field}` has no data field");
|
||||
dataTable.AddColumn(field, clrType, parent);
|
||||
}
|
||||
}
|
||||
|
|
@ -535,4 +566,4 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
282
src/ParquetViewer.Engine.ParquetNET/ParquetEngine.cs
Normal file
282
src/ParquetViewer.Engine.ParquetNET/ParquetEngine.cs
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
using Parquet;
|
||||
using Parquet.Meta;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public partial class ParquetEngine : IParquetEngine, IDisposable
|
||||
{
|
||||
// One reader per opened parquet file (several when a folder of partitions was opened).
private readonly ParquetReader[] _parquetFiles;

// Lazily computed caches.
private long? _recordCount;
ParquetMetadata? _metadata = null;

/// <summary>The first reader; used as the source of schema and metadata.</summary>
private ParquetReader _defaultReader
{
    get
    {
        var firstReader = _parquetFiles.FirstOrDefault();
        return firstReader ?? throw new ParquetEngineException("No parquet readers available");
    }
}

private FileMetaData _thriftMetadata
{
    get
    {
        var metadata = _defaultReader.Metadata;
        return metadata ?? throw new ParquetEngineException("No thrift metadata was found");
    }
}

private ParquetSchema _schema => _defaultReader.Schema;

public string Path { get; }

public Dictionary<string, string> CustomMetadata => _defaultReader.CustomMetadata;

/// <summary>Sum of the row counts of all readers; readers without metadata count as zero. Cached after first use.</summary>
public long RecordCount
{
    get
    {
        if (_recordCount is null)
        {
            _recordCount = _parquetFiles.Sum(pf => pf.Metadata?.NumRows ?? 0);
        }
        return _recordCount.Value;
    }
}

public int NumberOfPartitions => _parquetFiles.Length;

/// <summary>Names of the top-level schema fields of the default reader (a new list on each call).</summary>
public List<string> Fields => _schema.Fields.Select(f => f.Name).ToList();

/// <summary>Metadata wrapper built on first access from the thrift metadata, the schema tree and the record count.</summary>
public IParquetMetadata Metadata => _metadata ??= new ParquetMetadata(_thriftMetadata, BuildParquetSchemaTree(), (int)RecordCount);

/// <summary>Instances are created through the static Open* factory methods.</summary>
/// <exception cref="ArgumentNullException"><paramref name="parquetFiles"/> is null.</exception>
private ParquetEngine(string fileOrFolderPath, params ParquetReader[] parquetFiles)
{
    if (parquetFiles is null)
    {
        throw new ArgumentNullException(nameof(parquetFiles), "No parquet readers provided");
    }

    _parquetFiles = parquetFiles;
    Path = fileOrFolderPath;
}
|
||||
|
||||
/// <summary>
/// Builds the schema tree from the flat thrift schema list, then attaches each data field of the
/// reader's schema to the tree node found by walking the field's path from the root.
/// </summary>
/// <exception cref="ParquetException">The thrift metadata has no schema.</exception>
/// <exception cref="MalformedFieldException">A data field has no first path part.</exception>
private ParquetSchemaElement BuildParquetSchemaTree()
{
    var flatSchema = _thriftMetadata.Schema ?? throw new ParquetException("No thrift metadata was found");
    var cursor = flatSchema.GetEnumerator();
    var root = ReadSchemaTree(ref cursor);

    foreach (var dataField in _schema.GetDataFields())
    {
        var topLevelName = dataField.Path.FirstPart
            ?? throw new MalformedFieldException($"Field has no schema path: `{dataField.Name}`");
        var node = root.GetChild(topLevelName);

        // Descend through the remaining path parts.
        var depth = 1;
        while (depth < dataField.Path.Length)
        {
            node = node.GetChild(dataField.Path[depth]);
            depth++;
        }

        node.DataField = dataField; //if it doesn't have a child it's a datafield (I hope)
    }

    return root;
}
|
||||
|
||||
/// <summary>
/// Consumes the next element from <paramref name="schemaElements"/>, wraps it, and then recursively
/// reads as many following subtrees as that element's <c>NumChildren</c> declares, adding each as a child.
/// </summary>
/// <exception cref="ParquetException">The enumerator ran out of elements.</exception>
private static ParquetSchemaElement ReadSchemaTree(ref List<SchemaElement>.Enumerator schemaElements)
{
    var hasNext = schemaElements.MoveNext();
    if (!hasNext)
    {
        throw new ParquetException("Invalid parquet schema");
    }

    var thriftElement = schemaElements.Current;
    var node = new ParquetSchemaElement(thriftElement);

    var childIndex = 0;
    while (childIndex < thriftElement.NumChildren)
    {
        var child = ReadSchemaTree(ref schemaElements);
        node.AddChild(child);
        childIndex++;
    }

    return node;
}
|
||||
|
||||
/// <summary>
/// Opens <paramref name="fileOrFolderPath"/> as a single parquet file when it is an existing file,
/// or as a folder of parquet files when it is an existing directory.
/// </summary>
/// <exception cref="FileNotFoundException">Thrown synchronously when the path is neither an existing file nor directory.</exception>
public static Task<ParquetEngine> OpenFileOrFolderAsync(string fileOrFolderPath, CancellationToken cancellationToken)
{
    // File.Exists / Directory.Exists return false for null, so no separate null check is needed.
    if (File.Exists(fileOrFolderPath))
    {
        return OpenFileAsync(fileOrFolderPath, cancellationToken);
    }

    if (Directory.Exists(fileOrFolderPath))
    {
        return OpenFolderAsync(fileOrFolderPath, cancellationToken);
    }

    throw new FileNotFoundException($"Could not find file or folder at location: {fileOrFolderPath}");
}
|
||||
|
||||
/// <summary>
/// Opens a single parquet file and wraps its reader in a <see cref="ParquetEngine"/>.
/// Dates are read using the <c>DateOnly</c> type.
/// </summary>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="FileReadException">Any failure while creating the reader or the engine.</exception>
public static async Task<ParquetEngine> OpenFileAsync(string parquetFilePath, CancellationToken cancellationToken)
{
    var fileExists = File.Exists(parquetFilePath); //Handles null
    if (!fileExists)
    {
        throw new FileNotFoundException($"Could not find parquet file at: {parquetFilePath}");
    }

    try
    {
        var reader = await ParquetReader.CreateAsync(parquetFilePath, new() { UseDateOnlyTypeForDates = true }, cancellationToken);
        var engine = new ParquetEngine(parquetFilePath, reader);
        return engine;
    }
    catch (Exception readFailure)
    {
        throw new FileReadException(readFailure);
    }
}
|
||||
|
||||
/// <summary>
/// Opens every parquet file found under <paramref name="folderPath"/> and groups the readers by schema.
/// Succeeds only when all files could be read and they all share a single schema.
/// Every reader opened so far is disposed if the method exits with an exception (including cancellation).
/// </summary>
/// <exception cref="DirectoryNotFoundException">The folder does not exist.</exception>
/// <exception cref="FileNotFoundException">No parquet files were found.</exception>
/// <exception cref="AllFilesSkippedException">Every file failed to open.</exception>
/// <exception cref="MultipleSchemasFoundException">The files do not share one schema.</exception>
/// <exception cref="SomeFilesSkippedException">One schema was found but some files failed to open.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
public static async Task<ParquetEngine> OpenFolderAsync(string folderPath, CancellationToken cancellationToken)
{
    if (!Directory.Exists(folderPath)) //Handles null
    {
        throw new DirectoryNotFoundException($"Directory doesn't exist: {folderPath}");
    }

    var skippedFiles = new Dictionary<string, Exception>();
    var fileGroups = new Dictionary<ParquetSchema, List<ParquetReader>>();

    try
    {
        foreach (var file in Engine.Helpers.ListParquetFiles(folderPath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var parquetReader = await ParquetReader.CreateAsync(file, new() { UseDateOnlyTypeForDates = true }, cancellationToken);
                if (!fileGroups.TryGetValue(parquetReader.Schema, out var readersForSchema))
                {
                    readersForSchema = new List<ParquetReader>();
                    fileGroups.Add(parquetReader.Schema, readersForSchema);
                }

                readersForSchema.Add(parquetReader);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // A cancelled open is not a "skipped file"; let the cancellation propagate.
                throw;
            }
            catch (Exception ex)
            {
                skippedFiles.Add(System.IO.Path.GetRelativePath(folderPath, file), ex);
            }
        }

        if (fileGroups.Keys.Count == 0)
        {
            if (skippedFiles.Count == 0)
            {
                throw new FileNotFoundException("Directory is empty");
            }

            throw new AllFilesSkippedException(skippedFiles);
        }

        if (fileGroups.Keys.Count > 1)
        {
            //We found more than one type of schema.
            throw new MultipleSchemasFoundException(fileGroups.Keys.ToList()
                .Select(schema => schema.Fields.Select(f => f.Name).ToList()).ToList());
        }

        if (skippedFiles.Count > 0)
        {
            //We found one schema but some files couldn't be read
            throw new SomeFilesSkippedException(skippedFiles);
        }

        cancellationToken.ThrowIfCancellationRequested();

        return new ParquetEngine(folderPath, fileGroups.Values.First().ToArray());
    }
    catch
    {
        // Single cleanup point: release every reader opened so far, whatever the failure was.
        foreach (var fileGroupList in fileGroups.Values)
        {
            Engine.Helpers.EZDispose(fileGroupList);
        }

        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Walks <c>_parquetFiles</c> in order and yields each reader that is not entirely covered by
/// <paramref name="offset"/>, together with the offset still to be applied inside that reader.
/// </summary>
/// <param name="offset">Number of rows to skip, counted across the readers in order.</param>
/// <returns>
/// Pairs of (remaining offset, reader). Once a reader has been yielded the remaining offset is 0
/// for every later reader.
/// </returns>
private IEnumerable<(long RemainingOffset, ParquetReader ParquetReader)> GetReaders(long offset)
{
    var remaining = offset;
    foreach (var reader in _parquetFiles)
    {
        var rowCount = reader.Metadata?.NumRows;

        // A reader without metadata is never skipped; otherwise skip it only when the
        // remaining offset covers all of its rows.
        if (rowCount is null || remaining < rowCount.Value)
        {
            yield return (remaining, reader);
            remaining = 0;
        }
        else
        {
            remaining -= rowCount.Value;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="dataTable"/> to a parquet file at <paramref name="path"/>, using the fields of this
/// engine's schema whose names match the table's columns.
/// </summary>
/// <param name="dataTable">Rows to write. Every column name must exist in the engine's schema.</param>
/// <param name="path">Destination file. An existing file is replaced.</param>
/// <param name="cancellationToken">When cancellation is requested, writing stops without throwing from this method's own checks.</param>
/// <param name="progress">Receives the number of values written after each column of each row group.</param>
/// <param name="customMetadata">Optional key/value metadata assigned to the writer.</param>
/// <exception cref="InvalidOperationException">A table column has no field with the same name in the schema.</exception>
public async Task WriteDataToParquetFileAsync(DataTable dataTable, string path,
    CancellationToken cancellationToken, IProgress<int> progress, Dictionary<string, string>? customMetadata)
{
    var fields = new List<Field>(dataTable.Columns.Count);
    foreach (DataColumn column in dataTable.Columns)
    {
        fields.Add(this._schema.Fields
            .First(field => field.Name.Equals(column.ColumnName, StringComparison.InvariantCulture)));
    }
    var parquetSchema = new ParquetSchema(fields);

    // FileMode.Create truncates an existing file. With OpenOrCreate, overwriting a larger file
    // left its old trailing bytes after the newly written content.
    using var fs = new FileStream(path, FileMode.Create);
    using var parquetWriter = await ParquetWriter.CreateAsync(parquetSchema, fs, cancellationToken: cancellationToken);
    parquetWriter.CompressionLevel = System.IO.Compression.CompressionLevel.Optimal;
    if (customMetadata is not null)
        parquetWriter.CustomMetadata = customMetadata;

    const int MAX_ROWS_PER_ROWGROUP = 100_000; //Without batching we sometimes get "OverflowException: Array dimensions exceeded supported range" from Parquet.NET
    var totalRowCount = dataTable.Rows.Count;
    var rowsWritten = 0;

    // Termination is driven by the row count rather than by the length of the last column read, so:
    //  - a table whose row count is an exact multiple of the batch size gets no trailing empty row group
    //  - a schema without data fields cannot loop forever
    // do/while keeps the previous behavior of writing one (empty) row group for an empty table.
    do
    {
        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        using var rowGroup = parquetWriter.CreateRowGroup();
        foreach (var dataField in parquetSchema.DataFields)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            var type = dataField.IsNullable ? GetNullableVersion(dataField.ClrType) : dataField.ClrType;
            var values = GetColumnValues(dataTable, type, dataField.Name, rowsWritten, MAX_ROWS_PER_ROWGROUP);
            var dataColumn = new Parquet.Data.DataColumn(dataField, values);
            await rowGroup.WriteColumnAsync(dataColumn, cancellationToken);
            progress.Report(values.Length); //No way to report progress for each row, so do it by column
        }

        rowsWritten += Math.Min(MAX_ROWS_PER_ROWGROUP, totalRowCount - rowsWritten);
    }
    while (rowsWritten < totalRowCount);
}
|
||||
|
||||
/// <summary>
/// Passes the readers held in <c>_parquetFiles</c> to <c>Engine.Helpers.EZDispose</c>.
/// </summary>
public void Dispose()
{
    Engine.Helpers.EZDispose(_parquetFiles);
}
|
||||
|
||||
/// <summary>
/// Returns the type to use when values of <paramref name="sourceType"/> may be null.
/// </summary>
/// <param name="sourceType">The type to convert.</param>
/// <returns>
/// <paramref name="sourceType"/> itself when it is not a value type or is already a <see cref="Nullable{T}"/>;
/// otherwise <c>Nullable&lt;sourceType&gt;</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sourceType"/> is null.</exception>
private static System.Type GetNullableVersion(System.Type sourceType)
{
    if (sourceType == null)
    {
        throw new ArgumentNullException(nameof(sourceType));
    }

    // Reference types can already hold null.
    if (!sourceType.IsValueType)
    {
        return sourceType;
    }

    // Nullable<T> must not be wrapped a second time.
    if (sourceType.IsGenericType && sourceType.GetGenericTypeDefinition() == typeof(Nullable<>))
    {
        return sourceType;
    }

    return typeof(Nullable<>).MakeGenericType(sourceType);
}
|
||||
|
||||
/// <summary>
/// Copies a window of one column of <paramref name="dataTable"/> into a newly created array.
/// </summary>
/// <param name="dataTable">Table to read from.</param>
/// <param name="type">Element type of the returned array.</param>
/// <param name="columnName">Name of the column to read.</param>
/// <param name="skipCount">Number of leading rows to skip; must not be negative.</param>
/// <param name="fetchCount">Maximum number of rows to read; must be greater than zero.</param>
/// <returns>
/// An array of <paramref name="type"/> whose length is the smaller of <paramref name="fetchCount"/> and the
/// number of rows left after skipping. <see cref="DBNull"/> cells are stored as null and
/// <c>IByteArrayValue</c> cells are stored as their <c>Data</c>.
/// </returns>
/// <exception cref="ArgumentException">The column does not exist in the table.</exception>
/// <exception cref="NotSupportedException">A cell holds a list, map or struct value.</exception>
private static Array GetColumnValues(DataTable dataTable, System.Type type, string columnName, int skipCount, int fetchCount)
{
    ArgumentNullException.ThrowIfNull(dataTable);
    ArgumentNullException.ThrowIfNull(type);
    ArgumentOutOfRangeException.ThrowIfLessThan(skipCount, 0);
    ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(fetchCount, 0);

    if (!dataTable.Columns.Contains(columnName))
        throw new ArgumentException($"Column `{columnName}` does not exist in the datatable");

    var rowsAfterSkip = dataTable.Rows.Count - skipCount;
    var resultLength = Math.Min(fetchCount, rowsAfterSkip);
    var values = Array.CreateInstance(type, resultLength);

    // Read exactly the rows in [skipCount, skipCount + resultLength).
    for (var offset = 0; offset < resultLength; offset++)
    {
        object? cell = dataTable.Rows[skipCount + offset][columnName];

        if (cell == DBNull.Value)
        {
            cell = null;
        }
        else if (cell is IByteArrayValue byteArray)
        {
            cell = byteArray.Data;
        }
        else if (cell is IListValue || cell is IMapValue || cell is IStructValue)
        {
            throw new NotSupportedException("List, Map, and Struct types are currently not supported.");
        }

        values.SetValue(cell, offset);
    }

    return values;
}
|
||||
}
|
||||
}
|
||||
305
src/ParquetViewer.Engine.ParquetNET/ParquetMetadata.cs
Normal file
305
src/ParquetViewer.Engine.ParquetNET/ParquetMetadata.cs
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
using Parquet.Meta;
|
||||
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
/// <summary>
/// Snapshot of a parquet file's footer metadata, built from the thrift <see cref="FileMetaData"/>
/// and the already-parsed schema tree.
/// </summary>
public class ParquetMetadata : IParquetMetadata
{
    public int ParquetVersion { get; }

    public int RowGroupCount { get; }

    public string CreatedBy { get; }

    public ICollection<IRowGroupMetadata> RowGroups { get; }

    public IParquetSchemaElement SchemaTree { get; }

    public int RowCount { get; }

    /// <summary>
    /// Copies the file-level values and builds one <see cref="RowGroupMetadata"/> per thrift row group.
    /// </summary>
    /// <param name="thriftMetadata">Thrift footer metadata to read from.</param>
    /// <param name="schemaTree">Root of the schema tree, used to resolve each column's schema element.</param>
    /// <param name="recordCount">Value exposed as <see cref="RowCount"/>.</param>
    public ParquetMetadata(FileMetaData thriftMetadata, ParquetSchemaElement schemaTree, int recordCount)
    {
        RowCount = recordCount;
        RowGroupCount = thriftMetadata.RowGroups.Count;
        ParquetVersion = thriftMetadata.Version;
        CreatedBy = thriftMetadata.CreatedBy ?? string.Empty;
        SchemaTree = schemaTree;

        var rowGroups = new List<IRowGroupMetadata>();
        var nextRowGroupIndex = 0;
        foreach (var rowGroup in thriftMetadata.RowGroups)
        {
            // Position within the file; used as the ordinal when the row group does not carry one.
            var rowGroupIndex = nextRowGroupIndex++;

            var columns = new List<IRowGroupColumnMetadata>();
            var nextColumnIndex = 0;
            foreach (var column in rowGroup.Columns)
            {
                // The index advances for columns without metadata as well, so ids stay positional.
                var columnIndex = nextColumnIndex++;
                var meta = column.MetaData;
                if (meta is null)
                    continue;

                var field = TryResolveField(schemaTree, meta.PathInSchema);

                columns.Add(new RowGroupColumnMetadata(
                    columnIndex,
                    string.Join("/", meta.PathInSchema),
                    meta.Type.ToString(),
                    (int)meta.NumValues,
                    meta.TotalUncompressedSize,
                    meta.TotalCompressedSize,
                    meta.DataPageOffset,
                    meta.IndexPageOffset,
                    meta.DictionaryPageOffset,
                    meta.Statistics is null
                        ? null
                        : new RowGroupColumnStatistics(
                            meta.Statistics.Min,
                            meta.Statistics.Max,
                            meta.Statistics.NullCount,
                            meta.Statistics.DistinctCount,
                            meta.Statistics.MinValue,
                            meta.Statistics.MaxValue,
                            meta.Statistics.IsMinValueExact,
                            meta.Statistics.IsMaxValueExact,
                            field),
                    meta.BloomFilterOffset,
                    meta.BloomFilterLength));
            }

            var sortingColumns = rowGroup.SortingColumns?
                .Select(sc => (ISortingColumnMetadata)new SortingColumnMetadata(sc.ColumnIdx, sc.Descending, sc.NullsFirst))
                .ToList();

            rowGroups.Add(new RowGroupMetadata(
                rowGroup.Ordinal.HasValue ? (int)rowGroup.Ordinal.Value : rowGroupIndex,
                (int)rowGroup.NumRows,
                rowGroup.Columns.Count,
                sortingColumns,
                columns,
                rowGroup.FileOffset ?? 0,
                rowGroup.TotalByteSize,
                rowGroup.TotalCompressedSize ?? 0));
        }

        RowGroups = rowGroups;
    }

    /// <summary>
    /// Follows <paramref name="pathInSchema"/> from <paramref name="root"/> through successive
    /// <c>GetChild</c> calls. Best effort: any failure yields null instead of an exception.
    /// </summary>
    private static ParquetSchemaElement? TryResolveField(ParquetSchemaElement root, IEnumerable<string> pathInSchema)
    {
        try
        {
            var node = root;
            foreach (var segment in pathInSchema)
            {
                node = node.GetChild(segment);
            }
            return node;
        }
        catch
        {
            /*swallow*/
            return null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Immutable holder for the metadata of one row group; every property returns the
/// constructor argument of the matching name.
/// </summary>
public class RowGroupMetadata(int ordinal, int rowCount, int columnCount, ICollection<ISortingColumnMetadata>? sortingColumnMetadata,
    ICollection<IRowGroupColumnMetadata>? columns, long fileOffset, long totalByteSize, long totalCompressedSize) : IRowGroupMetadata
{
    public int Ordinal { get; } = ordinal;

    public int RowCount { get; } = rowCount;

    public int ColumnCount { get; } = columnCount;

    /// <summary>Set from the <c>sortingColumnMetadata</c> constructor argument; may be null.</summary>
    public ICollection<ISortingColumnMetadata>? SortingColumns { get; } = sortingColumnMetadata;

    public ICollection<IRowGroupColumnMetadata>? Columns { get; } = columns;

    public long FileOffset { get; } = fileOffset;

    public long TotalByteSize { get; } = totalByteSize;

    public long TotalCompressedSize { get; } = totalCompressedSize;
}
|
||||
|
||||
/// <summary>
/// Immutable holder for one sorting-column entry of a row group; every property returns the
/// constructor argument of the matching name.
/// </summary>
public class SortingColumnMetadata(int columnIdx, bool descending, bool nullsFirst) : ISortingColumnMetadata
{
    public int ColumnIdx { get; } = columnIdx;
    public bool Descending { get; } = descending;
    public bool NullsFirst { get; } = nullsFirst;
}
|
||||
|
||||
/// <summary>
/// Immutable holder for the metadata of one column chunk inside a row group; every property
/// returns the constructor argument of the matching name.
/// </summary>
public class RowGroupColumnMetadata(
    int? columnId,
    string? pathInSchema,
    string? type,
    int? numValues,
    long? totalUncompressedSize,
    long? totalCompressedSize,
    long? dataPageOffset,
    long? indexPageOffset,
    long? dictionaryPageOffset,
    RowGroupColumnStatistics? statistics,
    long? bloomFilterOffset,
    long? bloomFilterLength) : IRowGroupColumnMetadata
{
    public int? ColumnId { get; } = columnId;

    public string? PathInSchema { get; } = pathInSchema;

    public string? Type { get; } = type;

    public int? NumValues { get; } = numValues;

    public long? TotalUncompressedSize { get; } = totalUncompressedSize;

    public long? TotalCompressedSize { get; } = totalCompressedSize;

    public long? DataPageOffset { get; } = dataPageOffset;

    public long? IndexPageOffset { get; } = indexPageOffset;

    public long? DictionaryPageOffset { get; } = dictionaryPageOffset;

    public IRowGroupColumnStatistics? Statistics { get; } = statistics;

    public long? BloomFilterOffset { get; } = bloomFilterOffset;

    public long? BloomFilterLength { get; } = bloomFilterLength;
}
|
||||
|
||||
/// <summary>
/// Column-chunk statistics. When a schema element is supplied, the raw min/max byte arrays are
/// decoded (best effort) into values of the element's CLR type.
/// </summary>
public class RowGroupColumnStatistics : IRowGroupColumnStatistics
{
    // Decoders for the CLR types that need nothing beyond the raw bytes.
    // DateTime is handled separately because it also needs the field's logical type.
    private static readonly Dictionary<System.Type, Func<byte[], object>> PrimitiveDecoders = new()
    {
        [typeof(string)] = bytes => System.Text.Encoding.UTF8.GetString(bytes),
        [typeof(byte)] = bytes => BitConverter.ToUInt32(bytes, 0),
        [typeof(sbyte)] = bytes => BitConverter.ToInt32(bytes, 0),
        [typeof(short)] = bytes => BitConverter.ToInt16(bytes, 0),
        [typeof(ushort)] = bytes => BitConverter.ToUInt16(bytes, 0),
        [typeof(int)] = bytes => BitConverter.ToInt32(bytes, 0),
        [typeof(uint)] = bytes => BitConverter.ToUInt32(bytes, 0),
        [typeof(long)] = bytes => BitConverter.ToInt64(bytes, 0),
        [typeof(ulong)] = bytes => BitConverter.ToUInt64(bytes, 0),
        [typeof(float)] = bytes => BitConverter.ToSingle(bytes, 0),
        [typeof(double)] = bytes => BitConverter.ToDouble(bytes, 0),
        [typeof(bool)] = bytes => BitConverter.ToBoolean(bytes, 0),
        [typeof(DateOnly)] = bytes => DateOnly.FromDateTime(DateTime.UnixEpoch)
            .AddDays(BitConverter.ToInt32(bytes, 0)),
        [typeof(Guid)] = bytes => new Guid(bytes),
    };

    public object? Min { get; }
    public object? Max { get; }
    public long? NullCount { get; }
    public long? DistinctCount { get; }
    public object? MinValue { get; }
    public object? MaxValue { get; }
    public bool? IsMinValueExact { get; }
    public bool? IsMaxValueExact { get; }

    /// <summary>
    /// Stores the statistics. <paramref name="min"/> / <paramref name="max"/> are cleared when they
    /// duplicate <paramref name="minValue"/> / <paramref name="maxValue"/>.
    /// </summary>
    /// <param name="field">
    /// Schema element of the column. When null, the four min/max arguments are stored unchanged;
    /// otherwise each is cast to <c>byte[]</c> and decoded.
    /// </param>
    public RowGroupColumnStatistics(object? min, object? max, long? nullCount, long? distinctCount,
        object? minValue, object? maxValue, bool? isMinValueExact, bool? isMaxValueExact, ParquetSchemaElement? field)
    {
        static bool IsDuplicate(object? deprecated, object? current)
            => deprecated is not null && current is not null
                && Engine.Helpers.ByteArraysEqual(deprecated as byte[], current as byte[]) == 0;

        object? Decode(object? raw)
            => field is not null ? TryDeserializeValue(raw as byte[], field) : raw;

        //don't show the same data twice in the deprecated field
        if (IsDuplicate(min, minValue))
            min = null;
        if (IsDuplicate(max, maxValue))
            max = null;

        Min = Decode(min);
        Max = Decode(max);
        MinValue = Decode(minValue);
        MaxValue = Decode(maxValue);

        NullCount = nullCount;
        DistinctCount = distinctCount;
        IsMinValueExact = isMinValueExact;
        IsMaxValueExact = isMaxValueExact;
    }

    /// <summary>
    /// Decodes <paramref name="value"/> according to <c>field.ClrType</c>. Returns the input unchanged
    /// when it is null or empty, when the CLR type has no decoder, or when decoding throws.
    /// </summary>
    private object? TryDeserializeValue(byte[]? value, ParquetSchemaElement field)
    {
        if (value == null || value.Length == 0)
            return value;

        try
        {
            var clrType = field.ClrType;

            if (clrType == typeof(DateTime))
                return DecodeTimestamp(value, field);

            //give up when no decoder is registered
            return PrimitiveDecoders.TryGetValue(clrType, out var decode) ? decode(value) : value;
        }
        catch
        {
            return value;
        }
    }

    /// <summary>
    /// Reads a 64-bit integer and offsets <see cref="DateTime.UnixEpoch"/> by it, using the unit of
    /// the field's TIMESTAMP logical type. Without a recognised unit the raw integer is returned.
    /// </summary>
    private static object DecodeTimestamp(byte[] value, ParquetSchemaElement field)
    {
        var rawValue = BitConverter.ToInt64(value, 0);
        var unit = field.SchemaElement.LogicalType?.TIMESTAMP?.Unit;

        if (unit?.MILLIS is not null)
            return DateTime.UnixEpoch.AddMilliseconds(rawValue);

        if (unit?.MICROS is not null)
            return DateTime.UnixEpoch.AddMicroseconds(rawValue);

        if (unit?.NANOS is not null)
            return DateTime.UnixEpoch.AddMicroseconds(rawValue / 1000);

        return rawValue;
    }
}
|
||||
}
|
||||
|
|
@ -1,10 +1,13 @@
|
|||
using Parquet.Meta;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization.Metadata;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public class ParquetSchemaElement
|
||||
public class ParquetSchemaElement : IParquetSchemaElement
|
||||
{
|
||||
public string Path => SchemaElement.Name;
|
||||
public string PathWithParent => string.Concat(this.Parent?.Parent is not null /*exclude root node*/ ? (this.Parent.Path + "/") : string.Empty, Path);
|
||||
|
|
@ -40,9 +43,9 @@ namespace ParquetViewer.Engine
|
|||
{
|
||||
if (this.DataField is not null)
|
||||
return FieldTypeId.Primitive;
|
||||
else if (this.SchemaElement.LogicalType?.LIST is not null || this.SchemaElement.ConvertedType == ConvertedType.LIST)
|
||||
else if (this.SchemaElement.LogicalType?.LIST is not null || this.SchemaElement.ConvertedType == Parquet.Meta.ConvertedType.LIST)
|
||||
return FieldTypeId.List;
|
||||
else if (this.SchemaElement.LogicalType?.MAP is not null || this.SchemaElement.ConvertedType == ConvertedType.MAP)
|
||||
else if (this.SchemaElement.LogicalType?.MAP is not null || this.SchemaElement.ConvertedType == Parquet.Meta.ConvertedType.MAP)
|
||||
return FieldTypeId.Map;
|
||||
else if (this.SchemaElement.NumChildren > 0) //Struct
|
||||
return FieldTypeId.Struct;
|
||||
|
|
@ -168,27 +171,175 @@ namespace ParquetViewer.Engine
|
|||
return field;
|
||||
}
|
||||
public bool BelongsToListField => this._systemFieldType == SystemFieldTypeId.ListItemNode;
|
||||
public bool BelongsToListOfStructsField =>
|
||||
public bool BelongsToListOfStructsField =>
|
||||
this.Parent?._systemFieldType == SystemFieldTypeId.ListItemNode && this.Parent?.FieldType == FieldTypeId.Struct;
|
||||
public int NumberOfListParents => _parentsExcludingRoot.Count(field => field.SchemaElement.RepetitionType == FieldRepetitionType.REPEATED);
|
||||
public int NumberOfListParents => _parentsExcludingRoot.Count(@field => @field.SchemaElement.RepetitionType == FieldRepetitionType.REPEATED);
|
||||
|
||||
public int CurrentDefinitionLevel => _parentsExcludingRoot.Append(this)
|
||||
.Count(
|
||||
field => field.SchemaElement.RepetitionType == FieldRepetitionType.OPTIONAL
|
||||
|| (field._systemFieldType == SystemFieldTypeId.ListNode && field.Parent?._systemFieldType == SystemFieldTypeId.ListItemNode) //Fixes list-of-lists tests
|
||||
@field => @field.SchemaElement.RepetitionType == FieldRepetitionType.OPTIONAL
|
||||
|| (@field._systemFieldType == SystemFieldTypeId.ListNode && @field.Parent?._systemFieldType == SystemFieldTypeId.ListItemNode) //Fixes list-of-lists tests
|
||||
);
|
||||
|
||||
public bool IsPrimitive => FieldType == FieldTypeId.Primitive;
|
||||
|
||||
ICollection<IParquetSchemaElement> IParquetSchemaElement.Children => this.Children.ToList<IParquetSchemaElement>();
|
||||
|
||||
/// <summary>
/// CLR type used for values of this field: the data field's own CLR type when one is available,
/// otherwise the wrapper type matching <see cref="FieldType"/>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// No data-field CLR type is available and the field type is not List, Map or Struct.
/// </exception>
public System.Type ClrType
{
    get
    {
        var dataFieldType = this.DataField?.ClrType;
        if (dataFieldType is not null)
        {
            return dataFieldType;
        }

        switch (this.FieldType)
        {
            case FieldTypeId.List:
                return typeof(ListValue);
            case FieldTypeId.Map:
                return typeof(MapValue);
            case FieldTypeId.Struct:
                return typeof(StructValueExt);
            default:
                throw new InvalidOperationException("Cannot determine CLR type for primitive field without ClrType information.");
        }
    }
}
|
||||
|
||||
public object? LogicalType => LogicalTypeToJSONObject(this.SchemaElement.LogicalType);
|
||||
|
||||
/// <summary>
/// Projects a thrift <see cref="LogicalType"/> onto a small anonymous object: a <c>Name</c> plus,
/// for DECIMAL, TIME, TIMESTAMP and INTEGER, that type's parameters.
/// </summary>
/// <param name="logicalType">Logical type to describe; may be null.</param>
/// <returns>
/// Null when <paramref name="logicalType"/> is null; otherwise an anonymous object for the first
/// member that is set, checked in the order below.
/// </returns>
private static object? LogicalTypeToJSONObject(LogicalType? logicalType)
{
    if (logicalType is null)
        return null;

    if (logicalType.STRING is not null)
        return new { Name = nameof(logicalType.STRING) };

    if (logicalType.MAP is not null)
        return new { Name = nameof(logicalType.MAP) };

    if (logicalType.LIST is not null)
        return new { Name = nameof(logicalType.LIST) };

    if (logicalType.ENUM is not null)
        return new { Name = nameof(logicalType.ENUM) };

    if (logicalType.DECIMAL is { } decimalType)
        return new { Name = nameof(logicalType.DECIMAL), decimalType.Scale, decimalType.Precision };

    if (logicalType.DATE is not null)
        return new { Name = nameof(logicalType.DATE) };

    if (logicalType.TIME is { } timeType)
        return new { Name = nameof(logicalType.TIME), timeType.IsAdjustedToUTC, Unit = TimeUnitToString(timeType.Unit) };

    if (logicalType.TIMESTAMP is { } timestampType)
        return new { Name = nameof(logicalType.TIMESTAMP), timestampType.IsAdjustedToUTC, Unit = TimeUnitToString(timestampType.Unit) };

    if (logicalType.INTEGER is { } integerType)
        return new { Name = nameof(logicalType.INTEGER), integerType.BitWidth, integerType.IsSigned };

    if (logicalType.JSON is not null)
        return new { Name = nameof(logicalType.JSON) };

    if (logicalType.BSON is not null)
        return new { Name = nameof(logicalType.BSON) };

    if (logicalType.UUID is not null)
        return new { Name = nameof(logicalType.UUID) };

    // When UNKNOWN is set, the name reported is the runtime type name of its value.
    if (logicalType.UNKNOWN is not null)
        return new { Name = $"{logicalType.UNKNOWN.GetType().Name}" };

    // No member is set at all.
    return new { Name = nameof(logicalType.UNKNOWN) };
}
|
||||
|
||||
/// <summary>
/// Returns the name of the member that is set on <paramref name="timeUnit"/>
/// ("MILLIS", "MICROS" or "NANOS", checked in that order).
/// </summary>
/// <returns>The member name, or an empty string when <paramref name="timeUnit"/> is null or none is set.</returns>
static string TimeUnitToString(TimeUnit? timeUnit)
{
    if (timeUnit?.MILLIS is not null)
        return nameof(timeUnit.MILLIS);

    if (timeUnit?.MICROS is not null)
        return nameof(timeUnit.MICROS);

    if (timeUnit?.NANOS is not null)
        return nameof(timeUnit.NANOS);

    return string.Empty;
}
|
||||
|
||||
/// <summary>
/// Repetition of this schema element expressed as the engine's <see cref="RepetitionTypeId"/>;
/// null when the schema element's repetition type is none of REQUIRED, OPTIONAL or REPEATED.
/// </summary>
public RepetitionTypeId? RepetitionType
{
    get
    {
        switch (this.SchemaElement.RepetitionType)
        {
            case FieldRepetitionType.REQUIRED:
                return RepetitionTypeId.Required;
            case FieldRepetitionType.OPTIONAL:
                return RepetitionTypeId.Optional;
            case FieldRepetitionType.REPEATED:
                return RepetitionTypeId.Repeated;
            default:
                return null;
        }
    }
}
|
||||
|
||||
public int? TypeLength => this.SchemaElement.TypeLength;
|
||||
public int? NumChildren => this.SchemaElement.NumChildren;
|
||||
public string? ConvertedType => this.SchemaElement.ConvertedType?.ToString();
|
||||
public int? Scale => this.SchemaElement.Scale;
|
||||
public int? Precision => this.SchemaElement.Precision;
|
||||
object? IParquetSchemaElement.LogicalType => this.LogicalType;
|
||||
public string? Type => this.SchemaElement.Type?.ToString();
|
||||
|
||||
/// <summary>
/// Builds (does not throw) the exception describing a failed attempt to get a system field node
/// of kind <paramref name="fieldType"/> from this element.
/// </summary>
/// <param name="fieldType">The kind of system node that was requested.</param>
/// <returns>An <see cref="InvalidOperationException"/> whose message names the parent's system field type, the parent/own paths and the parent/own field types.</returns>
private Exception GetSystemFieldAccessException(SystemFieldTypeId fieldType)
{
    var combinedPath = this.Parent?.Path + '/' + this.Path;
    var combinedTypes = this.Parent?.FieldType.ToString() + '/' + this.FieldType.ToString();

    return new InvalidOperationException($"Can't get {fieldType} node from '{this.Parent?._systemFieldType}' " +
        $"for `{combinedPath}` with types '{combinedTypes}'");
}
|
||||
|
||||
public enum FieldTypeId
|
||||
{
|
||||
Primitive,
|
||||
List,
|
||||
Struct,
|
||||
Map
|
||||
}
|
||||
IParquetSchemaElement IParquetSchemaElement.GetChildCI(string name)
|
||||
=> GetChildCI(name);
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetChild(string name)
|
||||
=> GetChild(name);
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetListField()
|
||||
=> GetListField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetListItemField()
|
||||
=> GetListItemField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetSingleOrByName(string name)
|
||||
=> GetSingleOrByName(name);
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetMapKeyValueField()
|
||||
=> GetMapKeyValueField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetMapKeyField()
|
||||
=> GetMapKeyField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetMapValueField()
|
||||
=> GetMapValueField();
|
||||
|
||||
private enum SystemFieldTypeId
|
||||
{
|
||||
|
|
@ -200,4 +351,4 @@ namespace ParquetViewer.Engine
|
|||
MapValueNode
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
<Configurations>Debug;Release;Release_SelfContained</Configurations>
|
||||
<ProduceReferenceAssembly>False</ProduceReferenceAssembly>
|
||||
<EnforceCodeStyleInBuild>True</EnforceCodeStyleInBuild>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_SelfContained|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Parquet.Net" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public class SimpleProgress : IProgress<int>
|
||||
{
|
||||
|
|
@ -11,4 +11,4 @@
|
|||
ProgressChanged?.Invoke(_progress);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
14
src/ParquetViewer.Engine.ParquetNET/Types/StructValue.cs
Normal file
14
src/ParquetViewer.Engine.ParquetNET/Types/StructValue.cs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
using ParquetViewer.Engine.Types;
|
||||
|
||||
namespace ParquetViewer.Engine.ParquetNET.Types
|
||||
{
|
||||
/// <summary>
/// <see cref="StructValue"/> subclass that additionally carries an assembly-internal
/// <see cref="IsList"/> flag.
/// </summary>
public class StructValueExt : StructValue
{
    /// <summary>Assembly-internal flag; this class only stores it.</summary>
    internal bool IsList { get; set; }

    /// <summary>Forwards <paramref name="data"/> to the <see cref="StructValue"/> constructor.</summary>
    internal StructValueExt(DataRowLite data) : base(data) { }
}
|
||||
}
|
||||
|
|
@ -4,9 +4,9 @@ using static ParquetViewer.Engine.DataTableLite;
|
|||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
internal class DataTableLite
|
||||
public class DataTableLite
|
||||
{
|
||||
internal record ColumnLite(string Name, Type Type, ParquetSchemaElement ParentSchema, int Ordinal);
|
||||
public record ColumnLite(string Name, Type Type, IParquetSchemaElement ParentSchema, int Ordinal);
|
||||
|
||||
private int _ordinal = 0;
|
||||
private readonly Dictionary<string, ColumnLite> _columns = new();
|
||||
|
|
@ -30,10 +30,12 @@ namespace ParquetViewer.Engine
|
|||
|
||||
public DataTableLite(int expectedRowCount = 1000)
|
||||
{
|
||||
ArgumentOutOfRangeException.ThrowIfLessThan(expectedRowCount, 0);
|
||||
|
||||
this._rows = new(expectedRowCount);
|
||||
}
|
||||
|
||||
public ColumnLite AddColumn(string name, Type type, ParquetSchemaElement parent)
|
||||
public ColumnLite AddColumn(string name, Type type, IParquetSchemaElement parent)
|
||||
{
|
||||
if (_rows.Count > 0)
|
||||
{
|
||||
|
|
@ -132,12 +134,12 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
|
||||
internal class DataRowLite
|
||||
public class DataRowLite : IDataRowLite
|
||||
{
|
||||
public IReadOnlyCollection<string> ColumnNames => Columns.Keys;
|
||||
public Dictionary<string, ColumnLite> Columns { get; }
|
||||
public object[] Row { get; }
|
||||
public DataTableLite Table { get; }
|
||||
|
||||
public DataRowLite(object[] data, IEnumerable<ColumnLite> columns, DataTableLite table)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(data);
|
||||
|
|
@ -152,20 +154,6 @@ namespace ParquetViewer.Engine
|
|||
throw new ArgumentException($"Data length {data.Length} doesn't match number of columns {columns.Count()}", nameof(data));
|
||||
}
|
||||
}
|
||||
|
||||
public DataTable ToDataTable()
|
||||
{
|
||||
var dt = new DataTable();
|
||||
foreach (var column in this.Columns)
|
||||
{
|
||||
dt.Columns.Add(new DataColumn(column.Key, column.Value.Type));
|
||||
}
|
||||
var row = dt.NewRow();
|
||||
row.ItemArray = this.Row;
|
||||
dt.Rows.Add(row);
|
||||
return dt;
|
||||
}
|
||||
|
||||
public object GetValue(string columnName)
|
||||
{
|
||||
if (!this.Columns.ContainsKey(columnName))
|
||||
|
|
@ -185,4 +173,11 @@ namespace ParquetViewer.Engine
|
|||
throw new IndexOutOfRangeException($"Could not get value for column `{columnName}`");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Read-only view of a single row: its column names, its raw cell array and lookup of a cell by column name.
/// </summary>
public interface IDataRowLite
{
    /// <summary>Names of the row's columns.</summary>
    IReadOnlyCollection<string> ColumnNames { get; }

    /// <summary>The row's cell values.</summary>
    object[] Row { get; }

    /// <summary>Returns the cell value stored for <paramref name="columnName"/>.</summary>
    object GetValue(string columnName);
}
|
||||
}
|
||||
17
src/ParquetViewer.Engine/Enums.cs
Normal file
17
src/ParquetViewer.Engine/Enums.cs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
namespace ParquetViewer.Engine
|
||||
{
|
||||
/// <summary>
/// Repetition kinds of a schema element. The numeric values are written out explicitly and match
/// the implicit ones (0, 1, 2).
/// </summary>
public enum RepetitionTypeId
{
    Required = 0,
    Optional = 1,
    Repeated = 2
}
|
||||
|
||||
/// <summary>
/// Kinds of schema field. The numeric values are written out explicitly and match the implicit
/// ones (0 through 3).
/// </summary>
public enum FieldTypeId
{
    Primitive = 0,
    List = 1,
    Struct = 2,
    Map = 3
}
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
public List<SkippedFile> SkippedFiles { get; private set; }
|
||||
|
||||
internal AllFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Could not open any files in directory.")
|
||||
public AllFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Could not open any files in directory.")
|
||||
{
|
||||
SkippedFiles = new List<SkippedFile>();
|
||||
if (skippedFiles is not null)
|
||||
|
|
@ -23,4 +23,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,9 +6,10 @@
|
|||
public const int MAX_DECIMAL_PRECISION = 29;
|
||||
public const int MAX_DECIMAL_SCALE = 28;
|
||||
|
||||
public string FieldName { get; }
|
||||
public int Precision { get; }
|
||||
public int Scale { get; }
|
||||
public bool HasDetailedInfo => FieldName is not null || Precision is not null || Scale is not null;
|
||||
public string? FieldName { get; }
|
||||
public int? Precision { get; }
|
||||
public int? Scale { get; }
|
||||
|
||||
public DecimalOverflowException(string fieldName, int precision, int scale, OverflowException overflowEx) : base(overflowEx.Message, overflowEx)
|
||||
{
|
||||
|
|
@ -16,5 +17,10 @@
|
|||
this.Precision = precision;
|
||||
this.Scale = scale;
|
||||
}
|
||||
|
||||
public DecimalOverflowException(OverflowException overflowEx) : base(overflowEx.Message, overflowEx)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
{
|
||||
public MalformedFieldException(string message, Exception? ex = null) : base(message, ex) { }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2,11 +2,11 @@
|
|||
{
|
||||
public class MultipleSchemasFoundException : Exception
|
||||
{
|
||||
public List<Parquet.Schema.ParquetSchema> Schemas;
|
||||
public List<List<string>> Schemas;
|
||||
|
||||
internal MultipleSchemasFoundException(List<Parquet.Schema.ParquetSchema> parquetSchemas) : base("Multiple schemas found in directory.")
|
||||
public MultipleSchemasFoundException(List<List<string>> parquetSchemas) : base("Multiple schemas found in directory.")
|
||||
{
|
||||
Schemas = parquetSchemas ?? new List<Parquet.Schema.ParquetSchema>();
|
||||
Schemas = parquetSchemas ?? new List<List<string>>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
namespace ParquetViewer.Engine.Exceptions
|
||||
{
|
||||
internal class ParquetEngineException : Exception
|
||||
public class ParquetEngineException : Exception
|
||||
{
|
||||
public ParquetEngineException(string? message = null, Exception? exception = null) : base(message, exception)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
public List<SkippedFile> SkippedFiles { get; private set; }
|
||||
|
||||
internal SomeFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Some files could not be opened.")
|
||||
public SomeFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Some files could not be opened.")
|
||||
{
|
||||
SkippedFiles = new List<SkippedFile>();
|
||||
|
||||
|
|
@ -20,4 +20,4 @@
|
|||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +1,51 @@
|
|||
using System.Numerics;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Numerics;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
internal static class Helpers
|
||||
public static class Helpers
|
||||
{
|
||||
public static IEnumerable<(object?, object?)> PairEnumerables(IEnumerable<object?> enumerable1, IEnumerable<object?> enumerable2, object? missingIndexValue = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(enumerable1);
|
||||
ArgumentNullException.ThrowIfNull(enumerable2);
|
||||
|
||||
var enumerator1 = enumerable1.GetEnumerator();
|
||||
var enumerator2 = enumerable2.GetEnumerator();
|
||||
|
||||
var hasMore1 = enumerator1.MoveNext();
|
||||
var hasMore2 = enumerator2.MoveNext();
|
||||
while (hasMore1 || hasMore2)
|
||||
{
|
||||
yield return (hasMore1 ? enumerator1.Current : missingIndexValue, hasMore2 ? enumerator2.Current : missingIndexValue);
|
||||
hasMore1 = enumerator1.MoveNext();
|
||||
hasMore2 = enumerator2.MoveNext();
|
||||
}
|
||||
}
|
||||
|
||||
public static IEnumerable<(T, R)> PairEnumerables<T, R>(IEnumerable<T> enumerable1, IEnumerable<R> enumerable2)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(enumerable1);
|
||||
ArgumentNullException.ThrowIfNull(enumerable2);
|
||||
|
||||
var enumerator1 = enumerable1.GetEnumerator();
|
||||
var enumerator2 = enumerable2.GetEnumerator();
|
||||
|
||||
var hasMore1 = enumerator1.MoveNext();
|
||||
var hasMore2 = enumerator2.MoveNext();
|
||||
while (hasMore1 && hasMore2)
|
||||
{
|
||||
yield return (enumerator1.Current, enumerator2.Current);
|
||||
hasMore1 = enumerator1.MoveNext();
|
||||
hasMore2 = enumerator2.MoveNext();
|
||||
}
|
||||
|
||||
if (hasMore1 || hasMore2)
|
||||
{
|
||||
throw new InvalidDataException("Enumerables are of different lengths.");
|
||||
}
|
||||
}
|
||||
|
||||
public static int CompareTo(object? value, object? otherValue)
|
||||
{
|
||||
value ??= DBNull.Value;
|
||||
|
|
@ -29,76 +71,88 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
|
||||
#region Dubious Functions
|
||||
//This logic is a cluster f... right now. It blends https://www.aloneguid.uk/posts/2023/04/parquet-empty-vs-null
|
||||
//with some of my understanding of how the dremel algorithm works. No way will it work for all cases.
|
||||
|
||||
public static bool IsNull(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
|
||||
=> dataColumn.DefinitionLevels?.Length > index && dataColumn.DefinitionLevels[index] <= field.CurrentDefinitionLevel - 1;
|
||||
|
||||
public static bool IsEmpty(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
|
||||
=> dataColumn.DefinitionLevels?.Length > index && dataColumn.DefinitionLevels[index] == field.CurrentDefinitionLevel
|
||||
&& field.DataField?.MaxDefinitionLevel != dataColumn.DefinitionLevels[index] /*Fixes STRUCT_TYPE_TEST*/;
|
||||
#endregion
|
||||
|
||||
/// <summary>
|
||||
/// Some parquet writers don't write null entries into the data array for empty and null lists.
|
||||
/// This throws off our logic so lets find all empty/null lists and add a null entry into
|
||||
/// the data array to align it with the repetition/definition levels.
|
||||
/// </summary>
|
||||
/// <param name="dataColumn">The parquet data column</param>
|
||||
public static IEnumerable<object> GetDataWithPaddedNulls(this Parquet.Data.DataColumn dataColumn, ParquetSchemaElement field)
|
||||
public static void WriteValue(Utf8JsonWriterWithRunningLength jsonWriter, object value, bool truncateForDisplay)
|
||||
{
|
||||
var dataEnumerable = dataColumn.Data.Cast<object?>().Select(d => d ?? DBNull.Value);
|
||||
|
||||
int levelCount = dataColumn.DefinitionLevels?.Length ?? 0;
|
||||
if (levelCount > dataColumn.Data.Length)
|
||||
if (value is null)
|
||||
{
|
||||
dataEnumerable = GetDataWithPaddedNulls();
|
||||
|
||||
IEnumerable<object> GetDataWithPaddedNulls()
|
||||
{
|
||||
var index = -1;
|
||||
foreach (var data in dataColumn.Data)
|
||||
{
|
||||
index++;
|
||||
|
||||
while (dataColumn.IsEmpty(index, field) || dataColumn.IsNull(index, field))
|
||||
{
|
||||
yield return DBNull.Value;
|
||||
index++;
|
||||
}
|
||||
|
||||
yield return data ?? DBNull.Value;
|
||||
}
|
||||
|
||||
//Need to handle case where last N rows are null/empty
|
||||
while (levelCount > index + 1)
|
||||
{
|
||||
yield return DBNull.Value;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
//Value should never be null as we should be replacing all those with DBNull.Value
|
||||
throw new ArgumentNullException(nameof(value));
|
||||
}
|
||||
|
||||
return dataEnumerable;
|
||||
}
|
||||
|
||||
public static IEnumerable<(object?, object?)> PairEnumerables(IEnumerable<object?> enumerable1, IEnumerable<object?> enumerable2, object? missingIndexValue = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(enumerable1);
|
||||
ArgumentNullException.ThrowIfNull(enumerable2);
|
||||
|
||||
var enumerator1 = enumerable1.GetEnumerator();
|
||||
var enumerator2 = enumerable2.GetEnumerator();
|
||||
|
||||
var hasMore1 = enumerator1.MoveNext();
|
||||
var hasMore2 = enumerator2.MoveNext();
|
||||
while (hasMore1 || hasMore2)
|
||||
else if (value == DBNull.Value)
|
||||
{
|
||||
yield return (hasMore1 ? enumerator1.Current : missingIndexValue, hasMore2 ? enumerator2.Current : missingIndexValue);
|
||||
hasMore1 = enumerator1.MoveNext();
|
||||
hasMore2 = enumerator2.MoveNext();
|
||||
jsonWriter.WriteNullValue();
|
||||
}
|
||||
else if (value is string str)
|
||||
{
|
||||
jsonWriter.WriteStringValue(str);
|
||||
}
|
||||
else if (value is bool @bool)
|
||||
{
|
||||
jsonWriter.WriteBooleanValue(@bool);
|
||||
}
|
||||
else if (value.GetType().IsNumber())
|
||||
{
|
||||
jsonWriter.WriteNumberValue(Convert.ToDecimal(value));
|
||||
}
|
||||
else if (value is IStructValue @struct)
|
||||
{
|
||||
var json = @struct.ToJSON(out var success);
|
||||
if (success)
|
||||
jsonWriter.WriteRawValue(json);
|
||||
else
|
||||
jsonWriter.WriteStringValue(json);
|
||||
}
|
||||
else if (value is IMapValue map)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach ((object mapKey, object mapValue) in map)
|
||||
{
|
||||
jsonWriter.WriteStartObject();
|
||||
jsonWriter.WritePropertyName("key");
|
||||
WriteValue(jsonWriter, mapKey, truncateForDisplay);
|
||||
jsonWriter.WritePropertyName("value");
|
||||
WriteValue(jsonWriter, mapValue, truncateForDisplay);
|
||||
jsonWriter.WriteEndObject();
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is IListValue list)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach (var item in list)
|
||||
{
|
||||
WriteValue(jsonWriter, item, truncateForDisplay);
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is IByteArrayValue byteArray /*&& truncateForDisplay //should use the entire byte array if
|
||||
* we're not truncating for display? Seems kind of unreasonable
|
||||
* for users to rely on binary data within a Struct value preview.*/)
|
||||
{
|
||||
const int byteArrayMaxStringLength = 24; //arbitrary number that I think looks good
|
||||
var byteArrayAsString = byteArray.ToStringTruncated(byteArrayMaxStringLength);
|
||||
jsonWriter.WriteStringValue(byteArrayAsString);
|
||||
}
|
||||
else if (value is DateTime dt)
|
||||
{
|
||||
//Write dates as string
|
||||
if (ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(dt.ToString(ParquetEngineSettings.DateDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(dt.ToString());
|
||||
}
|
||||
else if (value is DateOnly dateOnly)
|
||||
{
|
||||
//Write dates as string
|
||||
if (ParquetEngineSettings.DateOnlyDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(dateOnly.ToString(ParquetEngineSettings.DateOnlyDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(dateOnly.ToString());
|
||||
}
|
||||
else
|
||||
{
|
||||
//Everything else just try to write it as string
|
||||
jsonWriter.WriteStringValue(value.ToString()!);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -107,5 +161,42 @@ namespace ParquetViewer.Engine
|
|||
/// </summary>
|
||||
public static bool IsNumber(this Type type) =>
|
||||
Array.Exists(type.GetInterfaces(), i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(INumber<>));
|
||||
|
||||
public static IEnumerable<string> ListParquetFiles(string folderPath)
|
||||
{
|
||||
var parquetFiles = Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories)
|
||||
.Where(file =>
|
||||
file.EndsWith(".parquet") ||
|
||||
file.EndsWith(".parquet.gzip") ||
|
||||
file.EndsWith(".parquet.gz")
|
||||
);
|
||||
|
||||
if (!parquetFiles.Any())
|
||||
{
|
||||
//Check for extensionless files
|
||||
parquetFiles = Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories);
|
||||
}
|
||||
|
||||
return parquetFiles.OrderBy(filename => filename);
|
||||
}
|
||||
|
||||
public static void EZDispose(IEnumerable<IDisposable> disposables)
|
||||
{
|
||||
if (disposables is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
foreach (var disposable in disposables)
|
||||
{
|
||||
try
|
||||
{
|
||||
disposable?.Dispose();
|
||||
}
|
||||
catch { /* Swallow */ }
|
||||
}
|
||||
}
|
||||
|
||||
public static int ByteArraysEqual(ReadOnlySpan<byte> a1, ReadOnlySpan<byte> a2) => a1.SequenceCompareTo(a2);
|
||||
}
|
||||
}
|
||||
}
|
||||
20
src/ParquetViewer.Engine/IParquetEngine.cs
Normal file
20
src/ParquetViewer.Engine/IParquetEngine.cs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public interface IParquetEngine : IDisposable
|
||||
{
|
||||
List<string> Fields { get; }
|
||||
long RecordCount { get; }
|
||||
int NumberOfPartitions { get; }
|
||||
Dictionary<string, string> CustomMetadata { get; }
|
||||
string Path { get; }
|
||||
IParquetMetadata Metadata { get; }
|
||||
|
||||
Task<Func<bool, DataTable>> ReadRowsAsync(List<string> selectedFields, int offset, int recordCount,
|
||||
CancellationToken cancellationToken, IProgress<int>? progress = null);
|
||||
|
||||
Task WriteDataToParquetFileAsync(DataTable dataTable, string path, CancellationToken cancellationToken,
|
||||
IProgress<int> progress, Dictionary<string, string>? customMetadata);
|
||||
}
|
||||
}
|
||||
59
src/ParquetViewer.Engine/IParquetMetadata.cs
Normal file
59
src/ParquetViewer.Engine/IParquetMetadata.cs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public interface IParquetMetadata
|
||||
{
|
||||
int ParquetVersion { get; }
|
||||
int RowGroupCount { get; }
|
||||
int RowCount { get; }
|
||||
string CreatedBy { get; }
|
||||
ICollection<IRowGroupMetadata> RowGroups { get; }
|
||||
IParquetSchemaElement SchemaTree { get; }
|
||||
}
|
||||
|
||||
public interface IRowGroupMetadata
|
||||
{
|
||||
int Ordinal { get; }
|
||||
int RowCount { get; }
|
||||
int ColumnCount { get; }
|
||||
ICollection<ISortingColumnMetadata>? SortingColumns { get; }
|
||||
ICollection<IRowGroupColumnMetadata>? Columns { get; }
|
||||
long FileOffset { get; }
|
||||
long TotalByteSize { get; }
|
||||
long TotalCompressedSize { get; }
|
||||
}
|
||||
|
||||
public interface ISortingColumnMetadata
|
||||
{
|
||||
public int ColumnIdx { get; }
|
||||
public bool Descending { get; }
|
||||
public bool NullsFirst { get; }
|
||||
}
|
||||
|
||||
public interface IRowGroupColumnMetadata
|
||||
{
|
||||
public int? ColumnId { get; }
|
||||
public string? PathInSchema { get; }
|
||||
public string? Type { get; }
|
||||
public int? NumValues { get; }
|
||||
public long? TotalUncompressedSize { get; }
|
||||
public long? TotalCompressedSize { get; }
|
||||
public long? DataPageOffset { get; }
|
||||
public long? IndexPageOffset { get; }
|
||||
public long? DictionaryPageOffset { get; }
|
||||
public IRowGroupColumnStatistics? Statistics { get; }
|
||||
public long? BloomFilterOffset { get; }
|
||||
public long? BloomFilterLength { get; }
|
||||
}
|
||||
|
||||
public interface IRowGroupColumnStatistics
|
||||
{
|
||||
public object? Min { get; }
|
||||
public object? Max { get; }
|
||||
public long? NullCount { get; }
|
||||
public long? DistinctCount { get; }
|
||||
public object? MinValue { get; }
|
||||
public object? MaxValue { get; }
|
||||
public bool? IsMinValueExact { get; }
|
||||
public bool? IsMaxValueExact { get; }
|
||||
}
|
||||
}
|
||||
69
src/ParquetViewer.Engine/IParquetSchemaElement.cs
Normal file
69
src/ParquetViewer.Engine/IParquetSchemaElement.cs
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public interface IParquetSchemaElement<T> : IParquetSchemaElement where T : IParquetSchemaElement
|
||||
{
|
||||
new string Path { get; }
|
||||
|
||||
new ICollection<T> Children { get; }
|
||||
|
||||
new Type ClrType { get; }
|
||||
|
||||
new FieldTypeId FieldType { get; }
|
||||
|
||||
new RepetitionTypeId? RepetitionType { get; }
|
||||
|
||||
new bool IsPrimitive { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Case insensitive version of <see cref="GetChild(string)"/>
|
||||
/// Only exists to deal with non-standard Parquet implementations
|
||||
/// </summary>
|
||||
new T GetChildCI(string name);
|
||||
new T GetChild(string name);
|
||||
new T GetListField();
|
||||
new T GetListItemField();
|
||||
new T GetSingleOrByName(string name);
|
||||
new T GetMapKeyValueField();
|
||||
new T GetMapKeyField();
|
||||
new T GetMapValueField();
|
||||
}
|
||||
|
||||
public interface IParquetSchemaElement
|
||||
{
|
||||
string Path { get; }
|
||||
|
||||
ICollection<IParquetSchemaElement> Children { get; }
|
||||
|
||||
[JsonIgnore]
|
||||
Type ClrType { get; }
|
||||
|
||||
FieldTypeId FieldType { get; }
|
||||
|
||||
RepetitionTypeId? RepetitionType { get; }
|
||||
|
||||
bool IsPrimitive { get; }
|
||||
|
||||
public string? Type { get; }
|
||||
public int? TypeLength { get; }
|
||||
public int? NumChildren { get; }
|
||||
public string? ConvertedType { get; }
|
||||
public int? Scale { get; }
|
||||
public int? Precision { get; }
|
||||
public object? LogicalType { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Case insensitive version of <see cref="GetChild(string)"/>
|
||||
/// Only exists to deal with non-standard Parquet implementations
|
||||
/// </summary>
|
||||
IParquetSchemaElement GetChildCI(string name);
|
||||
IParquetSchemaElement GetChild(string name);
|
||||
IParquetSchemaElement GetListField();
|
||||
IParquetSchemaElement GetListItemField();
|
||||
IParquetSchemaElement GetSingleOrByName(string name);
|
||||
IParquetSchemaElement GetMapKeyValueField();
|
||||
IParquetSchemaElement GetMapKeyField();
|
||||
IParquetSchemaElement GetMapValueField();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
using Parquet;
|
||||
using Parquet.Meta;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public partial class ParquetEngine : IDisposable
|
||||
{
|
||||
private readonly ParquetReader[] _parquetFiles;
|
||||
private long? _recordCount;
|
||||
|
||||
public long RecordCount => _recordCount ??= _parquetFiles.Sum(pf => pf.Metadata?.NumRows ?? 0);
|
||||
|
||||
public int NumberOfPartitions => _parquetFiles.Length;
|
||||
|
||||
private ParquetReader DefaultReader => _parquetFiles.FirstOrDefault() ?? throw new ParquetEngineException("No parquet readers available");
|
||||
|
||||
public List<string> Fields => DefaultReader.Schema.Fields.Select(f => f.Name).ToList();
|
||||
|
||||
public FileMetaData ThriftMetadata => DefaultReader.Metadata ?? throw new ParquetEngineException("No thrift metadata was found");
|
||||
|
||||
public Dictionary<string, string> CustomMetadata => DefaultReader.CustomMetadata;
|
||||
|
||||
public ParquetSchema Schema => DefaultReader.Schema;
|
||||
|
||||
private ParquetSchemaElement? _parquetSchemaTree;
|
||||
public ParquetSchemaElement ParquetSchemaTree => _parquetSchemaTree ??= BuildParquetSchemaTree();
|
||||
|
||||
public string OpenFileOrFolderPath { get; }
|
||||
|
||||
private ParquetEngine(string fileOrFolderPath, params ParquetReader[] parquetFiles)
|
||||
{
|
||||
_parquetFiles = parquetFiles ?? throw new ArgumentNullException(nameof(parquetFiles), "No parquet readers provided");
|
||||
OpenFileOrFolderPath = fileOrFolderPath;
|
||||
}
|
||||
|
||||
private ParquetSchemaElement BuildParquetSchemaTree()
|
||||
{
|
||||
var thriftSchema = ThriftMetadata.Schema ?? throw new ParquetException("No thrift metadata was found");
|
||||
var schemaElements = thriftSchema.GetEnumerator();
|
||||
var thriftSchemaTree = ReadSchemaTree(ref schemaElements);
|
||||
|
||||
foreach (var dataField in Schema.GetDataFields())
|
||||
{
|
||||
var field = thriftSchemaTree.GetChild(dataField.Path.FirstPart ?? throw new MalformedFieldException($"Field has no schema path: `{dataField.Name}`"));
|
||||
for (var i = 1; i < dataField.Path.Length; i++)
|
||||
{
|
||||
field = field.GetChild(dataField.Path[i]);
|
||||
}
|
||||
field.DataField = dataField; //if it doesn't have a child it's a datafield (I hope)
|
||||
}
|
||||
|
||||
return thriftSchemaTree;
|
||||
}
|
||||
|
||||
private static ParquetSchemaElement ReadSchemaTree(ref List<SchemaElement>.Enumerator schemaElements)
|
||||
{
|
||||
if (!schemaElements.MoveNext())
|
||||
throw new ParquetException("Invalid parquet schema");
|
||||
|
||||
var current = schemaElements.Current;
|
||||
var parquetSchemaElement = new ParquetSchemaElement(current);
|
||||
for (int i = 0; i < current.NumChildren; i++)
|
||||
{
|
||||
parquetSchemaElement.AddChild(ReadSchemaTree(ref schemaElements));
|
||||
}
|
||||
return parquetSchemaElement;
|
||||
}
|
||||
|
||||
public static Task<ParquetEngine> OpenFileOrFolderAsync(string fileOrFolderPath, CancellationToken cancellationToken)
|
||||
{
|
||||
if (File.Exists(fileOrFolderPath)) //Handles null
|
||||
{
|
||||
return OpenFileAsync(fileOrFolderPath, cancellationToken);
|
||||
}
|
||||
else if (Directory.Exists(fileOrFolderPath)) //Handles null
|
||||
{
|
||||
return OpenFolderAsync(fileOrFolderPath, cancellationToken);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new FileNotFoundException($"Could not find file or folder at location: {fileOrFolderPath}");
|
||||
}
|
||||
}
|
||||
|
||||
public static async Task<ParquetEngine> OpenFileAsync(string parquetFilePath, CancellationToken cancellationToken)
|
||||
{
|
||||
if (!File.Exists(parquetFilePath)) //Handles null
|
||||
{
|
||||
throw new FileNotFoundException($"Could not find parquet file at: {parquetFilePath}");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var parquetReader = await ParquetReader.CreateAsync(parquetFilePath, null, cancellationToken);
|
||||
return new ParquetEngine(parquetFilePath, parquetReader);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
throw new FileReadException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public static async Task<ParquetEngine> OpenFolderAsync(string folderPath, CancellationToken cancellationToken)
|
||||
{
|
||||
if (!Directory.Exists(folderPath)) //Handles null
|
||||
{
|
||||
throw new DirectoryNotFoundException($"Directory doesn't exist: {folderPath}");
|
||||
}
|
||||
|
||||
var skippedFiles = new Dictionary<string, Exception>();
|
||||
var fileGroups = new Dictionary<ParquetSchema, List<ParquetReader>>();
|
||||
foreach (var file in ListParquetFiles(folderPath))
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
try
|
||||
{
|
||||
var parquetReader = await ParquetReader.CreateAsync(file, null, cancellationToken);
|
||||
if (!fileGroups.ContainsKey(parquetReader.Schema))
|
||||
{
|
||||
fileGroups.Add(parquetReader.Schema, new List<ParquetReader>());
|
||||
}
|
||||
|
||||
fileGroups[parquetReader.Schema].Add(parquetReader);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
skippedFiles.Add(Path.GetRelativePath(folderPath, file), ex);
|
||||
}
|
||||
}
|
||||
|
||||
if (fileGroups.Keys.Count == 0)
|
||||
{
|
||||
if (skippedFiles.Count == 0)
|
||||
{
|
||||
throw new FileNotFoundException("Directory is empty");
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new AllFilesSkippedException(skippedFiles);
|
||||
}
|
||||
}
|
||||
else if (fileGroups.Keys.Count > 1)
|
||||
{
|
||||
//We found more than one type of schema.
|
||||
foreach (var fileGroupList in fileGroups.Values)
|
||||
{
|
||||
EZDispose(fileGroupList);
|
||||
}
|
||||
|
||||
throw new MultipleSchemasFoundException(fileGroups.Keys.ToList());
|
||||
}
|
||||
else if (skippedFiles.Count > 0)
|
||||
{
|
||||
//We found one schema but some files couldn't be read
|
||||
EZDispose(fileGroups.Values.First());
|
||||
throw new SomeFilesSkippedException(skippedFiles);
|
||||
}
|
||||
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
return new ParquetEngine(folderPath, fileGroups.Values.First().ToArray());
|
||||
}
|
||||
|
||||
private IEnumerable<(long RemainingOffset, ParquetReader ParquetReader)> GetReaders(long offset)
|
||||
{
|
||||
foreach (var parquetFile in _parquetFiles)
|
||||
{
|
||||
if (offset >= parquetFile.Metadata?.NumRows)
|
||||
{
|
||||
offset -= parquetFile.Metadata.NumRows;
|
||||
continue;
|
||||
}
|
||||
|
||||
yield return (offset, parquetFile);
|
||||
offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
private static IEnumerable<string> ListParquetFiles(string folderPath)
|
||||
{
|
||||
var parquetFiles = Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories)
|
||||
.Where(file =>
|
||||
file.EndsWith(".parquet") ||
|
||||
file.EndsWith(".parquet.gzip") ||
|
||||
file.EndsWith(".parquet.gz")
|
||||
);
|
||||
|
||||
if (!parquetFiles.Any())
|
||||
{
|
||||
//Check for extensionless files
|
||||
parquetFiles = Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories);
|
||||
}
|
||||
|
||||
return parquetFiles.OrderBy(filename => filename);
|
||||
}
|
||||
|
||||
private static void EZDispose(IEnumerable<IDisposable> disposables)
|
||||
{
|
||||
if (disposables is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
foreach (var disposable in disposables)
|
||||
{
|
||||
try
|
||||
{
|
||||
disposable?.Dispose();
|
||||
}
|
||||
catch { /* Swallow */ }
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose() => EZDispose(_parquetFiles);
|
||||
}
|
||||
}
|
||||
|
|
@ -9,8 +9,9 @@ namespace ParquetViewer.Engine
|
|||
/// By default Parquet Engine will render Dates using the system culture's format.
|
||||
/// By setting this value a custom date format can be used instead.
|
||||
/// </summary>
|
||||
/// <remarks>Parquet Engine renders dates when converting <see cref="ListValue"/>,
|
||||
/// <see cref="StructValue"/>, and <see cref="MapValue"/> types to string.</remarks>
|
||||
/// <remarks>Parquet Engine renders dates when converting <see cref="IListValue"/>,
|
||||
/// <see cref="IStructValue"/>, and <see cref="IMapValue"/> types to string.</remarks>
|
||||
public static string? DateDisplayFormat { get; set; }
|
||||
public static string? DateOnlyDisplayFormat { get; set; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,12 +1,9 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
<Configurations>Debug;Release;Release_SelfContained</Configurations>
|
||||
<ProduceReferenceAssembly>False</ProduceReferenceAssembly>
|
||||
<EnforceCodeStyleInBuild>True</EnforceCodeStyleInBuild>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
</PropertyGroup>
|
||||
|
|
@ -16,7 +13,4 @@
|
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_SelfContained|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Parquet.Net" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
@ -1,41 +1,39 @@
|
|||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using static ParquetViewer.Engine.Types.IByteArrayValue;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public class ByteArrayValue : IComparable<ByteArrayValue>, IComparable
|
||||
public class ByteArrayValue : IByteArrayValue
|
||||
{
|
||||
public string Name { get; }
|
||||
public byte[] Data { get; }
|
||||
|
||||
|
||||
private DisplayFormat[]? _possibleDisplayFormats;
|
||||
public DisplayFormat[] PossibleDisplayFormats =>
|
||||
_possibleDisplayFormats ??= this.CalculatePossibleDisplayFormats();
|
||||
_possibleDisplayFormats ??= CalculatePossibleDisplayFormats();
|
||||
|
||||
public ByteArrayValue(string name, byte[] data)
|
||||
public ByteArrayValue(byte[] data)
|
||||
{
|
||||
this.Name = name;
|
||||
this.Data = data;
|
||||
Data = data;
|
||||
}
|
||||
|
||||
public override string ToString() => BitConverter.ToString(this.Data);
|
||||
public override string ToString() => BitConverter.ToString(Data);
|
||||
|
||||
public int CompareTo(ByteArrayValue? other)
|
||||
public int CompareTo(IByteArrayValue? other)
|
||||
{
|
||||
if (other?.Data is null)
|
||||
return 1;
|
||||
else if (this.Data is null)
|
||||
else if (Data is null)
|
||||
return -1;
|
||||
else
|
||||
return ByteArraysEqual(this.Data, other.Data);
|
||||
return Helpers.ByteArraysEqual(Data, other.Data);
|
||||
}
|
||||
|
||||
private static int ByteArraysEqual(ReadOnlySpan<byte> a1, ReadOnlySpan<byte> a2) => a1.SequenceCompareTo(a2);
|
||||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is ByteArrayValue byteArray)
|
||||
if (obj is IByteArrayValue byteArray)
|
||||
return CompareTo(byteArray);
|
||||
else
|
||||
return 1;
|
||||
|
|
@ -84,22 +82,6 @@ namespace ParquetViewer.Engine.Types
|
|||
return possibleDisplayFormats.ToArray();
|
||||
}
|
||||
|
||||
public enum DisplayFormat
|
||||
{
|
||||
Hex = 0, //Default hexadecimal format
|
||||
IPv6, // 16 bytes
|
||||
IPv4, // 4 bytes
|
||||
Guid, // 16 bytes
|
||||
Short, // 2 bytes
|
||||
Integer, // 4 bytes
|
||||
Long, // 8 bytes
|
||||
Float, // 4 bytes
|
||||
Double, // 8 bytes
|
||||
ASCII, // ASCII text if printable (any size)
|
||||
Base64, // Base64 encoded string (any size)
|
||||
Size // Size information (any size)
|
||||
}
|
||||
|
||||
#region Type Conversions
|
||||
public bool ToIPv6([NotNullWhen(true)] out IPAddress? ipAddress)
|
||||
{
|
||||
|
|
@ -165,7 +147,7 @@ namespace ParquetViewer.Engine.Types
|
|||
if (Data.Length == 0)
|
||||
return false;
|
||||
|
||||
var printableCount = this.Data.Sum(@byte =>
|
||||
var printableCount = Data.Sum(@byte =>
|
||||
@byte >= ' ' /*32*/ && @byte <= '~' /*126*/ //Printable ASCII range
|
||||
? 1 : 0);
|
||||
|
||||
|
|
@ -305,4 +287,4 @@ namespace ParquetViewer.Engine.Types
|
|||
+ BitConverter.ToString(Data, Data.Length - (maxBytesToRender / 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
45
src/ParquetViewer.Engine/Types/IByteArrayValue.cs
Normal file
45
src/ParquetViewer.Engine/Types/IByteArrayValue.cs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public interface IByteArrayValue : IComparable<IByteArrayValue>, IComparable
|
||||
{
|
||||
byte[] Data { get; }
|
||||
DisplayFormat[] PossibleDisplayFormats { get; }
|
||||
|
||||
public enum DisplayFormat
|
||||
{
|
||||
Hex = 0, //Default hexadecimal format
|
||||
IPv6, // 16 bytes
|
||||
IPv4, // 4 bytes
|
||||
Guid, // 16 bytes
|
||||
Short, // 2 bytes
|
||||
Integer, // 4 bytes
|
||||
Long, // 8 bytes
|
||||
Float, // 4 bytes
|
||||
Double, // 8 bytes
|
||||
ASCII, // ASCII text if printable (any size)
|
||||
Base64, // Base64 encoded string (any size)
|
||||
Size // Size information (any size)
|
||||
}
|
||||
|
||||
string ToStringTruncated(int desiredLength);
|
||||
|
||||
bool ToIPv6([NotNullWhen(true)] out IPAddress? ipAddress);
|
||||
bool ToIPv4([NotNullWhen(true)] out IPAddress? ipAddress);
|
||||
bool ToGuid([NotNullWhen(true)] out Guid? guid);
|
||||
bool ToASCII([NotNullWhen(true)] out string? ascii);
|
||||
bool ToShort([NotNullWhen(true)] out short? @short);
|
||||
bool ToInteger([NotNullWhen(true)] out int? @int);
|
||||
bool ToLong([NotNullWhen(true)] out long? @long);
|
||||
bool ToFloat([NotNullWhen(true)] out float? @float);
|
||||
bool ToDouble([NotNullWhen(true)] out double? @double);
|
||||
void ToBase64(out string base64);
|
||||
}
|
||||
}
|
||||
15
src/ParquetViewer.Engine/Types/IListValue.cs
Normal file
15
src/ParquetViewer.Engine/Types/IListValue.cs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public interface IListValue : IComparable<IListValue>, IComparable, IEnumerable<object>
|
||||
{
|
||||
public IList Data { get; }
|
||||
public Type Type { get; }
|
||||
}
|
||||
}
|
||||
14
src/ParquetViewer.Engine/Types/IMapValue.cs
Normal file
14
src/ParquetViewer.Engine/Types/IMapValue.cs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
using System.Collections;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public interface IMapValue : IComparable<IMapValue>, IComparable, IEnumerable<(object Key, object Value)>
|
||||
{
|
||||
public ArrayList Keys { get; }
|
||||
public Type KeyType { get; }
|
||||
public ArrayList Values { get; }
|
||||
public Type ValueType { get; }
|
||||
(object Key, object Value) GetMapValue(int index);
|
||||
int Length { get; }
|
||||
}
|
||||
}
|
||||
17
src/ParquetViewer.Engine/Types/IStructValue.cs
Normal file
17
src/ParquetViewer.Engine/Types/IStructValue.cs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public interface IStructValue : IComparable<IStructValue>, IComparable
|
||||
{
|
||||
public IDataRowLite Data { get; }
|
||||
|
||||
IReadOnlyCollection<string> FieldNames { get; }
|
||||
|
||||
string ToStringTruncated(int desiredLength);
|
||||
|
||||
DataTable ToDataTable();
|
||||
|
||||
string ToJSON(out bool success, int? desiredLength = null);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,19 +1,13 @@
|
|||
using System.Collections;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public class ListValue : IComparable<ListValue>, IComparable, IEnumerable<object>
|
||||
public class ListValue : IListValue
|
||||
{
|
||||
public IList Data { get; }
|
||||
public Type? Type { get; private set; }
|
||||
|
||||
public int Length => Data.Count;
|
||||
|
||||
public ListValue(Array data)
|
||||
{
|
||||
Data = data ?? throw new ArgumentNullException(nameof(data));
|
||||
Type = Data.GetType().GetElementType();
|
||||
}
|
||||
public Type Type { get; }
|
||||
|
||||
public ListValue(ArrayList data, Type type)
|
||||
{
|
||||
|
|
@ -35,14 +29,14 @@ namespace ParquetViewer.Engine.Types
|
|||
using var ms = new MemoryStream();
|
||||
using (var jsonWriter = new Utf8JsonWriterWithRunningLength(ms))
|
||||
{
|
||||
StructValue.WriteValue(jsonWriter, this, false);
|
||||
Helpers.WriteValue(jsonWriter, this, false);
|
||||
}
|
||||
ms.Position = 0;
|
||||
using var sr = new StreamReader(ms);
|
||||
return sr.ReadToEnd();
|
||||
}
|
||||
|
||||
public int CompareTo(ListValue? other)
|
||||
public int CompareTo(IListValue? other)
|
||||
{
|
||||
if (other is null)
|
||||
return 1;
|
||||
|
|
@ -72,7 +66,7 @@ namespace ParquetViewer.Engine.Types
|
|||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is ListValue list)
|
||||
if (obj is IListValue list)
|
||||
return CompareTo(list);
|
||||
else
|
||||
return 1;
|
||||
|
|
@ -88,4 +82,4 @@ namespace ParquetViewer.Engine.Types
|
|||
|
||||
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,15 +1,21 @@
|
|||
using System.Collections;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public class MapValue : IComparable<MapValue>, IComparable, IEnumerable<(object Key, object Value)>
|
||||
public class MapValue : IMapValue
|
||||
{
|
||||
public ArrayList Keys { get; }
|
||||
|
||||
public Type KeyType { get; }
|
||||
|
||||
public ArrayList Values { get; }
|
||||
|
||||
public Type ValueType { get; }
|
||||
|
||||
public int Length => Math.Max(Keys.Count, Values.Count);
|
||||
|
||||
public MapValue(ArrayList keys, Type keyType, ArrayList values, Type valueType)
|
||||
{
|
||||
if (keys is null)
|
||||
|
|
@ -36,49 +42,10 @@ namespace ParquetViewer.Engine.Types
|
|||
ValueType = valueType;
|
||||
}
|
||||
|
||||
public int Length => Keys.Count;
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var mapValuesStringBuilder = new StringBuilder("[");
|
||||
for (var i = 0; i < Length; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
{
|
||||
mapValuesStringBuilder.Append(',');
|
||||
}
|
||||
|
||||
mapValuesStringBuilder.Append(FormatString(GetMapValue(i)));
|
||||
}
|
||||
|
||||
mapValuesStringBuilder.Append(']');
|
||||
return mapValuesStringBuilder.ToString();
|
||||
|
||||
static string FormatString((object Key, object Value) map)
|
||||
{
|
||||
string key;
|
||||
if (map.Key is DateTime dt && ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
key = dt.ToString(ParquetEngineSettings.DateDisplayFormat);
|
||||
else
|
||||
key = map.Key?.ToString() ?? string.Empty;
|
||||
|
||||
string value;
|
||||
if (map.Value is DateTime dt2 && ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
value = dt2.ToString(ParquetEngineSettings.DateDisplayFormat);
|
||||
else
|
||||
value = map.Value?.ToString() ?? string.Empty;
|
||||
|
||||
return $"({key},{value})";
|
||||
}
|
||||
}
|
||||
|
||||
private (object Key, object Value) GetMapValue(int index)
|
||||
=> (Keys[index] ?? DBNull.Value, Values[index] ?? DBNull.Value);
|
||||
|
||||
/// <summary>
|
||||
/// Sorts by Key first, then Value.
|
||||
/// </summary>
|
||||
public int CompareTo(MapValue? other)
|
||||
public int CompareTo(IMapValue? other)
|
||||
{
|
||||
if (other is null)
|
||||
return 1;
|
||||
|
|
@ -113,7 +80,7 @@ namespace ParquetViewer.Engine.Types
|
|||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is MapValue mapValue)
|
||||
if (obj is IMapValue mapValue)
|
||||
return CompareTo(mapValue);
|
||||
else
|
||||
return 1;
|
||||
|
|
@ -127,6 +94,47 @@ namespace ParquetViewer.Engine.Types
|
|||
}
|
||||
}
|
||||
|
||||
public (object Key, object Value) GetMapValue(int index)
|
||||
=> (Keys[index] ?? DBNull.Value, Values[index] ?? DBNull.Value);
|
||||
|
||||
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var mapValuesStringBuilder = new StringBuilder("[");
|
||||
for (var i = 0; i < Length; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
{
|
||||
mapValuesStringBuilder.Append(',');
|
||||
}
|
||||
|
||||
mapValuesStringBuilder.Append(FormatString(GetMapValue(i)));
|
||||
}
|
||||
|
||||
mapValuesStringBuilder.Append(']');
|
||||
return mapValuesStringBuilder.ToString();
|
||||
|
||||
static string FormatString((object Key, object Value) map)
|
||||
{
|
||||
string key;
|
||||
if (map.Key is DateTime dt && ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
key = dt.ToString(ParquetEngineSettings.DateDisplayFormat);
|
||||
else if (map.Key is DateOnly dateOnly && ParquetEngineSettings.DateOnlyDisplayFormat is not null)
|
||||
key = dateOnly.ToString(ParquetEngineSettings.DateOnlyDisplayFormat);
|
||||
else
|
||||
key = map.Key?.ToString() ?? string.Empty;
|
||||
|
||||
string value;
|
||||
if (map.Value is DateTime dt2 && ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
value = dt2.ToString(ParquetEngineSettings.DateDisplayFormat);
|
||||
else if (map.Value is DateOnly dateOnly && ParquetEngineSettings.DateOnlyDisplayFormat is not null)
|
||||
value = dateOnly.ToString(ParquetEngineSettings.DateOnlyDisplayFormat);
|
||||
else
|
||||
value = map.Value?.ToString() ?? string.Empty;
|
||||
|
||||
return $"({key},{value})";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,211 +0,0 @@
|
|||
using System.Collections.Immutable;
|
||||
using System.Data;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public class StructValue : IComparable<StructValue>, IComparable
|
||||
{
|
||||
public string Name { get; }
|
||||
|
||||
internal DataRowLite Data { get; }
|
||||
|
||||
internal bool IsList { get; set; }
|
||||
|
||||
//TODO: Add a public constructor?
|
||||
internal StructValue(string name, DataRowLite data)
|
||||
{
|
||||
Name = name ?? throw new ArgumentNullException(nameof(name));
|
||||
Data = data ?? throw new ArgumentNullException(nameof(data));
|
||||
}
|
||||
|
||||
public override string ToString() => ToJSON(out _);
|
||||
|
||||
public string ToStringTruncated(int desiredLength) => ToJSON(out _, desiredLength);
|
||||
|
||||
private string ToJSON(out bool success, int? desiredLength = null)
|
||||
{
|
||||
try
|
||||
{
|
||||
bool isTruncated = false;
|
||||
using var ms = new MemoryStream();
|
||||
using (var jsonWriter = new Utf8JsonWriterWithRunningLength(ms))
|
||||
{
|
||||
jsonWriter.WriteStartObject();
|
||||
for (var i = 0; i < this.Data.Columns.Count; i++)
|
||||
{
|
||||
string columnName = this.Data.Columns.Values.ElementAt(i).Name
|
||||
//Remove the parent field name from columns when rendering the data as json in the gridview cell.
|
||||
.Replace($"{this.Name}/", string.Empty);
|
||||
jsonWriter.WritePropertyName(columnName);
|
||||
|
||||
object value = this.Data.Row[i];
|
||||
WriteValue(jsonWriter, value, desiredLength is not null);
|
||||
|
||||
if (desiredLength > 0 && jsonWriter.ApproximateStringLengthSoFar > desiredLength)
|
||||
{
|
||||
isTruncated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!isTruncated)
|
||||
jsonWriter.WriteEndObject();
|
||||
}
|
||||
|
||||
ms.Position = 0;
|
||||
using var reader = new StreamReader(ms);
|
||||
var json = reader.ReadToEnd();
|
||||
if (isTruncated)
|
||||
{
|
||||
json += "[...]";
|
||||
}
|
||||
success = true;
|
||||
return json;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
success = false;
|
||||
return $"Error while serializing Struct field '{Name}': {Environment.NewLine}{Environment.NewLine}{ex}";
|
||||
}
|
||||
}
|
||||
|
||||
public DataTable ToDataTable() => this.Data.ToDataTable();
|
||||
|
||||
public static void WriteValue(Utf8JsonWriterWithRunningLength jsonWriter, object value, bool truncateForDisplay)
|
||||
{
|
||||
if (value is null)
|
||||
{
|
||||
//Value should never be null as we should be replacing all those with DBNull.Value
|
||||
throw new ArgumentNullException(nameof(value));
|
||||
}
|
||||
else if (value == DBNull.Value)
|
||||
{
|
||||
jsonWriter.WriteNullValue();
|
||||
}
|
||||
else if (value is string str)
|
||||
{
|
||||
jsonWriter.WriteStringValue(str);
|
||||
}
|
||||
else if (value is bool @bool)
|
||||
{
|
||||
jsonWriter.WriteBooleanValue(@bool);
|
||||
}
|
||||
else if (value.GetType().IsNumber())
|
||||
{
|
||||
jsonWriter.WriteNumberValue(Convert.ToDecimal(value));
|
||||
}
|
||||
else if (value is StructValue @struct)
|
||||
{
|
||||
var json = @struct.ToJSON(out var success);
|
||||
if (success)
|
||||
jsonWriter.WriteRawValue(json);
|
||||
else
|
||||
jsonWriter.WriteStringValue(json);
|
||||
}
|
||||
else if (value is MapValue map)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach ((object mapKey, object mapValue) in map)
|
||||
{
|
||||
jsonWriter.WriteStartObject();
|
||||
jsonWriter.WritePropertyName("key");
|
||||
WriteValue(jsonWriter, mapKey, truncateForDisplay);
|
||||
jsonWriter.WritePropertyName("value");
|
||||
WriteValue(jsonWriter, mapValue, truncateForDisplay);
|
||||
jsonWriter.WriteEndObject();
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is ListValue list)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach (var item in list)
|
||||
{
|
||||
WriteValue(jsonWriter, item, truncateForDisplay);
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is ByteArrayValue byteArray /*&& truncateForDisplay //should use the entire byte array if
|
||||
* we're not truncating for display? Seems kind of unreasonable
|
||||
* for users to rely on binary data within a Struct value preview.*/)
|
||||
{
|
||||
const int byteArrayMaxStringLength = 24; //arbitrary number that I think looks good
|
||||
var byteArrayAsString = byteArray.ToStringTruncated(byteArrayMaxStringLength);
|
||||
jsonWriter.WriteStringValue(byteArrayAsString);
|
||||
}
|
||||
else if (value is DateTime dt)
|
||||
{
|
||||
//Write dates as string
|
||||
if (ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(dt.ToString(ParquetEngineSettings.DateDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(dt.ToString());
|
||||
}
|
||||
else
|
||||
{
|
||||
//Everything else just try to write it as string
|
||||
jsonWriter.WriteStringValue(value.ToString()!);
|
||||
}
|
||||
}
|
||||
|
||||
private IReadOnlyCollection<string> FieldNames => Data.Columns.Keys;
|
||||
|
||||
/// <summary>
|
||||
/// Sorts by field names first, then by values
|
||||
/// </summary>
|
||||
public int CompareTo(StructValue? other)
|
||||
{
|
||||
if (other?.Data is null || other.FieldNames.Count == 0)
|
||||
return 1;
|
||||
|
||||
if (Data is null || FieldNames.Count == 0)
|
||||
return -1;
|
||||
|
||||
var otherColumnNames = string.Join("|", other.FieldNames);
|
||||
var columnNames = string.Join("|", this.FieldNames);
|
||||
|
||||
int schemaComparison = columnNames.CompareTo(otherColumnNames);
|
||||
if (schemaComparison != 0)
|
||||
return schemaComparison;
|
||||
|
||||
int fieldCount = FieldNames.Count;
|
||||
for (var i = 0; i < fieldCount; i++)
|
||||
{
|
||||
var otherValue = other.Data.Row[i];
|
||||
var value = Data.Row[i];
|
||||
int comparison = Helpers.CompareTo(value, otherValue);
|
||||
if (comparison != 0)
|
||||
return comparison;
|
||||
}
|
||||
|
||||
return 0; //Both structs appear equal
|
||||
}
|
||||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is StructValue @struct)
|
||||
return CompareTo(@struct);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// https://huggingface.co/docs/hub/en/datasets-image#parquet-format
|
||||
/// </summary>
|
||||
/// <returns>True if this is a struct named "image" with "bytes" and "path" fields</returns>
|
||||
public bool IsHuggingFaceImageFormat([NotNullWhen(true)] out byte[]? data)
|
||||
{
|
||||
if (this.Name == "image" //Should we allow other names?
|
||||
&& FieldNames.Count == 2
|
||||
&& FieldNames.Contains("bytes")
|
||||
&& FieldNames.Contains("path")
|
||||
&& this.Data.GetValue("bytes") is ByteArrayValue byteArrayValue)
|
||||
{
|
||||
data = byteArrayValue.Data;
|
||||
return true;
|
||||
}
|
||||
data = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
119
src/ParquetViewer.Engine/Types/StructValueBase.cs
Normal file
119
src/ParquetViewer.Engine/Types/StructValueBase.cs
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
/// <summary>
/// A struct value backed by a single <see cref="IDataRowLite"/>: a set of named fields and the
/// values stored for them. Supports ordering, conversion to a one-row <see cref="DataTable"/>
/// and JSON rendering (optionally truncated for display).
/// </summary>
public class StructValue : IStructValue
{
    /// <summary>The row holding this struct's column names and values.</summary>
    public IDataRowLite Data { get; }

    /// <summary>Names of the struct's fields, taken from <see cref="Data"/>.</summary>
    public IReadOnlyCollection<string> FieldNames => Data.ColumnNames;

    /// <summary>Creates a struct value over the given row.</summary>
    /// <param name="data">The row providing field names and values.</param>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public StructValue(IDataRowLite data)
    {
        Data = data ?? throw new ArgumentNullException(nameof(data));
    }

    /// <summary>
    /// Sorts by field names first, then by values
    /// </summary>
    /// <param name="other">The struct to compare against; may be null.</param>
    /// <returns>
    /// A negative number, zero or a positive number when this struct sorts before, equal to or
    /// after <paramref name="other"/>. Null sorts before everything; a struct without fields
    /// sorts before any struct that has fields, and two structs without fields are equal.
    /// </returns>
    public int CompareTo(IStructValue? other)
    {
        bool thisIsEmpty = Data is null || FieldNames.Count == 0;

        if (other?.Data is null || other.FieldNames.Count == 0)
        {
            //Two field-less structs must compare as equal, otherwise a.CompareTo(a) and
            //a.CompareTo(b)/b.CompareTo(a) would all report "greater", breaking the IComparable contract.
            return other is not null && thisIsEmpty ? 0 : 1;
        }

        if (thisIsEmpty)
            return -1;

        var otherColumnNames = string.Join("|", other.FieldNames);
        var columnNames = string.Join("|", FieldNames);

        int schemaComparison = columnNames.CompareTo(otherColumnNames);
        if (schemaComparison != 0)
            return schemaComparison;

        //Same field names on both sides: fall back to comparing values position by position
        int fieldCount = FieldNames.Count;
        for (var i = 0; i < fieldCount; i++)
        {
            var otherValue = other.Data.Row[i];
            var value = Data.Row[i];
            int comparison = Helpers.CompareTo(value, otherValue);
            if (comparison != 0)
                return comparison;
        }

        return 0; //Both structs appear equal
    }

    /// <summary>Non-generic comparison; anything that is not an <see cref="IStructValue"/> sorts before this struct.</summary>
    public int CompareTo(object? obj)
    {
        if (obj is IStructValue @struct)
            return CompareTo(@struct);
        else
            return 1;
    }

    /// <summary>
    /// Builds a <see cref="DataTable"/> with one column per field and a single row holding this struct's values.
    /// </summary>
    public DataTable ToDataTable()
    {
        var dt = new DataTable();
        foreach (var pair in Helpers.PairEnumerables(this.Data.ColumnNames, this.Data.Row))
        {
            var columnName = pair.Item1;
            var value = pair.Item2;

            //A DBNull cell carries no type information, so such columns are typed as object
            var valueType = value != DBNull.Value ? value.GetType() : typeof(object);
            dt.Columns.Add(new DataColumn(columnName, valueType));
        }

        var row = dt.NewRow();
        row.ItemArray = this.Data.Row;
        dt.Rows.Add(row);
        return dt;
    }

    /// <summary>Renders the struct as JSON (or as an error message if serialization fails).</summary>
    public override string ToString() => ToJSON(out _);

    /// <summary>Renders the struct as JSON, cut short once roughly <paramref name="desiredLength"/> characters were written.</summary>
    public string ToStringTruncated(int desiredLength) => ToJSON(out _, desiredLength);

    /// <summary>
    /// Serializes the struct as a JSON object with one property per field.
    /// </summary>
    /// <param name="success">True when serialization completed; false when an exception was caught.</param>
    /// <param name="desiredLength">
    /// When greater than zero, writing stops after the first field that pushes the approximate
    /// output length past this value, and "[...]" is appended in place of the closing brace.
    /// </param>
    /// <returns>The JSON text, or an error description when <paramref name="success"/> is false.</returns>
    public string ToJSON(out bool success, int? desiredLength = null)
    {
        try
        {
            bool isTruncated = false;
            bool truncateForDisplay = desiredLength is not null;

            using var ms = new MemoryStream();
            using (var jsonWriter = new Utf8JsonWriterWithRunningLength(ms))
            {
                jsonWriter.WriteStartObject();

                //Enumerate the names once instead of calling ElementAt(i) per iteration,
                //which is a linear scan for collections that are not IList<T>.
                int index = 0;
                foreach (string columnName in Data.ColumnNames)
                {
                    jsonWriter.WritePropertyName(columnName);

                    object value = Data.Row[index++];
                    Helpers.WriteValue(jsonWriter, value, truncateForDisplay);

                    if (desiredLength > 0 && jsonWriter.ApproximateStringLengthSoFar > desiredLength)
                    {
                        isTruncated = true;
                        break;
                    }
                }

                //A truncated object is intentionally left unterminated; the marker below replaces the closing brace
                if (!isTruncated)
                    jsonWriter.WriteEndObject();
            }

            ms.Position = 0;
            using var reader = new StreamReader(ms);
            var json = reader.ReadToEnd();
            if (isTruncated)
            {
                json += "[...]";
            }
            success = true;
            return json;
        }
        catch (Exception ex)
        {
            success = false;
            return $"Error while serializing Struct field: {Environment.NewLine}{Environment.NewLine}{ex}";
        }
    }
}
|
||||
}
|
||||
|
|
@ -98,4 +98,4 @@ namespace ParquetViewer.Engine
|
|||
_writer.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
src/ParquetViewer.Tests/Data/BYTEARRAY_VALUE_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/BYTEARRAY_VALUE_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/DECIMALS_OUTOFRANGE_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/DECIMALS_OUTOFRANGE_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/LIST_OF_NESTED_STRUCTS_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/LIST_OF_NESTED_STRUCTS_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/NESTED_MAPS_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/NESTED_MAPS_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/NESTED_STRUCTS_AND_LISTS.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/NESTED_STRUCTS_AND_LISTS.parquet
Normal file
Binary file not shown.
|
|
@ -1,22 +1,73 @@
|
|||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Data;
|
||||
using System.Text.Json;
|
||||
|
||||
[assembly: Parallelize(Scope = ExecutionScope.MethodLevel)]
|
||||
namespace ParquetViewer.Tests
|
||||
{
|
||||
[TestClass]
|
||||
public class EngineTests
|
||||
public class ParquetNETEngineTests : EngineTests
|
||||
{
|
||||
public EngineTests()
|
||||
public ParquetNETEngineTests() : base(
|
||||
useDuckDBEngine: false,
|
||||
canHandleNullComplexTypes: true,
|
||||
treatsTwoTierListAsStruct: true,
|
||||
"/")
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the shared <see cref="EngineTests"/> suite with the DuckDB engine selected.
/// </summary>
[TestClass]
public class DuckDBEngineTests : EngineTests
{
    public DuckDBEngineTests()
        : base(useDuckDBEngine: true,
               canHandleNullComplexTypes: false,
               treatsTwoTierListAsStruct: false,
               schemaPathSeperator: ", ")
    {
    }
}
|
||||
|
||||
public abstract class EngineTests
|
||||
{
|
||||
private bool _useDuckDBEngine;
|
||||
private bool _canHandleNullComplexTypes;
|
||||
private bool _treatsTwoTierListAsStruct;
|
||||
private string _schemaPathSeperator;
|
||||
|
||||
public EngineTests(bool useDuckDBEngine, bool canHandleNullComplexTypes, bool treatsTwoTierListAsStruct, string schemaPathSeperator)
|
||||
{
|
||||
//Set a consistent date format for all tests
|
||||
ParquetEngineSettings.DateDisplayFormat = "yyyy-MM-dd HH:mm:ss";
|
||||
ParquetEngineSettings.DateOnlyDisplayFormat = "yyyy-MM-dd";
|
||||
|
||||
this._useDuckDBEngine = useDuckDBEngine;
|
||||
this._canHandleNullComplexTypes = canHandleNullComplexTypes;
|
||||
this._treatsTwoTierListAsStruct = treatsTwoTierListAsStruct;
|
||||
this._schemaPathSeperator = schemaPathSeperator;
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
private async Task<IParquetEngine> OpenFileOrFolderAsync(string path, CancellationToken cancellationToken)
|
||||
{
|
||||
if (this._useDuckDBEngine)
|
||||
{
|
||||
return await Engine.DuckDB.ParquetEngine.OpenFileOrFolderAsync(path, cancellationToken);
|
||||
}
|
||||
else
|
||||
{
|
||||
return await Engine.ParquetNET.ParquetEngine.OpenFileOrFolderAsync(path, cancellationToken);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task DECIMALS_AND_BOOLS_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DECIMALS_AND_BOOLS_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DECIMALS_AND_BOOLS_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(30, parquetEngine.RecordCount);
|
||||
Assert.HasCount(337, parquetEngine.Fields);
|
||||
|
|
@ -29,35 +80,35 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(DBNull.Value, dataTable.Rows[21][334]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task DATETIME_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DATETIME_TEST1.parquet", default);
|
||||
|
||||
Assert.AreEqual(10, parquetEngine.RecordCount);
|
||||
Assert.HasCount(3, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual("36/2015-16", dataTable.Rows[0][0]);
|
||||
Assert.AreEqual(new DateTime(2015, 07, 14, 0, 0, 0), dataTable.Rows[1][2]);
|
||||
Assert.AreEqual(new DateOnly(2015, 07, 14), dataTable.Rows[1][2]);
|
||||
Assert.AreEqual(new DateTime(2015, 07, 19, 18, 30, 0), dataTable.Rows[9][1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task DATETIME_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DATETIME_TEST2.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(11, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual((long)1, dataTable.Rows[0][0]);
|
||||
Assert.AreEqual(new DateTime(1985, 12, 31, 0, 0, 0), dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(new DateTime(1, 1, 2, 0, 0, 0), dataTable.Rows[0][2]);
|
||||
Assert.AreEqual(new DateTime(9999, 12, 31, 0, 0, 0), dataTable.Rows[0][3]);
|
||||
Assert.AreEqual(new DateTime(9999, 12, 31, 0, 0, 0), dataTable.Rows[0][4]);
|
||||
Assert.AreEqual(new DateTime(1, 1, 1, 0, 0, 0), dataTable.Rows[0][5]);
|
||||
Assert.AreEqual(new DateOnly(1985, 12, 31), dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(new DateOnly(1, 1, 2), dataTable.Rows[0][2]);
|
||||
Assert.AreEqual(new DateOnly(9999, 12, 31), dataTable.Rows[0][3]);
|
||||
Assert.AreEqual(new DateOnly(9999, 12, 31), dataTable.Rows[0][4]);
|
||||
Assert.AreEqual(new DateOnly(1, 1, 1), dataTable.Rows[0][5]);
|
||||
Assert.AreEqual(new DateTime(1985, 4, 13, 13, 5, 0), dataTable.Rows[0][6]);
|
||||
Assert.AreEqual(new DateTime(1, 1, 2, 0, 0, 0), dataTable.Rows[0][7]);
|
||||
Assert.AreEqual(new DateTime(9999, 12, 31, 23, 59, 59), dataTable.Rows[0][8]);
|
||||
|
|
@ -65,10 +116,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(new DateTime(1, 1, 1, 0, 0, 0), dataTable.Rows[0][10]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task RANDOM_TEST_FILE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/RANDOM_TEST_FILE.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/RANDOM_TEST_FILE.parquet", default);
|
||||
|
||||
Assert.AreEqual(5, parquetEngine.RecordCount);
|
||||
Assert.HasCount(42, parquetEngine.Fields);
|
||||
|
|
@ -83,10 +134,11 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("DLIx12_SHIPCONF_BW15_20220812020138531.DWL", dataTable.Rows[1][41]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(DuckDBEngineTests), "DuckDB automatically appends _1 to the dupe column name")]
|
||||
public async Task SAME_COLUMN_NAME_DIFFERENT_CASING_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/SAME_COLUMN_NAME_DIFFERENT_CASING.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/SAME_COLUMN_NAME_DIFFERENT_CASING.parquet", default);
|
||||
|
||||
Assert.AreEqual(14610, parquetEngine.RecordCount);
|
||||
Assert.HasCount(12, parquetEngine.Fields);
|
||||
|
|
@ -96,17 +148,17 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("Duplicate column 'schema/TransPlan_NORMAL_v2' detected. Column names are case insensitive and must be unique.", ex.Message);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MULTIPLE_SCHEMAS_DETECTED_TEST()
|
||||
{
|
||||
var ex = await Assert.ThrowsAsync<MultipleSchemasFoundException>(() => ParquetEngine.OpenFileOrFolderAsync("Data", default));
|
||||
var ex = await Assert.ThrowsAsync<MultipleSchemasFoundException>(() => OpenFileOrFolderAsync("Data", default));
|
||||
Assert.AreEqual("Multiple schemas found in directory.", ex.Message);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task PARTITIONED_PARQUET_FILE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/PARTITIONED_PARQUET_FILE_TEST", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/PARTITIONED_PARQUET_FILE_TEST", default);
|
||||
|
||||
Assert.AreEqual(2000, parquetEngine.RecordCount);
|
||||
Assert.HasCount(9, parquetEngine.Fields);
|
||||
|
|
@ -128,93 +180,94 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("B000CTP5G2P2", dataTable.Rows[0][8]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task COLUMN_ENDING_IN_PERIOD_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/COLUMN_ENDING_IN_PERIOD_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/COLUMN_ENDING_IN_PERIOD_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(11, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(202252, dataTable.Rows[0][0]);
|
||||
Assert.IsFalse(dataTable.Rows[0]["Output as FP"] as bool?);
|
||||
Assert.IsFalse((bool)dataTable.Rows[0]["Output as FP"]);
|
||||
Assert.AreEqual((byte)0, dataTable.Rows[0]["Preorder FP equi."]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(DuckDBEngineTests), "DuckDB can't handle lists with null in them?")]
|
||||
public async Task LIST_TYPE_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);
|
||||
|
||||
Assert.AreEqual(3, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][0]);
|
||||
Assert.AreEqual("[1,2,3]", dataTable.Rows[0][0].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[\"abc\",\"efg\",\"hij\"]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[1][0]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(@"[""abc"",""efg"",""hij""]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[1][0]);
|
||||
Assert.AreEqual("[null,1]", dataTable.Rows[1][0].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[2][1]);
|
||||
Assert.AreEqual(4, ((ListValue)dataTable.Rows[2][1]).Length);
|
||||
Assert.AreEqual("efg", ((ListValue)dataTable.Rows[2][1]).Data![0]);
|
||||
Assert.AreEqual(DBNull.Value, ((ListValue)dataTable.Rows[2][1]).Data![1]);
|
||||
Assert.AreEqual("xyz", ((ListValue)dataTable.Rows[2][1]).Data![3]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[2][1]);
|
||||
Assert.HasCount(4, (IListValue)dataTable.Rows[2][1]);
|
||||
Assert.AreEqual("efg", ((IListValue)dataTable.Rows[2][1]).Data![0]);
|
||||
Assert.AreEqual(DBNull.Value, ((IListValue)dataTable.Rows[2][1]).Data![1]);
|
||||
Assert.AreEqual("xyz", ((IListValue)dataTable.Rows[2][1]).Data![3]);
|
||||
|
||||
//Also try reading with a record offset
|
||||
dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 1, 1, default))(false);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][0]);
|
||||
Assert.AreEqual("[null,1]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_TYPE_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_TYPE_TEST2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_TYPE_TEST2.parquet", default);
|
||||
|
||||
Assert.AreEqual(8, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 2, 4, default))(false);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
|
||||
Assert.AreEqual("[1,2]", dataTable.Rows[0][1].ToString());
|
||||
Assert.AreEqual(1, ((ListValue)dataTable.Rows[0][1]).Data[0]);
|
||||
Assert.AreEqual(2, ((ListValue)dataTable.Rows[0][1]).Data[1]);
|
||||
Assert.AreEqual(1, ((IListValue)dataTable.Rows[0][1]).Data[0]);
|
||||
Assert.AreEqual(2, ((IListValue)dataTable.Rows[0][1]).Data[1]);
|
||||
|
||||
Assert.AreEqual(string.Empty, dataTable.Rows[1][1].ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[1][1]);
|
||||
|
||||
Assert.AreEqual("[]", dataTable.Rows[2][1].ToString());
|
||||
Assert.IsEmpty(((ListValue)dataTable.Rows[2][1]).Data.Cast<dynamic>());
|
||||
Assert.IsEmpty(((IListValue)dataTable.Rows[2][1]).Data.Cast<dynamic>());
|
||||
|
||||
Assert.AreEqual("[3,4]", dataTable.Rows[3][1].ToString());
|
||||
Assert.AreEqual(3, ((ListValue)dataTable.Rows[3][1]).Data[0]);
|
||||
Assert.AreEqual(4, ((ListValue)dataTable.Rows[3][1]).Data[1]);
|
||||
Assert.AreEqual(3, ((IListValue)dataTable.Rows[3][1]).Data[0]);
|
||||
Assert.AreEqual(4, ((IListValue)dataTable.Rows[3][1]).Data[1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MAP_TYPE_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MAP_TYPE_TEST1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/MAP_TYPE_TEST1.parquet", default);
|
||||
|
||||
Assert.AreEqual(2, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 2, default))(false);
|
||||
|
||||
Assert.IsInstanceOfType<MapValue>(dataTable.Rows[0][0]);
|
||||
var row = (MapValue)dataTable.Rows[0][0];
|
||||
Assert.IsInstanceOfType<IMapValue>(dataTable.Rows[0][0]);
|
||||
var row = (IMapValue)dataTable.Rows[0][0];
|
||||
Assert.AreEqual("id", row.FirstOrDefault().Key);
|
||||
Assert.AreEqual("something", row.FirstOrDefault().Value);
|
||||
Assert.AreEqual("value2", row.Skip(1).FirstOrDefault().Key);
|
||||
Assert.AreEqual("else", row.Skip(1).FirstOrDefault().Value);
|
||||
Assert.AreEqual("[(id,something),(value2,else)]", row.ToString());
|
||||
|
||||
Assert.IsInstanceOfType<MapValue>(dataTable.Rows[1][0]);
|
||||
row = (MapValue)dataTable.Rows[1][0];
|
||||
Assert.IsInstanceOfType<IMapValue>(dataTable.Rows[1][0]);
|
||||
row = (IMapValue)dataTable.Rows[1][0];
|
||||
Assert.AreEqual("id", row.FirstOrDefault().Key);
|
||||
Assert.AreEqual("something2", row.FirstOrDefault().Value);
|
||||
Assert.AreEqual("value", row.Skip(1).FirstOrDefault().Key);
|
||||
|
|
@ -222,90 +275,90 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("[(id,something2),(value,else2)]", row.ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MAP_TYPE_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MAP_TYPE_TEST2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/MAP_TYPE_TEST2.parquet", default);
|
||||
|
||||
Assert.AreEqual(8, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 2, 4, default))(false);
|
||||
Assert.IsInstanceOfType<MapValue>(dataTable.Rows[0][1]);
|
||||
Assert.IsInstanceOfType<IMapValue>(dataTable.Rows[0][1]);
|
||||
|
||||
Assert.AreEqual("[(1,1),(2,2)]", dataTable.Rows[0][1].ToString());
|
||||
Assert.AreEqual(1, ((MapValue)dataTable.Rows[0][1]).Keys[0]);
|
||||
Assert.AreEqual(1, ((MapValue)dataTable.Rows[0][1]).Values[0]);
|
||||
Assert.AreEqual(2, ((MapValue)dataTable.Rows[0][1]).Keys[1]);
|
||||
Assert.AreEqual(2, ((MapValue)dataTable.Rows[0][1]).Values[1]);
|
||||
Assert.AreEqual(1, ((IMapValue)dataTable.Rows[0][1]).Keys[0]);
|
||||
Assert.AreEqual(1, ((IMapValue)dataTable.Rows[0][1]).Values[0]);
|
||||
Assert.AreEqual(2, ((IMapValue)dataTable.Rows[0][1]).Keys[1]);
|
||||
Assert.AreEqual(2, ((IMapValue)dataTable.Rows[0][1]).Values[1]);
|
||||
|
||||
Assert.AreEqual(string.Empty, dataTable.Rows[1][1].ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[1][1]);
|
||||
|
||||
Assert.AreEqual("[]", dataTable.Rows[2][1].ToString());
|
||||
Assert.IsEmpty(((MapValue)dataTable.Rows[2][1]).Keys.Cast<dynamic>());
|
||||
Assert.IsEmpty(((MapValue)dataTable.Rows[2][1]).Values.Cast<dynamic>());
|
||||
Assert.IsEmpty(((IMapValue)dataTable.Rows[2][1]).Keys.Cast<dynamic>());
|
||||
Assert.IsEmpty(((IMapValue)dataTable.Rows[2][1]).Values.Cast<dynamic>());
|
||||
|
||||
Assert.AreEqual("[(3,3),(4,4)]", dataTable.Rows[3][1].ToString());
|
||||
Assert.AreEqual(3, ((MapValue)dataTable.Rows[3][1]).Keys[0]);
|
||||
Assert.AreEqual(3, ((MapValue)dataTable.Rows[3][1]).Values[0]);
|
||||
Assert.AreEqual(4, ((MapValue)dataTable.Rows[3][1]).Keys[1]);
|
||||
Assert.AreEqual(4, ((MapValue)dataTable.Rows[3][1]).Values[1]);
|
||||
Assert.AreEqual(3, ((IMapValue)dataTable.Rows[3][1]).Keys[0]);
|
||||
Assert.AreEqual(3, ((IMapValue)dataTable.Rows[3][1]).Values[0]);
|
||||
Assert.AreEqual(4, ((IMapValue)dataTable.Rows[3][1]).Keys[1]);
|
||||
Assert.AreEqual(4, ((IMapValue)dataTable.Rows[3][1]).Values[1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task STRUCT_TYPE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/STRUCT_TYPE_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/STRUCT_TYPE_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(10, parquetEngine.RecordCount);
|
||||
Assert.HasCount(6, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[2][0]);
|
||||
Assert.AreEqual("{\"appId\":\"e4a20b59-dd0e-4c50-b074-e8ae4786df30\",\"version\":0,\"lastUpdated\":1564524299648}", ((StructValue)dataTable.Rows[2][0]).ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][1]);
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[5][1]);
|
||||
Assert.AreEqual("{\"path\":\"part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet\",\"partitionValues\":[],\"size\":404,\"modificationTime\":1564524299000,\"dataChange\":false,\"stats\":null,\"tags\":null}", ((StructValue)dataTable.Rows[5][1]).ToString());
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[3][2]);
|
||||
Assert.AreEqual("{\"path\":\"part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet\",\"deletionTimestamp\":1564524298213,\"dataChange\":false}", ((StructValue)dataTable.Rows[3][2]).ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][3]);
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[1][3]);
|
||||
Assert.AreEqual("{\"id\":\"22ef18ba-191c-4c36-a606-3dad5cdf3830\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":[]},\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"value\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\"partitionColumns\":null,\"configuration\":[],\"createdTime\":1564524294376}", ((StructValue)dataTable.Rows[1][3]).ToString());
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[0][4]);
|
||||
Assert.AreEqual("{\"minReaderVersion\":1,\"minWriterVersion\":2}", ((StructValue)dataTable.Rows[0][4]).ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][5]);
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[2][0]);
|
||||
Assert.AreEqual("{\"appId\":\"e4a20b59-dd0e-4c50-b074-e8ae4786df30\",\"version\":0,\"lastUpdated\":1564524299648}", dataTable.Rows[2][0].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[5][1]);
|
||||
Assert.AreEqual("{\"path\":\"part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet\",\"partitionValues\":[],\"size\":404,\"modificationTime\":1564524299000,\"dataChange\":false,\"stats\":null,\"tags\":null}", dataTable.Rows[5][1].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[6][2]);
|
||||
Assert.AreEqual("{\"path\":\"part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet\",\"deletionTimestamp\":1564524298214,\"dataChange\":false}", dataTable.Rows[6][2].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[1][3]);
|
||||
if (_canHandleNullComplexTypes)
|
||||
Assert.AreEqual("{\"id\":\"22ef18ba-191c-4c36-a606-3dad5cdf3830\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":[]},\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"value\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\"partitionColumns\":null,\"configuration\":[],\"createdTime\":1564524294376}", dataTable.Rows[1][3].ToString());
|
||||
else
|
||||
Assert.AreEqual("{\"id\":\"22ef18ba-191c-4c36-a606-3dad5cdf3830\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":[]},\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"value\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\"partitionColumns\":[],\"configuration\":[],\"createdTime\":1564524294376}", dataTable.Rows[1][3].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[0][4]);
|
||||
Assert.AreEqual("{\"minReaderVersion\":1,\"minWriterVersion\":2}", dataTable.Rows[0][4].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task NULLABLE_GUID_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/NULLABLE_GUID_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/NULLABLE_GUID_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(33, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.IsFalse(dataTable.Rows[0][22] as bool?);
|
||||
Assert.IsFalse((bool)dataTable.Rows[0][22]);
|
||||
Assert.AreEqual(new Guid("fdcbf90c-20d3-d745-b29f-9c2de1baa979"), dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(new DateTime(2019, 1, 1), dataTable.Rows[0][4]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MALFORMED_DATETIME_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MALFORMED_DATETIME_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/MALFORMED_DATETIME_TEST.parquet", default);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(typeof(DateTime), dataTable.Rows[0]["ds"]?.GetType());
|
||||
Assert.IsInstanceOfType<DateTime>(dataTable.Rows[0]["ds"]);
|
||||
Assert.AreEqual(new DateTime(2017, 1, 1), dataTable.Rows[0]["ds"]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task COLUMN_NAME_WITH_FORWARD_SLASH_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/COLUMN_NAME_WITH_FORWARD_SLASH.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/COLUMN_NAME_WITH_FORWARD_SLASH.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(320, parquetEngine.Fields);
|
||||
|
|
@ -314,10 +367,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual((byte)0, dataTable.Rows[0]["FLC K/L"]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task ORACLE_MALFORMED_INT64_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/ORACLE_MALFORMED_INT64_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/ORACLE_MALFORMED_INT64_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(126, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
|
@ -327,43 +380,41 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual((long)1, dataTable.Rows[0][1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_STRUCTS_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS1.parquet", default);
|
||||
Assert.AreEqual(2, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 1, default))(false);
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 2, default))(false);
|
||||
|
||||
Assert.AreEqual("Product1", dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("Product2", dataTable.Rows[1][0]);
|
||||
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[{\"DateTime\":\"2024-04-15 22:00:00\",\"Quantity\":10},{\"DateTime\":\"2024-04-16 22:00:00\",\"Quantity\":20}]", dataTable.Rows[0][1].ToString());
|
||||
|
||||
dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 1, 1, default))(false);
|
||||
|
||||
Assert.AreEqual("Product2", dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[{\"DateTime\":\"2024-04-15 22:00:00\",\"Quantity\":30},{\"DateTime\":\"2024-04-16 22:00:00\",\"Quantity\":40}]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[1][1]);
|
||||
Assert.AreEqual("[{\"DateTime\":\"2024-04-15 22:00:00\",\"Quantity\":30},{\"DateTime\":\"2024-04-16 22:00:00\",\"Quantity\":40}]", dataTable.Rows[1][1].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_STRUCTS_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS2.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(29, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][28]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][28]);
|
||||
Assert.AreEqual("[{\"purposeId\":\"HF85PyyGFprJXJvh5Pk9tg\",\"status\":\"Granted\",\"externalId\":\"General\",\"date\":\"2025-06-05 14:30:33\"}]", dataTable.Rows[0][28].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task EMPTY_LIST_OF_STRUCTS_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/EMPTY_LIST_OF_STRUCTS.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/EMPTY_LIST_OF_STRUCTS.parquet", default);
|
||||
Assert.AreEqual(2, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -372,18 +423,16 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("Product1", dataTable.Rows[0][0]);
|
||||
Assert.AreEqual("Product2", dataTable.Rows[1][0]);
|
||||
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.IsEmpty(((ListValue)dataTable.Rows[0][1]).Data);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[1][1]);
|
||||
Assert.IsEmpty(((ListValue)dataTable.Rows[1][1]).Data);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[1][1]);
|
||||
Assert.AreEqual("[]", dataTable.Rows[1][1].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task PARQUET_MR_BREAKING_CHANGE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/PARQUET-MR_1.15.0.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/PARQUET-MR_1.15.0.parquet", default);
|
||||
Assert.AreEqual(5, parquetEngine.RecordCount);
|
||||
Assert.HasCount(7, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -395,14 +444,15 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("John Doe", dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("David Lee", dataTable.Rows[4][1]);
|
||||
|
||||
Assert.IsTrue(dataTable.Rows[0][4] as bool?);
|
||||
Assert.IsTrue(dataTable.Rows[4][4] as bool?);
|
||||
Assert.IsTrue((bool)dataTable.Rows[0][4]);
|
||||
Assert.IsTrue((bool)dataTable.Rows[4][4]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(DuckDBEngineTests), "DuckDB can't open this file")]
|
||||
public async Task DECIMALS_WITH_NO_SCALE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DECIMALS_WITH_NO_SCALE_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DECIMALS_WITH_NO_SCALE_TEST.parquet", default);
|
||||
Assert.AreEqual(10589, parquetEngine.RecordCount);
|
||||
Assert.HasCount(8, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -417,10 +467,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(0m, dataTable.Rows[100][7]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_LIST_OF_INT()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_INT.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_INT.parquet", default);
|
||||
Assert.AreEqual(3, parquetEngine.RecordCount);
|
||||
Assert.HasCount(1, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -436,10 +486,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("[[1],[],[3],null,[5]]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_LIST_OF_LIST_OF_STRING()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_LIST_OF_STRING.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_LIST_OF_STRING.parquet", default);
|
||||
Assert.AreEqual(3, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -455,10 +505,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("[[[\"a\",\"b\"],[\"c\",\"d\"],[\"e\"]],[null,[\"f\"]]]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_STRUCT_OF_LIST_OF_STRUCT()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_STRUCT_OF_LIST_OF_STRUCT.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_STRUCT_OF_LIST_OF_STRUCT.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(1, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -468,10 +518,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(expectedJson, dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public async Task TWO_TIER_TEPEATED_LIST_FIELDS_TEST()
|
||||
[SkippableTestMethod]
|
||||
public async Task TWO_TIER_REPEATED_LIST_FIELDS_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/TWO_TIER_TEPEATED_LIST_FIELDS_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/TWO_TIER_TEPEATED_LIST_FIELDS_TEST.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(8, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -482,9 +532,187 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][2]);
|
||||
Assert.AreEqual("hello", dataTable.Rows[0][3]);
|
||||
Assert.AreEqual("[10,20]", dataTable.Rows[0][4].ToString());
|
||||
Assert.AreEqual("{\"nested\":\"nested!\"}", dataTable.Rows[0][5].ToString());
|
||||
if (_treatsTwoTierListAsStruct)
|
||||
Assert.AreEqual("{\"nested\":\"nested!\"}", dataTable.Rows[0][5].ToString());
|
||||
else
|
||||
Assert.AreEqual(@"[""nested!""]", dataTable.Rows[0][5].ToString());
|
||||
|
||||
Assert.AreEqual("096d06d7-e00b-4f70-ad5c-ca4da9a9630a", dataTable.Rows[0][6]);
|
||||
Assert.AreEqual("[\"element1\",\"element2\"]", dataTable.Rows[0][7].ToString());
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
public async Task CUSTOM_METADATA_TEST()
{
    // Verifies that the file-level key/value metadata entries "pandas" and
    // "ARROW:schema" are exposed through the engine's CustomMetadata dictionary.
    const string pandasKey = "pandas";
    const string arrowSchemaKey = "ARROW:schema";

    const string expectedPandas = "{\"index_columns\":[{\"kind\":\"range\",\"name\":null,\"start\":0,\"stop\":3,\"step\":1}],\"column_indexes\":[{\"name\":null,\"field_name\":null,\"pandas_type\":\"unicode\",\"numpy_type\":\"object\",\"metadata\":{\"encoding\":\"UTF-8\"}}],\"columns\":[{\"name\":\"int64_list\",\"field_name\":\"int64_list\",\"pandas_type\":\"list[int64]\",\"numpy_type\":\"object\",\"metadata\":null},{\"name\":\"utf8_list\",\"field_name\":\"utf8_list\",\"pandas_type\":\"list[unicode]\",\"numpy_type\":\"object\",\"metadata\":null}],\"creator\":{\"library\":\"pyarrow\",\"version\":\"0.15.1\"},\"pandas_version\":\"0.25.3\"}";
    const string expectedArrow = "/////4ADAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABAwAQAAAAAAAKAAwAAAAEAAgACgAAAHQCAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAABMAgAABAAAADwCAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDMsICJzdGVwIjogMX1dLCAiY29sdW1uX2luZGV4ZXMiOiBbeyJuYW1lIjogbnVsbCwgImZpZWxkX25hbWUiOiBudWxsLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IHsiZW5jb2RpbmciOiAiVVRGLTgifX1dLCAiY29sdW1ucyI6IFt7Im5hbWUiOiAiaW50NjRfbGlzdCIsICJmaWVsZF9uYW1lIjogImludDY0X2xpc3QiLCAicGFuZGFzX3R5cGUiOiAibGlzdFtpbnQ2NF0iLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiBudWxsfSwgeyJuYW1lIjogInV0ZjhfbGlzdCIsICJmaWVsZF9uYW1lIjogInV0ZjhfbGlzdCIsICJwYW5kYXNfdHlwZSI6ICJsaXN0W3VuaWNvZGVdIiwgIm51bXB5X3R5cGUiOiAib2JqZWN0IiwgIm1ldGFkYXRhIjogbnVsbH1dLCAiY3JlYXRvciI6IHsibGlicmFyeSI6ICJweWFycm93IiwgInZlcnNpb24iOiAiMC4xNS4xIn0sICJwYW5kYXNfdmVyc2lvbiI6ICIwLjI1LjMifQAAAAAGAAAAcGFuZGFzAAACAAAAYAAAAAQAAACE////AAABDEAAAAAQAAAABAAAAAEAAAAIAAAAqP///6T///8AAAEFFAAAAAwAAAAEAAAAAAAAAMT///8EAAAAaXRlbQAAAAAJAAAAdXRmOF9saXN0AAAA3P///wAAAQxkAAAAFAAAAAQAAAABAAAAHAAAAAQABAAEAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAECJAAAABQAAAAEAAAAAAAAAAgADAAIAAcACAAAAAAAAAFAAAAABAAAAGl0ZW0AAAAACgAAAGludDY0X2xpc3QAAA==";

    using var engine = await OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);

    // The pandas entry is JSON; both sides are normalized through TryFormatJSON
    // so the comparison does not depend on whitespace.
    Assert.Contains(pandasKey, engine.CustomMetadata.Keys);
    var actualPandas = TryFormatJSON(engine.CustomMetadata[pandasKey]);
    Assert.AreEqual(TryFormatJSON(expectedPandas), actualPandas);

    // The Arrow schema entry is compared verbatim.
    Assert.Contains(arrowSchemaKey, engine.CustomMetadata.Keys);
    Assert.AreEqual(expectedArrow, engine.CustomMetadata[arrowSchemaKey]);
}
|
||||
|
||||
[SkippableTestMethod]
public async Task DECIMALS_OUTOFRANGE_TEST()
{
    // The file opens and reports its shape, but reading the rows must fail
    // with a DecimalOverflowException.
    using var engine = await OpenFileOrFolderAsync("Data/DECIMALS_OUTOFRANGE_TEST.parquet", default);

    Assert.AreEqual(12, engine.RecordCount);
    Assert.HasCount(51, engine.Fields);

    await Assert.ThrowsAsync<DecimalOverflowException>(
        () => engine.ReadRowsAsync(engine.Fields, 0, int.MaxValue, default));
}
|
||||
|
||||
[SkippableTestMethod]
[SkipWhen(typeof(ParquetNETEngineTests), "Our implementation can't open this file")]
public async Task LIST_OF_NESTED_STRUCTS_TEST()
{
    // Single row, single column: a list of structs whose inner struct "B"
    // is populated, has a null field, or is itself null.
    const string expectedCell = "[{\"B\":{\"id\":1}},{\"B\":{\"id\":null}},{\"B\":null}]";

    using var engine = await OpenFileOrFolderAsync("Data/LIST_OF_NESTED_STRUCTS_TEST.parquet", default);
    Assert.AreEqual(1, engine.RecordCount);
    Assert.HasCount(1, engine.Fields);

    var buildTable = await engine.ReadRowsAsync(engine.Fields, 0, int.MaxValue, default);
    var table = buildTable(false);

    Assert.AreEqual(expectedCell, table.Rows[0][0].ToString());
}
|
||||
|
||||
[SkippableTestMethod]
[SkipWhen(typeof(ParquetNETEngineTests), "Nested Maps not supported")]
public async Task NESTED_MAPS_TEST()
{
    // Reads the six records in two pages of three rows each and checks the
    // string rendering of the first column (a map whose values are maps).
    using var engine = await OpenFileOrFolderAsync("Data/NESTED_MAPS_TEST.parquet", default);
    Assert.AreEqual(6, engine.RecordCount);
    Assert.HasCount(3, engine.Fields);

    // First page: offset 0, three rows.
    var firstPage = (await engine.ReadRowsAsync(engine.Fields, 0, 3, default))(false);
    Assert.AreEqual("[(a,[(1,True),(2,False)])]", firstPage.Rows[0][0].ToString());
    Assert.AreEqual("[(b,[(1,True)])]", firstPage.Rows[1][0].ToString());
    Assert.AreEqual("[(c,)]", firstPage.Rows[2][0].ToString());

    // Second page: offset 3, three rows.
    var secondPage = (await engine.ReadRowsAsync(engine.Fields, 3, 3, default))(false);
    Assert.AreEqual("[(d,[])]", secondPage.Rows[0][0].ToString());
    Assert.AreEqual("[(e,[(1,True)])]", secondPage.Rows[1][0].ToString());
    Assert.AreEqual("[(f,[(3,True),(4,False),(5,True)])]", secondPage.Rows[2][0].ToString());
}
|
||||
|
||||
/// <summary>
/// Re-serializes <paramref name="possibleJSON"/> as indented JSON. If the input
/// cannot be parsed (or any other exception occurs), it is returned unchanged.
/// </summary>
/// <param name="possibleJSON">Text that may or may not be valid JSON.</param>
/// <returns>The indented JSON text, or the original input on failure.</returns>
private static string TryFormatJSON(string possibleJSON)
{
    string formatted;
    try
    {
        var indentedOutput = new JsonSerializerOptions { WriteIndented = true };
        JsonElement parsed = JsonSerializer.Deserialize<JsonElement>(possibleJSON);
        formatted = JsonSerializer.Serialize(parsed, indentedOutput);
    }
    catch (Exception)
    {
        // Malformed JSON detected: fall back to the raw text.
        formatted = possibleJSON;
    }

    return formatted;
}
|
||||
|
||||
[SkippableTestMethod]
public async Task BYTEARRAY_VALUE_TEST()
{
    // The single cell must surface as an IByteArrayValue whose ToString()
    // renders the bytes as dash-separated hex pairs.
    const string expected = "67-33-73-68-61-72-70-5F-73-74-6C-20-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00";

    using var engine = await OpenFileOrFolderAsync("Data/BYTEARRAY_VALUE_TEST.parquet", default);
    Assert.AreEqual(1, engine.RecordCount);
    Assert.HasCount(1, engine.Fields);

    var buildTable = await engine.ReadRowsAsync(engine.Fields, 0, int.MaxValue, default);
    var table = buildTable(false);

    var cell = table.Rows[0][0];
    Assert.IsInstanceOfType<IByteArrayValue>(cell);
    Assert.AreEqual(expected, cell.ToString());
}
|
||||
|
||||
[SkippableTestMethod]
[SkipWhen(typeof(ParquetNETEngineTests), "List field is causing issues")]
public async Task NESTED_STRUCTS_AND_LISTS()
{
    // Reads only the first record and spot-checks three list-typed columns
    // (indexes 1, 11 and 19) for both their runtime type and string rendering.
    using var engine = await OpenFileOrFolderAsync("Data/NESTED_STRUCTS_AND_LISTS.parquet", default);
    Assert.AreEqual(552, engine.RecordCount);
    Assert.HasCount(20, engine.Fields);

    var table = (await engine.ReadRowsAsync(engine.Fields, 0, 1, default))(false);
    var firstRow = table.Rows[0];

    // Column 1: list of structs.
    Assert.IsInstanceOfType<IListValue>(firstRow[1]);
    Assert.AreEqual("[{\"explicit\":null,\"ref_reco\":3,\"text\":\"it is not the case that routine child vaccinations should be mandatory.\"}]", firstRow[1].ToString());

    // Column 11: list of string lists.
    Assert.IsInstanceOfType<IListValue>(firstRow[11]);
    Assert.AreEqual("[[\"p\",\"routine child vaccinations, or their side effects, are dangerous\"],[\"q\",\"routine child vaccinations should be mandatory\"]]", firstRow[11].ToString());

    // Column 19: list of string lists.
    Assert.IsInstanceOfType<IListValue>(firstRow[19]);
    Assert.AreEqual("[[\"id\",\"argkp_1feffc6a-01eb-4f64-a42f-db898627fbc8\"]]", firstRow[19].ToString());
}
|
||||
|
||||
[SkippableTestMethod]
public async Task METADATA_TEST1()
{
    // Verifies the file-level metadata, the single row group, and the column
    // chunk metadata (including statistics) of the first and last columns.
    using var parquetEngine = await OpenFileOrFolderAsync("Data/NESTED_STRUCTS_AND_LISTS.parquet", default);

    // File-level metadata.
    Assert.AreEqual(1, parquetEngine.Metadata.ParquetVersion);
    Assert.AreEqual(552, parquetEngine.Metadata.RowCount);
    Assert.AreEqual(1, parquetEngine.Metadata.RowGroupCount);
    Assert.AreEqual("parquet-cpp-arrow version 4.0.1", parquetEngine.Metadata.CreatedBy);
    Assert.HasCount(1, parquetEngine.Metadata.RowGroups);

    // Row group metadata.
    var rowGroup = parquetEngine.Metadata.RowGroups.First();
    Assert.AreEqual(33, rowGroup.ColumnCount);
    Assert.AreEqual(552, rowGroup.RowCount);
    Assert.AreEqual(2704, rowGroup.FileOffset);
    Assert.AreEqual(0, rowGroup.Ordinal);
    Assert.AreEqual(134465, rowGroup.TotalByteSize);
    Assert.AreEqual(61314, rowGroup.TotalCompressedSize);

    Assert.IsNotNull(rowGroup.Columns);
    Assert.HasCount(33, rowGroup.Columns);

    // First column chunk.
    var firstColumn = rowGroup.Columns.First();
    Assert.IsNull(firstColumn.BloomFilterLength);
    Assert.IsNull(firstColumn.BloomFilterOffset);
    Assert.AreEqual(0, firstColumn.ColumnId);
    Assert.AreEqual(1801, firstColumn.DataPageOffset);
    Assert.AreEqual(4, firstColumn.DictionaryPageOffset);
    Assert.IsNull(firstColumn.IndexPageOffset);
    Assert.AreEqual(552, firstColumn.NumValues);
    Assert.AreEqual("argdown_reconstruction", firstColumn.PathInSchema);
    Assert.AreEqual(2700, firstColumn.TotalCompressedSize);
    Assert.AreEqual(10114, firstColumn.TotalUncompressedSize);
    Assert.AreEqual("BYTE_ARRAY", firstColumn.Type);

    Assert.IsNotNull(firstColumn.Statistics);
    Assert.IsNull(firstColumn.Statistics.Min);
    Assert.IsNull(firstColumn.Statistics.Max);
    Assert.IsNull(firstColumn.Statistics.DistinctCount);
    Assert.AreEqual(0, firstColumn.Statistics.NullCount);
    Assert.AreEqual("(1) child vaccination saves lives. (2) if child vaccination saves lives then routine child vaccinations should be mandatory. -- with modus ponens from (1) (2) -- (3) routine child vaccinations should be mandatory.", firstColumn.Statistics.MinValue);
    Assert.AreEqual("(1) the us offers great opportunities for individuals. (2) if the us offers great opportunities for individuals then the usa is a good country to live in. -- with modus ponens from (1) (2) -- (3) the usa is a good country to live in.", firstColumn.Statistics.MaxValue);
    Assert.IsNull(firstColumn.Statistics.IsMinValueExact);
    // Previously IsMinValueExact was asserted twice and the max-side flag was never checked.
    // NOTE(review): assumes Statistics exposes IsMaxValueExact alongside IsMinValueExact - confirm.
    Assert.IsNull(firstColumn.Statistics.IsMaxValueExact);

    // Last column chunk.
    var lastColumn = rowGroup.Columns.Last();
    Assert.IsNull(lastColumn.BloomFilterLength);
    Assert.IsNull(lastColumn.BloomFilterOffset);
    Assert.AreEqual(32, lastColumn.ColumnId);
    Assert.AreEqual(63771, lastColumn.DataPageOffset);
    Assert.AreEqual(43433, lastColumn.DictionaryPageOffset);
    Assert.IsNull(lastColumn.IndexPageOffset);
    Assert.AreEqual(1104, lastColumn.NumValues);
    // The separator between schema path segments is supplied by the field _schemaPathSeperator.
    Assert.AreEqual($"metadata{_schemaPathSeperator}list{_schemaPathSeperator}item{_schemaPathSeperator}list{_schemaPathSeperator}item", lastColumn.PathInSchema);
    Assert.AreEqual(21830, lastColumn.TotalCompressedSize);
    Assert.AreEqual(27163, lastColumn.TotalUncompressedSize);
    Assert.AreEqual("BYTE_ARRAY", lastColumn.Type);

    Assert.IsNotNull(lastColumn.Statistics);
    Assert.IsNull(lastColumn.Statistics.Min);
    Assert.IsNull(lastColumn.Statistics.Max);
    Assert.IsNull(lastColumn.Statistics.DistinctCount);
    Assert.AreEqual(0, lastColumn.Statistics.NullCount);
    Assert.AreEqual("argkp_007a45bc-7a3b-4030-8178-33d7c5fa5cb8", lastColumn.Statistics.MinValue);
    Assert.AreEqual("id", lastColumn.Statistics.MaxValue);
    Assert.IsNull(lastColumn.Statistics.IsMinValueExact);
    // Same duplicate-assertion fix as for the first column (see the NOTE above).
    Assert.IsNull(lastColumn.Statistics.IsMaxValueExact);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -288,7 +288,7 @@ namespace ParquetViewer.Tests
|
|||
[TestMethod]
|
||||
public void ByteArrayValue_IsCorrectlyTruncated()
|
||||
{
|
||||
var byteArrayValue = new ByteArrayValue("test", [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10]);
|
||||
var byteArrayValue = new Engine.Types.ByteArrayValue([0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10]);
|
||||
Assert.AreEqual("01[...]10", byteArrayValue.ToStringTruncated(1));
|
||||
Assert.AreEqual("01[...]10", byteArrayValue.ToStringTruncated(2));
|
||||
Assert.AreEqual("01-02[...]09-10", byteArrayValue.ToStringTruncated(11));
|
||||
|
|
@ -297,4 +297,4 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("01-02-03-04-05-06-07-08-09-10", byteArrayValue.ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net8.0-windows</TargetFramework>
|
||||
<TargetFramework>net10.0-windows</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<IsPackable>false</IsPackable>
|
||||
|
|
@ -28,6 +28,8 @@
|
|||
<PackageReference Include="RichardSzalay.MockHttp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ParquetViewer.Engine.DuckDB\ParquetViewer.Engine.DuckDB.csproj" />
|
||||
<ProjectReference Include="..\ParquetViewer.Engine.ParquetNET\ParquetViewer.Engine.ParquetNET.csproj" />
|
||||
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
|
||||
<ProjectReference Include="..\ParquetViewer\ParquetViewer.csproj" />
|
||||
</ItemGroup>
|
||||
|
|
|
|||
15
src/ParquetViewer.Tests/SkipWhenAttribute.cs
Normal file
15
src/ParquetViewer.Tests/SkipWhenAttribute.cs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
namespace ParquetViewer.Tests
{
    /// <summary>
    /// Marks a test method as one that should be skipped when it runs as part of
    /// the given test class. May be applied several times to the same method.
    /// </summary>
    [AttributeUsage(AttributeTargets.Method, AllowMultiple = true)]
    public class SkipWhenAttribute : Attribute
    {
        /// <summary>Creates the marker for one test class.</summary>
        /// <param name="testClassToSkip">Test class for which the method is skipped.</param>
        /// <param name="reason">Optional explanation of why the test is skipped.</param>
        public SkipWhenAttribute(Type testClassToSkip, string? reason)
            => (TestClassToSkip, Reason) = (testClassToSkip, reason);

        /// <summary>Test class for which the decorated method is skipped.</summary>
        public Type TestClassToSkip { get; }

        /// <summary>Optional explanation of why the test is skipped.</summary>
        public string? Reason { get; set; }
    }
}
|
||||
38
src/ParquetViewer.Tests/SkippableTestMethodAttribute.cs
Normal file
38
src/ParquetViewer.Tests/SkippableTestMethodAttribute.cs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace ParquetViewer.Tests
|
||||
{
|
||||
/// <summary>
/// A TestMethodAttribute that does not run the test when the method carries a
/// <see cref="SkipWhenAttribute"/> whose TestClassToSkip full name equals the name of the
/// test class being executed. In that case a single Inconclusive result is returned;
/// otherwise execution is delegated to the base attribute.
/// </summary>
// NOTE(review): presumably test methods are declared on a shared base class and inherited by
// several concrete test classes, which is why the executing class name is compared - confirm.
internal class SkippableTestMethodAttribute : TestMethodAttribute
|
||||
{
|
||||
/// <summary>
/// Forwards the caller's file path and line number (filled in by the compiler via the
/// CallerFilePath / CallerLineNumber attributes) to the base constructor.
/// </summary>
public SkippableTestMethodAttribute([CallerFilePath] string callerFilePath = "", [CallerLineNumber] int callerLineNumber = -1)
|
||||
: base(callerFilePath, callerLineNumber)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns a single Inconclusive result if a matching SkipWhenAttribute is found on the
/// test method; otherwise returns the result of the base implementation.
/// </summary>
/// <param name="testMethod">The test method about to be executed.</param>
public override Task<TestResult[]> ExecuteAsync(ITestMethod testMethod)
|
||||
{
|
||||
var methodInfo = testMethod.MethodInfo;
|
||||
// Collect every SkipWhenAttribute on the method, including inherited ones.
var skipAttrs = methodInfo.GetCustomAttributes(typeof(SkipWhenAttribute), inherit: true)
|
||||
.Cast<SkipWhenAttribute>()
|
||||
.ToList();
|
||||
|
||||
// Match on the full type name of the class to skip versus the executing test class name.
var skipAttribute = skipAttrs.FirstOrDefault(a => a.TestClassToSkip.FullName == testMethod.TestClassName);
|
||||
if (skipAttribute is not null)
|
||||
{
|
||||
var result = new TestResult
|
||||
{
|
||||
Outcome = UnitTestOutcome.Inconclusive, // treated as skipped in MSTest
|
||||
TestFailureException = null
|
||||
|
||||
};
|
||||
// Message format: "Test skipped for <class>.<method>[ <reason>]."
result.TestContextMessages
|
||||
= $"Test skipped for {testMethod.TestClassName}.{testMethod.TestMethodName}" +
|
||||
$"{(skipAttribute.Reason is not null ? $" {skipAttribute.Reason}" : string.Empty)}.";
|
||||
|
||||
// Short-circuit: the test body is never invoked on this path.
return Task.FromResult(new[] { result });
|
||||
}
|
||||
|
||||
// No matching skip attribute: run the test normally.
return base.ExecuteAsync(testMethod);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -40,4 +40,4 @@ namespace ParquetViewer.Tests
|
|||
public bool AnalyticsDataGatheringConsent => true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.4.33213.308
|
||||
# Visual Studio Version 18
|
||||
VisualStudioVersion = 18.1.11312.151 d18.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParquetViewer", "ParquetViewer\ParquetViewer.csproj", "{6019FC1B-3610-4682-BF96-8345C95CB7EC}"
|
||||
EndProject
|
||||
|
|
@ -9,6 +9,15 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParquetViewer.Engine", "Par
|
|||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParquetViewer.Tests", "ParquetViewer.Tests\ParquetViewer.Tests.csproj", "{16D10BC9-08BF-4248-8975-1B54C42EB2C2}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParquetViewer.Engine.DuckDB", "ParquetViewer.Engine.DuckDB\ParquetViewer.Engine.DuckDB.csproj", "{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParquetViewer.Engine.ParquetNET", "ParquetViewer.Engine.ParquetNET\ParquetViewer.Engine.ParquetNET.csproj", "{4B69AD86-BDF8-01E8-59B8-E690760BB827}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{F5D39637-1812-4802-8DB3-254CBBE5C313}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
.editorconfig = .editorconfig
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
|
|
@ -18,8 +27,8 @@ Global
|
|||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.ActiveCfg = Release_SelfContained|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.Build.0 = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
|
|
@ -34,6 +43,18 @@ Global
|
|||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release_SelfContained|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release_SelfContained|Any CPU.Build.0 = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release_SelfContained|Any CPU.ActiveCfg = Release_SelfContained|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
|
|
|||
|
|
@ -168,9 +168,9 @@ namespace ParquetViewer
|
|||
}
|
||||
else if (success == false)
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.FileAssociationFailedErrorMessageFormat.Format(exitCode),
|
||||
Resources.Errors.FileAssociationFailedErrorTitle,
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.FileAssociationFailedErrorMessageFormat.Format(exitCode),
|
||||
Resources.Errors.FileAssociationFailedErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
SetCheckboxSilent(!associateFileExtensionCheckBox.Checked);
|
||||
}
|
||||
|
|
@ -209,7 +209,7 @@ namespace ParquetViewer
|
|||
this.newVersionLabel.Image = Resources.Icons.external_link_icon;
|
||||
}
|
||||
else if (latestRelease.Version == Env.AssemblyVersion)
|
||||
{
|
||||
{
|
||||
this.newVersionLabel.Enabled = false;
|
||||
}
|
||||
}
|
||||
|
|
@ -291,4 +291,4 @@ namespace ParquetViewer
|
|||
Process.Start(new ProcessStartInfo(url.ToString()) { UseShellExecute = true });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
using System;
|
||||
using ParquetViewer.Exceptions;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json.Serialization;
|
||||
|
|
@ -23,6 +24,9 @@ namespace ParquetViewer.Analytics
|
|||
public long ReadTimeMS { get; set; }
|
||||
public long IndexTimeMS { get; set; }
|
||||
public long RenderTimeMS { get; set; }
|
||||
[JsonIgnore]
|
||||
public ParquetEngineTypeId EngineType { get; set; }
|
||||
public string EngineTypeName => EngineType.ToString();
|
||||
|
||||
public FileOpenEvent() : base(EVENT_TYPE)
|
||||
{
|
||||
|
|
@ -30,8 +34,8 @@ namespace ParquetViewer.Analytics
|
|||
}
|
||||
|
||||
public static void FireAndForget(bool isFolder, int numPartitions, long numRows, int numRowGroups, int numFields,
|
||||
string[] fieldTypes, long recordOffset, long recordCount, int numLoadedFields,
|
||||
long totalLoadTimeMilliseconds, long readTimeMS, long indexTimeMS, long renderTimeMS)
|
||||
string[] fieldTypes, long recordOffset, long recordCount, int numLoadedFields, long totalLoadTimeMilliseconds,
|
||||
long readTimeMS, long indexTimeMS, long renderTimeMS, ParquetEngineTypeId engineType)
|
||||
{
|
||||
var _ = new FileOpenEvent
|
||||
{
|
||||
|
|
@ -47,9 +51,16 @@ namespace ParquetViewer.Analytics
|
|||
LoadTimeMS = totalLoadTimeMilliseconds,
|
||||
ReadTimeMS = readTimeMS,
|
||||
IndexTimeMS = indexTimeMS,
|
||||
RenderTimeMS = renderTimeMS
|
||||
RenderTimeMS = renderTimeMS,
|
||||
EngineType = engineType,
|
||||
}.Record();
|
||||
}
|
||||
|
||||
public enum ParquetEngineTypeId
|
||||
{
|
||||
ParquetNET,
|
||||
DuckDB
|
||||
}
|
||||
}
|
||||
|
||||
public class FileExportEvent : AmplitudeEvent
|
||||
|
|
@ -114,7 +125,8 @@ namespace ParquetViewer.Analytics
|
|||
AboutBox,
|
||||
UserGuide,
|
||||
DragDrop,
|
||||
LoadAllRows
|
||||
LoadAllRows,
|
||||
QueryEditor,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -169,9 +181,18 @@ namespace ParquetViewer.Analytics
|
|||
this.Exception = ex ?? throw new ArgumentNullException(nameof(ex));
|
||||
}
|
||||
|
||||
public static void FireAndForget(System.Exception ex)
|
||||
public static void FireAndForget(Exception ex)
|
||||
{
|
||||
var _ = new ExceptionEvent(ex).Record();
|
||||
if (ex is RowsReadException rre)
|
||||
{
|
||||
//Record two separate exceptions for both parquet.net and duckdb
|
||||
var _ = new ExceptionEvent(rre.ParquetNetException).Record()
|
||||
.ContinueWith((_) => _ = new ExceptionEvent(rre.DuckDbException).Record());
|
||||
}
|
||||
else
|
||||
{
|
||||
var _ = new ExceptionEvent(ex).Record();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -210,10 +231,11 @@ namespace ParquetViewer.Analytics
|
|||
private const string EVENT_TYPE = "sql.execute";
|
||||
|
||||
public bool IsValid { get; set; }
|
||||
public int RecordCountTotal { get; set; }
|
||||
public int? RecordCountTotal { get; set; }
|
||||
public int? RecordCountFiltered { get; set; }
|
||||
public int ColumnCount { get; set; }
|
||||
public int? ColumnCount { get; set; }
|
||||
public long RunTimeMS { get; set; }
|
||||
public bool IsDuckDB { get; set; }
|
||||
|
||||
public ExecuteQueryEvent() : base(EVENT_TYPE)
|
||||
{
|
||||
|
|
@ -237,4 +259,4 @@ namespace ParquetViewer.Analytics
|
|||
var _ = new ColumnFormattedEvent(formatName).Record();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -20,4 +20,4 @@ namespace ParquetViewer.Analytics
|
|||
ConsentProvider = consentProvider;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
{
|
||||
public bool AnalyticsDataGatheringConsent => AppSettings.AnalyticsDataGatheringConsent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
{
|
||||
public bool AnalyticsDataGatheringConsent { get; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -20,6 +20,7 @@ namespace ParquetViewer
|
|||
private const string CustomDateFormatKey = "CustomDateFormat";
|
||||
private const string DarkModeKey = "DarkMode";
|
||||
private const string UserSelectedCultureKey = "UserSelectedCulture";
|
||||
private const string QueryEditorZoomLevelKey = "QueryEditorZoomLevel";
|
||||
|
||||
public static DateFormat DateTimeDisplayFormat
|
||||
{
|
||||
|
|
@ -109,11 +110,17 @@ namespace ParquetViewer
|
|||
public static CultureInfo? UserSelectedCulture
|
||||
{
|
||||
get => ReadRegistryValue(UserSelectedCultureKey, out string? value) ?
|
||||
(UtilityMethods.TryParseCultureInfo(value, out CultureInfo? cultureInfo) ? cultureInfo : null)
|
||||
(UtilityMethods.TryParseCultureInfo(value, out CultureInfo? cultureInfo) ? cultureInfo : null)
|
||||
: null;
|
||||
set => SetRegistryValue(UserSelectedCultureKey, value?.ToString() ?? string.Empty);
|
||||
}
|
||||
|
||||
public static int? QueryEditorZoomLevel
|
||||
{
|
||||
get => ReadRegistryValue(QueryEditorZoomLevelKey, out int value) ? value : null;
|
||||
set => SetRegistryValue(QueryEditorZoomLevelKey, value);
|
||||
}
|
||||
|
||||
private static bool ReadRegistryValue<T>(string key, [NotNullWhen(true)] out T? value)
|
||||
{
|
||||
try
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ namespace ParquetViewer.Controls
|
|||
try
|
||||
{
|
||||
//Prepare audio stream
|
||||
if (this.Value is ByteArrayValue byteArray)
|
||||
if (this.Value is IByteArrayValue byteArray)
|
||||
{
|
||||
this._audioStream = GetAudioStream(byteArray.Data, out var audioFormat);
|
||||
this._audioFormat = audioFormat;
|
||||
|
|
@ -79,7 +79,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidDataException($"{this.ValueType.Name} was not the expected type {nameof(ByteArrayValue)}");
|
||||
throw new InvalidDataException($"{this.ValueType.Name} was not the expected type {nameof(IByteArrayValue)}");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
|
@ -112,7 +112,7 @@ namespace ParquetViewer.Controls
|
|||
this.RedrawCell();
|
||||
}
|
||||
|
||||
protected override void Paint(Graphics graphics, Rectangle clipBounds, Rectangle cellBounds, int rowIndex, DataGridViewElementStates cellState, object value, object formattedValue, string errorText, DataGridViewCellStyle cellStyle, DataGridViewAdvancedBorderStyle advancedBorderStyle, DataGridViewPaintParts paintParts)
|
||||
protected override void Paint(Graphics graphics, Rectangle clipBounds, Rectangle cellBounds, int rowIndex, DataGridViewElementStates cellState, object? value, object? formattedValue, string? errorText, DataGridViewCellStyle cellStyle, DataGridViewAdvancedBorderStyle advancedBorderStyle, DataGridViewPaintParts paintParts)
|
||||
{
|
||||
InitializePlayerAsync(); //Trigger initialization if it wasn't performed yet
|
||||
|
||||
|
|
@ -359,7 +359,7 @@ namespace ParquetViewer.Controls
|
|||
if (this.DataGridView is null) //just in case
|
||||
return;
|
||||
|
||||
if (this.Value is not ByteArrayValue byteArrayValue)
|
||||
if (this.Value is not IByteArrayValue byteArrayValue)
|
||||
return;
|
||||
|
||||
if (this._audioFormat is null || this._audioFormat == AudioFormat.Invalid)
|
||||
|
|
@ -380,7 +380,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
CleanupFile(saveFileDialog.FileName); //Delete any existing file (user already confirmed any overwrite)
|
||||
|
||||
if (this.Value is not ByteArrayValue byteArray)
|
||||
if (this.Value is not IByteArrayValue byteArray)
|
||||
throw new InvalidDataException("Audio data was not found");
|
||||
|
||||
await File.WriteAllBytesAsync(saveFileDialog.FileName, byteArray.Data);
|
||||
|
|
@ -484,7 +484,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public enum AudioFormat
|
||||
{
|
||||
Invalid,
|
||||
|
|
|
|||
|
|
@ -51,4 +51,4 @@ namespace ParquetViewer.Controls
|
|||
_tooltip.SetToolTip(this, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
using System;
|
||||
using System.ComponentModel;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ParquetViewer.Controls
|
||||
|
|
@ -34,6 +35,7 @@ namespace ParquetViewer.Controls
|
|||
base.Dispose(disposing);
|
||||
}
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public int DelayedTextChangedTimeout { get; set; }
|
||||
|
||||
protected virtual void OnDelayedTextChanged(EventArgs e)
|
||||
|
|
@ -91,4 +93,4 @@ namespace ParquetViewer.Controls
|
|||
OnDelayedTextChanged(EventArgs.Empty);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -79,4 +79,4 @@ namespace ParquetViewer.Controls
|
|||
_openForms.Remove(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ using ParquetViewer.Engine.Types;
|
|||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Data;
|
||||
using System.Drawing;
|
||||
using System.Linq;
|
||||
|
|
@ -22,6 +23,7 @@ namespace ParquetViewer.Controls
|
|||
const string FORMATTING_ERROR_TEXT = "#ERR";
|
||||
|
||||
private Theme _gridTheme = Theme.LightModeTheme;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Theme GridTheme
|
||||
{
|
||||
get => _gridTheme;
|
||||
|
|
@ -35,9 +37,16 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Image? CopyToClipboardIcon { get; set; } = null;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Image? CopyAsWhereIcon { get; set; } = null;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public bool ShowCopyAsWhereContextMenuItem { get; set; } = false;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public string ColumnNameEscapeFormat { get; set; } = "[{0}]";
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public string DateValueEscapeFormat { get; set; } = "#{0}#";
|
||||
|
||||
private readonly HashSet<int> clickableColumnIndexes = new();
|
||||
private readonly Dictionary<(int, int), QuickPeekForm> openQuickPeekForms = new();
|
||||
|
|
@ -48,7 +57,7 @@ namespace ParquetViewer.Controls
|
|||
private static readonly Regex _validColumnNameRegex = new Regex("^[a-zA-Z0-9_]+$");
|
||||
|
||||
//We keep track of format overrides with the column name so we can keep formatting the same if the user adds/removes fields from the same file
|
||||
private readonly Dictionary<string, ByteArrayValue.DisplayFormat> byteArrayColumnsWithFormatOverrides = new();
|
||||
private readonly Dictionary<string, IByteArrayValue.DisplayFormat> byteArrayColumnsWithFormatOverrides = new();
|
||||
private readonly Dictionary<string, FloatDisplayFormat> floatColumnsWithFormatOverrides = new();
|
||||
|
||||
public ParquetGridView() : base()
|
||||
|
|
@ -87,13 +96,13 @@ namespace ParquetViewer.Controls
|
|||
{
|
||||
checkboxColumn.ThreeState = true;
|
||||
}
|
||||
else if (column.ValueType == typeof(ListValue)
|
||||
|| column.ValueType == typeof(MapValue)
|
||||
|| column.ValueType == typeof(StructValue))
|
||||
else if (column.ValueType.ImplementsInterface<IListValue>()
|
||||
|| column.ValueType.ImplementsInterface<IMapValue>()
|
||||
|| column.ValueType.ImplementsInterface<IStructValue>())
|
||||
{
|
||||
column.DefaultCellStyle = GetHyperlinkCellStyle(column);
|
||||
}
|
||||
else if (column.ValueType == typeof(ByteArrayValue))
|
||||
else if (column.ValueType.ImplementsInterface<IByteArrayValue>())
|
||||
{
|
||||
//Check if this column contains images
|
||||
for (var i = 0; i < this.Rows.Count; i++)
|
||||
|
|
@ -101,7 +110,7 @@ namespace ParquetViewer.Controls
|
|||
var cellValue = this[column.Index, i].Value;
|
||||
if (cellValue != DBNull.Value)
|
||||
{
|
||||
var isImage = ((ByteArrayValue)cellValue).ToImage(out var image);
|
||||
var isImage = ((IByteArrayValue)cellValue!).ToImage(out var image);
|
||||
if (isImage)
|
||||
{
|
||||
column.DefaultCellStyle = GetHyperlinkCellStyle(column);
|
||||
|
|
@ -117,15 +126,19 @@ namespace ParquetViewer.Controls
|
|||
public void UpdateDateFormats()
|
||||
{
|
||||
string dateFormat = AppSettings.DateTimeDisplayFormat.GetDateFormat();
|
||||
string dateOnlyFormat = AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat();
|
||||
|
||||
foreach (DataGridViewColumn column in this.Columns)
|
||||
{
|
||||
if (column.ValueType == typeof(DateTime))
|
||||
column.DefaultCellStyle.Format = dateFormat;
|
||||
else if (column.ValueType == typeof(DateOnly))
|
||||
column.DefaultCellStyle.Format = dateOnlyFormat;
|
||||
}
|
||||
|
||||
//Need to tell the parquet engine how to render date values
|
||||
ParquetEngineSettings.DateDisplayFormat = dateFormat;
|
||||
ParquetEngineSettings.DateOnlyDisplayFormat = dateOnlyFormat;
|
||||
}
|
||||
|
||||
protected override void OnCellPainting(DataGridViewCellPaintingEventArgs e)
|
||||
|
|
@ -140,8 +153,8 @@ namespace ParquetViewer.Controls
|
|||
e.PaintBackground(e.CellBounds, true);
|
||||
e.PaintContent(e.CellBounds);
|
||||
|
||||
WidenColumnForIndicator(this.Columns[e.ColumnIndex], e.Graphics!, e.CellStyle!.Font, false);
|
||||
var length = MeasureStringWidth(e.Graphics!, e.CellStyle.Font, e.FormattedValue?.ToString() ?? string.Empty, false);
|
||||
WidenColumnForIndicator(this.Columns[e.ColumnIndex], e.Graphics!, e.CellStyle!.Font!, false);
|
||||
var length = MeasureStringWidth(e.Graphics!, e.CellStyle.Font!, e.FormattedValue?.ToString() ?? string.Empty, false);
|
||||
var drawPoint = new Point(e.CellBounds.Left + length - 2, e.CellBounds.Y + 4);
|
||||
TextRenderer.DrawText(e.Graphics!, "*", e.CellStyle!.Font, drawPoint, e.CellStyle.ForeColor, TextFormatFlags.PreserveGraphicsClipping);
|
||||
|
||||
|
|
@ -156,7 +169,7 @@ namespace ParquetViewer.Controls
|
|||
e.Paint(e.CellBounds, DataGridViewPaintParts.All
|
||||
& ~(DataGridViewPaintParts.ContentForeground));
|
||||
|
||||
var font = new Font(e.CellStyle!.Font, FontStyle.Italic);
|
||||
var font = new Font(e.CellStyle!.Font!, FontStyle.Italic);
|
||||
var color = this.GridTheme.CellPlaceholderTextColor;
|
||||
if (e.State.HasFlag(DataGridViewElementStates.Selected))
|
||||
color = Color.White;
|
||||
|
|
@ -250,15 +263,31 @@ namespace ParquetViewer.Controls
|
|||
int columnIndex = this.HitTest(e.X, e.Y).ColumnIndex;
|
||||
|
||||
if (rowIndex >= 0 && columnIndex >= 0
|
||||
&& this[columnIndex, rowIndex].Value is StructValue structValue
|
||||
&& structValue.IsHuggingFaceImageFormat(out var data))
|
||||
&& this[columnIndex, rowIndex].Value is IStructValue structValue
|
||||
&& structValue.IsHuggingFaceFormat(out var data))
|
||||
{
|
||||
using var ms = new System.IO.MemoryStream(data);
|
||||
var image = Image.FromStream(ms); //quick peek form will dispose of this image when closed
|
||||
Image? image;
|
||||
try
|
||||
{
|
||||
using var ms = new System.IO.MemoryStream(data);
|
||||
image = Image.FromStream(ms); //quick peek form will dispose of this image when closed
|
||||
}
|
||||
catch (ArgumentException)
|
||||
{
|
||||
//Data is not an image
|
||||
image = null;
|
||||
}
|
||||
catch
|
||||
{
|
||||
throw;
|
||||
}
|
||||
|
||||
var uniqueCellTag = Guid.NewGuid();
|
||||
var quickPeekForm = new QuickPeekForm(this.Columns[columnIndex].Name, image, uniqueCellTag, rowIndex, columnIndex);
|
||||
ShowQuickPeekForm(quickPeekForm, this[columnIndex, rowIndex], uniqueCellTag, QuickPeekEvent.DataTypeId.Image);
|
||||
if (image is not null)
|
||||
{
|
||||
var uniqueCellTag = Guid.NewGuid();
|
||||
var quickPeekForm = new QuickPeekForm(this.Columns[columnIndex].Name, image, uniqueCellTag, rowIndex, columnIndex);
|
||||
ShowQuickPeekForm(quickPeekForm, this[columnIndex, rowIndex], uniqueCellTag, QuickPeekEvent.DataTypeId.Image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -314,7 +343,7 @@ namespace ParquetViewer.Controls
|
|||
var dataType = QuickPeekEvent.DataTypeId.Unknown;
|
||||
QuickPeekForm? quickPeekForm = null;
|
||||
var uniqueCellTag = Guid.NewGuid();
|
||||
if (clickedCell.Value is ListValue listValue)
|
||||
if (clickedCell.Value is IListValue listValue)
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.List;
|
||||
|
||||
|
|
@ -330,7 +359,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, dt, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
}
|
||||
else if (clickedCell.Value is MapValue mapValue)
|
||||
else if (clickedCell.Value is IMapValue mapValue)
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.Map;
|
||||
|
||||
|
|
@ -348,15 +377,14 @@ namespace ParquetViewer.Controls
|
|||
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, dt, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
}
|
||||
else if (clickedCell.Value is StructValue structValue)
|
||||
else if (clickedCell.Value is IStructValue structValue)
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.Struct;
|
||||
|
||||
|
||||
var dt = structValue.ToDataTable();
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, dt, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
}
|
||||
else if (clickedCell.Value is ByteArrayValue byteArray && byteArray.ToImage(out var image))
|
||||
else if (clickedCell.Value is IByteArrayValue byteArray && byteArray.ToImage(out var image))
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.Image;
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, image!, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
|
|
@ -496,7 +524,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
|
||||
if (cellValueType == typeof(ByteArrayValue) && e.Value is ByteArrayValue byteArrayValue)
|
||||
if (cellValueType.ImplementsInterface<IByteArrayValue>() && e.Value is IByteArrayValue byteArrayValue)
|
||||
{
|
||||
//Don't truncate the binary data if this is a copy to clipboard operation
|
||||
int charLimit = this.isCopyingToClipboard ? int.MaxValue : MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL;
|
||||
|
|
@ -528,22 +556,25 @@ namespace ParquetViewer.Controls
|
|||
e.FormattingApplied = true;
|
||||
}
|
||||
}
|
||||
else if (cellValueType == typeof(StructValue) && e.Value is StructValue structValue)
|
||||
else if (cellValueType.ImplementsInterface<IStructValue>() && e.Value is IStructValue structValue)
|
||||
{
|
||||
e.Value = structValue.ToStringTruncated(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL);
|
||||
e.FormattingApplied = true;
|
||||
}
|
||||
else if (cellValueType == typeof(ListValue) && e.Value is ListValue listValue)
|
||||
else if (cellValueType.ImplementsInterface<IListValue>() && e.Value is IListValue listValue)
|
||||
{
|
||||
e.Value = listValue.ToString().Left(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL - 3, "...");
|
||||
e.Value = listValue.ToString()!.Left(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL - 3, "...");
|
||||
e.FormattingApplied = true;
|
||||
}
|
||||
}
|
||||
|
||||
protected override void OnSorted(EventArgs e)
|
||||
{
|
||||
using var graphics = this.CreateGraphics();
|
||||
WidenColumnForIndicator(this.SortedColumn, graphics, this.Font, true);
|
||||
if (this.SortedColumn is not null)
|
||||
{
|
||||
using var graphics = this.CreateGraphics();
|
||||
WidenColumnForIndicator(this.SortedColumn, graphics, this.Font, true);
|
||||
}
|
||||
base.OnSorted(e);
|
||||
}
|
||||
|
||||
|
|
@ -651,7 +682,7 @@ namespace ParquetViewer.Controls
|
|||
const int MAX_WIDTH = 360;
|
||||
const int DECIMAL_PREFERRED_WIDTH = 180;
|
||||
|
||||
if (this.DataSource is not DataTable gridTable)
|
||||
if (this.DataSource is not DataTable gridTable || this.Columns.Count == 0)
|
||||
return;
|
||||
|
||||
var maxWidth = MAX_WIDTH;
|
||||
|
|
@ -683,12 +714,20 @@ namespace ParquetViewer.Controls
|
|||
//We can just measure a few without going through all of them.
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<DateTime>(i).ToString(AppSettings.DateTimeDisplayFormat.GetDateFormat()))
|
||||
.Take(100);
|
||||
.Take(50);
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(StructValue))
|
||||
else if (gridTable.Columns[i].DataType == typeof(DateOnly))
|
||||
{
|
||||
//All date only's will probably have the same string length so no need to go through all values.
|
||||
//We can just measure a few without going through all of them.
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<DateOnly>(i).ToString(AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat()))
|
||||
.Take(10);
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType.ImplementsInterface<IStructValue>())
|
||||
{
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<StructValue>(i)!.ToStringTruncated(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL));
|
||||
.Select(row => row.Field<IStructValue>(i)!.ToStringTruncated(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL));
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(float)
|
||||
&& this.floatColumnsWithFormatOverrides.TryGetValue(gridTable.Columns[i].ColumnName, out var displayFormat)
|
||||
|
|
@ -720,16 +759,16 @@ namespace ParquetViewer.Controls
|
|||
//Allow longer than preferred width if header is longer
|
||||
maxWidth = Math.Max(newColumnSize, DECIMAL_PREFERRED_WIDTH);
|
||||
}
|
||||
else if (this.Columns[i].CellTemplate.GetType() == typeof(AudioPlayerDataGridViewCell))
|
||||
else if (this.Columns[i].CellTemplate!.GetType() == typeof(AudioPlayerDataGridViewCell))
|
||||
{
|
||||
this.Columns[i].Width = Math.Min(Math.Max(240, newColumnSize), maxWidth);
|
||||
return;
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(ByteArrayValue)
|
||||
else if (gridTable.Columns[i].DataType.ImplementsInterface<IByteArrayValue>()
|
||||
&& this.byteArrayColumnsWithFormatOverrides.TryGetValue(gridTable.Columns[i].ColumnName, out var byteArrayDisplayFormat))
|
||||
{
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => FormatByteArrayString(row.Field<ByteArrayValue>(i)!, byteArrayDisplayFormat, 1000 /*1000 chars seems like a good max limit*/));
|
||||
.Select(row => FormatByteArrayString(row.Field<IByteArrayValue>(i)!, byteArrayDisplayFormat, 1000 /*1000 chars seems like a good max limit*/));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -824,7 +863,7 @@ namespace ParquetViewer.Controls
|
|||
this.clickableColumnIndexes.Add(column.Index);
|
||||
this.hyperlinkCellStyleCache ??= new DataGridViewCellStyle(column.DefaultCellStyle)
|
||||
{
|
||||
Font = new(column.DefaultCellStyle.Font ?? column.InheritedStyle.Font, FontStyle.Underline),
|
||||
Font = new(column.DefaultCellStyle.Font ?? column.InheritedStyle!.Font!, FontStyle.Underline),
|
||||
ForeColor = this.GridTheme.HyperlinkColor
|
||||
};
|
||||
return this.hyperlinkCellStyleCache;
|
||||
|
|
@ -880,23 +919,23 @@ namespace ParquetViewer.Controls
|
|||
.GroupBy(cell => cell.ColumnIndex)
|
||||
.OrderBy(column => column.Key))
|
||||
{
|
||||
DataTable? dataTable = this.DataSource as DataTable;
|
||||
var cellValues = selectedCellsByColumn.Select(cell => this[cell.ColumnIndex, cell.RowIndex].Value);
|
||||
var cellValues = selectedCellsByColumn.Select(cell => this[cell.ColumnIndex, cell.RowIndex].Value!);
|
||||
var columnIndex = selectedCellsByColumn.Key;
|
||||
var column = this.Columns[columnIndex];
|
||||
columnsAndValuesToFilterBy.Add((column.Name, column.ValueType, cellValues.ToArray()));
|
||||
columnsAndValuesToFilterBy.Add((column.Name, column.ValueType!, cellValues.ToArray()));
|
||||
}
|
||||
|
||||
var filterQuery = GenerateFilterQuery(columnsAndValuesToFilterBy);
|
||||
if (filterQuery.Length < new TextBox().MaxLength) //This length check doesn't make the most sense but I wanted to put some kind of cap on this.
|
||||
var filterQuery = GenerateFilterQuery(columnsAndValuesToFilterBy, this.ColumnNameEscapeFormat, this.DateValueEscapeFormat);
|
||||
if (filterQuery.Length < new TextBox().MaxLength)
|
||||
{
|
||||
Clipboard.SetText(filterQuery, TextDataFormat.Text);
|
||||
}
|
||||
else
|
||||
{
|
||||
//If the query is too long to fit in our query box, show an error
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.CopyAsWhereTooLargeErrorMessage,
|
||||
Resources.Errors.CopyAsWhereTooLargeErrorTitle,
|
||||
Resources.Errors.CopyAsWhereTooLargeErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
}
|
||||
}
|
||||
|
|
@ -904,8 +943,14 @@ namespace ParquetViewer.Controls
|
|||
public static string GenerateFilterQuery(string columnName, Type valueType, object value)
|
||||
=> GenerateFilterQuery(new() { (columnName, valueType, [value]) });
|
||||
|
||||
public static string GenerateFilterQuery(List<(string ColumnName, Type ValueType, object[] Values)> columnsAndValuesToFilterBy)
|
||||
public static string GenerateFilterQuery(List<(string ColumnName, Type ValueType, object[] Values)> columnsAndValuesToFilterBy,
|
||||
string columnNameEscapeFormat = "[{0}]", string dateValueEscapeFormat = "#{0}#")
|
||||
{
|
||||
if (columnNameEscapeFormat.Length < 5)
|
||||
throw new ArgumentException("Column name escape format is too short.", nameof(columnNameEscapeFormat));
|
||||
if (dateValueEscapeFormat.Length < 5)
|
||||
throw new ArgumentException("Date value escape format is too short.", nameof(dateValueEscapeFormat));
|
||||
|
||||
var queryBuilder = new StringBuilder();
|
||||
if (columnsAndValuesToFilterBy is null || columnsAndValuesToFilterBy.Count == 0)
|
||||
{
|
||||
|
|
@ -920,10 +965,10 @@ namespace ParquetViewer.Controls
|
|||
ArgumentNullException.ThrowIfNull(values);
|
||||
|
||||
//Wrap column name in brackets if it contains spaces or punctuation (if it isn't wrapped already)
|
||||
var isAlreadyWrapped = columnName.StartsWith("[") && columnName.EndsWith("]");
|
||||
var isAlreadyWrapped = columnName.StartsWith(columnNameEscapeFormat.First()) && columnName.EndsWith(columnNameEscapeFormat.Last());
|
||||
if (!isAlreadyWrapped && !_validColumnNameRegex.IsMatch(columnName))
|
||||
{
|
||||
columnName = $"[{columnName}]";
|
||||
columnName = string.Format(columnNameEscapeFormat, columnName);
|
||||
}
|
||||
|
||||
var hasNulls = values.Any(value => value == DBNull.Value || value is null);
|
||||
|
|
@ -977,7 +1022,7 @@ namespace ParquetViewer.Controls
|
|||
if (valueType == typeof(DateTime))
|
||||
{
|
||||
//Use a standard date format so the query is always syntactically correct
|
||||
queryBuilder.Append($"#{((DateTime)value).ToString("yyyy-MM-dd HH:mm:ss.FFFFFFF")}#");
|
||||
queryBuilder.AppendFormat(dateValueEscapeFormat, ((DateTime)value).ToString("yyyy-MM-dd HH:mm:ss.FFFFFFF"));
|
||||
}
|
||||
else if (valueType.IsNumber())
|
||||
{
|
||||
|
|
@ -1014,18 +1059,18 @@ namespace ParquetViewer.Controls
|
|||
private void ShowDisplayFormatOptions(int columnIndex)
|
||||
{
|
||||
//If this is a byte array column, show available formatting options
|
||||
if (this.Columns[columnIndex].ValueType == typeof(ByteArrayValue)
|
||||
&& this.Columns[columnIndex].CellTemplate.GetType() != typeof(AudioPlayerDataGridViewCell))
|
||||
if (this.Columns[columnIndex].ValueType.ImplementsInterface<IByteArrayValue>()
|
||||
&& this.Columns[columnIndex].CellTemplate?.GetType() != typeof(AudioPlayerDataGridViewCell))
|
||||
{
|
||||
const int RECORDS_TO_INTERSECT_COUNT = 8;
|
||||
|
||||
//Find a few different non-null values and find the common display formats that all of them support.
|
||||
//This will reduce the chance the user sees #ERR in the cells from bad formatting conversions.
|
||||
int intersectCounter = RECORDS_TO_INTERSECT_COUNT;
|
||||
IEnumerable<ByteArrayValue.DisplayFormat> possibleDisplayFormats = Enum.GetValues<ByteArrayValue.DisplayFormat>();
|
||||
IEnumerable<IByteArrayValue.DisplayFormat> possibleDisplayFormats = Enum.GetValues<IByteArrayValue.DisplayFormat>();
|
||||
for (var i = 0; i < this.RowCount; i++)
|
||||
{
|
||||
if (this[columnIndex, i].Value is not ByteArrayValue byteArrayValue)
|
||||
if (this[columnIndex, i].Value is not IByteArrayValue byteArrayValue)
|
||||
continue;
|
||||
|
||||
possibleDisplayFormats = possibleDisplayFormats.Intersect(byteArrayValue.PossibleDisplayFormats);
|
||||
|
|
@ -1119,12 +1164,12 @@ namespace ParquetViewer.Controls
|
|||
/// <returns>String representation of the binary data in the desired format if possible.
|
||||
/// If conversion fails, <see cref="FORMATTING_ERROR_TEXT"/> is returned instead</returns>
|
||||
/// <remarks>Utilize <see cref="ByteArrayValue.PossibleDisplayFormats"/> to avoid calling incompatible conversions</remarks>
|
||||
private static string FormatByteArrayString(ByteArrayValue byteArrayValue, ByteArrayValue.DisplayFormat desiredFormat, int desiredLength = int.MaxValue)
|
||||
private static string FormatByteArrayString(IByteArrayValue byteArrayValue, IByteArrayValue.DisplayFormat desiredFormat, int desiredLength = int.MaxValue)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(byteArrayValue);
|
||||
ArgumentOutOfRangeException.ThrowIfLessThan(desiredLength, 1);
|
||||
|
||||
if (desiredFormat == ByteArrayValue.DisplayFormat.IPv4)
|
||||
if (desiredFormat == IByteArrayValue.DisplayFormat.IPv4)
|
||||
{
|
||||
if (byteArrayValue.ToIPv4(out var ipAddress))
|
||||
{
|
||||
|
|
@ -1133,7 +1178,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.IPv6)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.IPv6)
|
||||
{
|
||||
if (byteArrayValue.ToIPv6(out var ipAddress))
|
||||
{
|
||||
|
|
@ -1142,7 +1187,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Guid)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Guid)
|
||||
{
|
||||
if (byteArrayValue.ToGuid(out var @guid))
|
||||
{
|
||||
|
|
@ -1151,7 +1196,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Short)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Short)
|
||||
{
|
||||
if (byteArrayValue.ToShort(out var @short))
|
||||
{
|
||||
|
|
@ -1160,7 +1205,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Integer)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Integer)
|
||||
{
|
||||
if (byteArrayValue.ToInteger(out var @int))
|
||||
{
|
||||
|
|
@ -1169,7 +1214,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Long)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Long)
|
||||
{
|
||||
if (byteArrayValue.ToLong(out var @long))
|
||||
{
|
||||
|
|
@ -1178,7 +1223,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Float)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Float)
|
||||
{
|
||||
if (byteArrayValue.ToFloat(out var @float))
|
||||
{
|
||||
|
|
@ -1187,7 +1232,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Double)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Double)
|
||||
{
|
||||
if (byteArrayValue.ToDouble(out var @double))
|
||||
{
|
||||
|
|
@ -1196,7 +1241,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.ASCII)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.ASCII)
|
||||
{
|
||||
if (byteArrayValue.ToASCII(out var ascii))
|
||||
{
|
||||
|
|
@ -1208,7 +1253,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Base64)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Base64)
|
||||
{
|
||||
byteArrayValue.ToBase64(out var base64);
|
||||
if (base64.Length <= desiredLength)
|
||||
|
|
@ -1216,7 +1261,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return base64[..desiredLength] + "[...]";
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Size)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Size)
|
||||
{
|
||||
return byteArrayValue.Data.Length.ToString() + (byteArrayValue.Data.Length == 1 ? " byte" : " bytes");
|
||||
}
|
||||
|
|
@ -1234,7 +1279,7 @@ namespace ParquetViewer.Controls
|
|||
//Check for audio data
|
||||
foreach (DataGridViewColumn column in this.Columns)
|
||||
{
|
||||
if (column.ValueType == typeof(ByteArrayValue))
|
||||
if (column.ValueType.ImplementsInterface<IByteArrayValue>())
|
||||
{
|
||||
var isAudioColumn = false;
|
||||
var tryCount = 0;
|
||||
|
|
@ -1247,8 +1292,9 @@ namespace ParquetViewer.Controls
|
|||
if (value == DBNull.Value)
|
||||
continue;
|
||||
|
||||
byte[] data = ((ByteArrayValue)value).Data;
|
||||
if (AudioPlayerDataGridViewCell.IsAudio(data, out var _))
|
||||
var byteArray = (IByteArrayValue)value;
|
||||
if (AudioPlayerDataGridViewCell.IsAudio(byteArray.Data, out var _)
|
||||
&& !byteArray.ToImage(out _)) //help prevent false positives by checking for image data
|
||||
{
|
||||
isAudioColumn = true;
|
||||
break;
|
||||
|
|
@ -1272,7 +1318,7 @@ namespace ParquetViewer.Controls
|
|||
public void DisposeAudioCells()
|
||||
{
|
||||
foreach (var audioColumn in this.Columns.Cast<DataGridViewColumn>()
|
||||
.Where(column => column.CellTemplate.GetType() == typeof(AudioPlayerDataGridViewCell)))
|
||||
.Where(column => column.CellTemplate?.GetType() == typeof(AudioPlayerDataGridViewCell)))
|
||||
{
|
||||
foreach (DataGridViewRow row in this.Rows)
|
||||
{
|
||||
|
|
@ -1296,4 +1342,4 @@ namespace ParquetViewer.Controls
|
|||
Decimal
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.ComponentModel;
|
||||
using System.Data;
|
||||
using System.Drawing;
|
||||
using System.Threading.Tasks;
|
||||
|
|
@ -12,6 +13,7 @@ namespace ParquetViewer.Controls
|
|||
private readonly string originalTitle = string.Empty;
|
||||
|
||||
private string titleSuffix = string.Empty;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public string TitleSuffix
|
||||
{
|
||||
get => titleSuffix;
|
||||
|
|
@ -30,8 +32,11 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public Guid UniqueTag { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public int SourceRowIndex { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public int SourceColumnIndex { get; set; }
|
||||
|
||||
public event EventHandler<TakeMeBackEventArgs>? TakeMeBackEvent;
|
||||
|
|
@ -86,6 +91,11 @@ namespace ParquetViewer.Controls
|
|||
{
|
||||
width += column.Width;
|
||||
}
|
||||
if (this.mainGridView.Rows.Count > 8) //8 is a magic number... Better than nothing imo
|
||||
{
|
||||
width += 24; //widen for scrollbar
|
||||
}
|
||||
|
||||
this.Width = Math.Min(Math.Max(width, 280), 900); //900 pixel max seems reasonable, right?
|
||||
|
||||
if (this.mainGridView.Rows.Count == 1)
|
||||
|
|
@ -95,7 +105,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
else if (this.mainPictureBox is not null)
|
||||
{
|
||||
this.Text += $" ({Resources.Strings.DimensionsText}: {this.mainPictureBox.Image.PhysicalDimension.Width} x {this.mainPictureBox.Image.PhysicalDimension.Height})";
|
||||
this.Text += $" ({Resources.Strings.DimensionsText}: {this.mainPictureBox.Image!.PhysicalDimension.Width} x {this.mainPictureBox.Image.PhysicalDimension.Height})";
|
||||
this.Text += $" ({Resources.Strings.TypeText}: {this.mainPictureBox.Image.RawFormat})";
|
||||
|
||||
this.Width = Math.Max(Math.Min((int)(Screen.FromControl(this).WorkingArea.Width / 1.8), this.mainPictureBox.Image.Width), 400);
|
||||
|
|
@ -142,7 +152,7 @@ namespace ParquetViewer.Controls
|
|||
{
|
||||
using var saveFileDialog = new SaveFileDialog
|
||||
{
|
||||
Filter = $"{this.mainPictureBox.Image.RawFormat.ToString().ToUpperInvariant()} image|*.{this.mainPictureBox.Image.RawFormat.ToString().ToLowerInvariant()}",
|
||||
Filter = $"{this.mainPictureBox.Image!.RawFormat.ToString().ToUpperInvariant()} image|*.{this.mainPictureBox.Image.RawFormat.ToString().ToLowerInvariant()}",
|
||||
Title = Resources.Strings.SaveImageAsButtonText.Format(this.mainPictureBox.Image.RawFormat.ToString().ToUpperInvariant())
|
||||
};
|
||||
|
||||
|
|
@ -154,8 +164,8 @@ namespace ParquetViewer.Controls
|
|||
bitmap.Save(saveFileDialog.FileName, this.mainPictureBox.Image.RawFormat);
|
||||
|
||||
MessageBox.Show(this,
|
||||
Resources.Strings.ImageSavedToDiskMessage.Format(saveFileDialog.FileName),
|
||||
Resources.Strings.ImageSavedToDiskTitle,
|
||||
Resources.Strings.ImageSavedToDiskMessage.Format(saveFileDialog.FileName),
|
||||
Resources.Strings.ImageSavedToDiskTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Information);
|
||||
}
|
||||
}
|
||||
|
|
@ -165,7 +175,7 @@ namespace ParquetViewer.Controls
|
|||
try
|
||||
{
|
||||
this.mainPictureBox.Cursor = Cursors.WaitCursor;
|
||||
Clipboard.SetImage(this.mainPictureBox.Image);
|
||||
Clipboard.SetImage(this.mainPictureBox.Image!);
|
||||
await Task.Delay(100); //allow cursor to change
|
||||
}
|
||||
finally
|
||||
|
|
@ -197,4 +207,4 @@ namespace ParquetViewer.Controls
|
|||
public int SourceRowIndex { get; } = sourceRowIndex;
|
||||
public int SourceColumnIndex { get; } = sourceColumnIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
using ParquetViewer;
|
||||
using System;
|
||||
using System.ComponentModel;
|
||||
using System.Drawing;
|
||||
using System.Windows.Forms;
|
||||
using System.Windows.Forms.VisualStyles;
|
||||
|
|
@ -21,6 +22,7 @@ public class StylableCheckBox : CheckBox
|
|||
/// <summary>
|
||||
/// Gets or sets the foreground color of the checkbox label if a checkbox is disabled
|
||||
/// </summary>
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Color DisabledForeColor
|
||||
{
|
||||
get;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
using System.Drawing;
|
||||
using System.ComponentModel;
|
||||
using System.Drawing;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ParquetViewer.Controls
|
||||
{
|
||||
public class ThemableToolStripSeperator : ToolStripSeparator
|
||||
{
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public new Color BackColor { get; set; } = Color.Transparent;
|
||||
|
||||
/// <remarks>
|
||||
|
|
@ -23,4 +25,4 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -12,7 +12,7 @@ namespace ParquetViewer
|
|||
public partial class CustomDateFormatInputForm : FormBase
|
||||
{
|
||||
public string UserEnteredDateFormat => this.desiredDateFormatTextBox.Text;
|
||||
|
||||
|
||||
public CustomDateFormatInputForm()
|
||||
{
|
||||
InitializeComponent();
|
||||
|
|
@ -85,8 +85,8 @@ namespace ParquetViewer
|
|||
else
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.InvalidDateFormatErrorMessage,
|
||||
Resources.Errors.InvalidDateFormatErrorTitle,
|
||||
Resources.Errors.InvalidDateFormatErrorMessage,
|
||||
Resources.Errors.InvalidDateFormatErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
}
|
||||
}
|
||||
|
|
@ -104,4 +104,4 @@ namespace ParquetViewer
|
|||
this.dateFormatDocsLinkLabel.ActiveLinkColor = theme.ActiveHyperlinkColor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1759,7 +1759,7 @@
|
|||
<value>17</value>
|
||||
</data>
|
||||
<data name="instructionsTableLayoutPanel.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>554, 254</value>
|
||||
<value>554, 236</value>
|
||||
</data>
|
||||
<data name="instructionsTableLayoutPanel.TabIndex" type="System.Int32, mscorlib">
|
||||
<value>1</value>
|
||||
|
|
@ -1894,7 +1894,7 @@
|
|||
<value>6</value>
|
||||
</data>
|
||||
<data name="mainTableLayoutPanel.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>578, 373</value>
|
||||
<value>578, 355</value>
|
||||
</data>
|
||||
<data name="mainTableLayoutPanel.TabIndex" type="System.Int32, mscorlib">
|
||||
<value>0</value>
|
||||
|
|
@ -1939,7 +1939,10 @@
|
|||
<value>7, 15</value>
|
||||
</data>
|
||||
<data name="$this.ClientSize" type="System.Drawing.Size, System.Drawing">
|
||||
<value>578, 373</value>
|
||||
<value>578, 355</value>
|
||||
</data>
|
||||
<data name="$this.MaximumSize" type="System.Drawing.Size, System.Drawing">
|
||||
<value>900, 750</value>
|
||||
</data>
|
||||
<data name="$this.StartPosition" type="System.Windows.Forms.FormStartPosition, System.Windows.Forms">
|
||||
<value>CenterParent</value>
|
||||
|
|
|
|||
|
|
@ -6,4 +6,4 @@ namespace ParquetViewer.Exceptions
|
|||
{
|
||||
public InvalidQueryException(Exception? ex = null) : base(Resources.Errors.InvalidQueryErrorMessage, ex) { }
|
||||
}
|
||||
}
|
||||
}
|
||||
16
src/ParquetViewer/Exceptions/RowsReadException.cs
Normal file
16
src/ParquetViewer/Exceptions/RowsReadException.cs
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
using System;
|
||||
|
||||
namespace ParquetViewer.Exceptions
|
||||
{
|
||||
public class RowsReadException : Exception
|
||||
{
|
||||
public RowsReadException(Exception parquetNetEx, Exception duckDbEx, string? message = null) : base(message, new AggregateException([parquetNetEx, duckDbEx]))
|
||||
{
|
||||
this.ParquetNetException = parquetNetEx;
|
||||
this.DuckDbException = duckDbEx;
|
||||
}
|
||||
|
||||
public Exception ParquetNetException { get; }
|
||||
public Exception DuckDbException { get; }
|
||||
}
|
||||
}
|
||||
|
|
@ -6,9 +6,9 @@ namespace ParquetViewer.Exceptions
|
|||
{
|
||||
internal class UnsupportedAssemblyVersionException : Exception
|
||||
{
|
||||
public UnsupportedAssemblyVersionException(string unsupportedAssemblyVersion, Exception? ex = null)
|
||||
public UnsupportedAssemblyVersionException(string unsupportedAssemblyVersion, Exception? ex = null)
|
||||
: base(Resources.Errors.UnexpectedAssemblyVersionErrorFormat.Format(unsupportedAssemblyVersion), ex) { }
|
||||
|
||||
public static void Record(string unsupportedAssemblyVersion) => ExceptionEvent.FireAndForget(new UnsupportedAssemblyVersionException(unsupportedAssemblyVersion));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -3,7 +3,7 @@ using System;
|
|||
|
||||
namespace ParquetViewer.Exceptions
|
||||
{
|
||||
public class XlsCellLengthException: Exception
|
||||
public class XlsCellLengthException : Exception
|
||||
{
|
||||
public readonly FileType FileType = FileType.XLS;
|
||||
|
||||
|
|
@ -15,4 +15,4 @@ namespace ParquetViewer.Exceptions
|
|||
this.MaxLength = maxLength;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
using Parquet.Schema;
|
||||
using ParquetViewer.Controls;
|
||||
using ParquetViewer.Controls;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.ComponentModel;
|
||||
using System.Drawing;
|
||||
using System.Linq;
|
||||
using System.Windows.Forms;
|
||||
|
|
@ -16,8 +15,11 @@ namespace ParquetViewer
|
|||
private const int DynamicFieldCheckboxYIncrement = 30;
|
||||
private const int MaxNumberOfFieldsWeCanRender = 5000;
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public List<string> PreSelectedFields { get; set; }
|
||||
public List<Field> AvailableFields { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public List<string> AvailableFields { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public List<string> NewSelectedFields { get; set; }
|
||||
|
||||
private string _selectedFieldsOnlyLabelTemplate;
|
||||
|
|
@ -25,14 +27,14 @@ namespace ParquetViewer
|
|||
public FieldsToLoadForm()
|
||||
{
|
||||
InitializeComponent();
|
||||
this.AvailableFields ??= new List<Field>();
|
||||
this.AvailableFields ??= new List<string>();
|
||||
this.PreSelectedFields ??= new List<string>();
|
||||
this.NewSelectedFields ??= new List<string>();
|
||||
this._selectedFieldsOnlyLabelTemplate = this.showSelectedFieldsRadioButton.Text;
|
||||
this.SetSelectedFieldCount();
|
||||
}
|
||||
|
||||
public FieldsToLoadForm(IEnumerable<Field> availableFields, IEnumerable<string> preSelectedFields) : this()
|
||||
public FieldsToLoadForm(IEnumerable<string> availableFields, IEnumerable<string> preSelectedFields) : this()
|
||||
{
|
||||
this.AvailableFields = availableFields?.ToList() ?? new();
|
||||
this.PreSelectedFields = preSelectedFields?.ToList() ?? new();
|
||||
|
|
@ -44,7 +46,7 @@ namespace ParquetViewer
|
|||
this.RenderFieldsCheckboxes(this.AvailableFields, this.PreSelectedFields);
|
||||
}
|
||||
|
||||
private void RenderFieldsCheckboxes(List<Field> availableFields, List<string>? preSelectedFields)
|
||||
private void RenderFieldsCheckboxes(List<string> availableFields, List<string>? preSelectedFields)
|
||||
{
|
||||
this.fieldsPanel.SuspendLayout(); //Suspending the layout while dynamically adding controls adds significant performance improvement
|
||||
this.ClearAndDisposeCheckboxes();
|
||||
|
|
@ -68,7 +70,7 @@ namespace ParquetViewer
|
|||
bool isClearingSelectAllCheckbox = false;
|
||||
|
||||
var checkboxControls = new List<CheckBox>();
|
||||
foreach (Field field in availableFields)
|
||||
foreach (string field in availableFields)
|
||||
{
|
||||
if (isFirst) //Add toggle all checkbox and some other setting changes
|
||||
{
|
||||
|
|
@ -81,12 +83,8 @@ namespace ParquetViewer
|
|||
}
|
||||
|
||||
var totalFieldCount = availableFields.Count;
|
||||
var supportedFieldCount = availableFields.Where(IsSupportedFieldType).Count();
|
||||
var unsupportedFieldCount = totalFieldCount - supportedFieldCount;
|
||||
var unsupportedFieldsText = unsupportedFieldCount > 0 ? $" - {Resources.Strings.UnsupportedFieldCountTextFormat.Format(unsupportedFieldCount)}" : string.Empty;
|
||||
|
||||
string selectAllCheckBoxText = Resources.Strings.SelectAllCheckmarkTextFormat.Format(supportedFieldCount + unsupportedFieldsText);
|
||||
string deselectAllCheckBoxText = Resources.Strings.DeselectAllCheckmarkTextFormat.Format(supportedFieldCount + unsupportedFieldsText);
|
||||
string selectAllCheckBoxText = Resources.Strings.SelectAllCheckmarkTextFormat.Format(totalFieldCount);
|
||||
string deselectAllCheckBoxText = Resources.Strings.DeselectAllCheckmarkTextFormat.Format(totalFieldCount);
|
||||
var selectAllCheckbox = new CheckboxWithTooltip(this.fieldsPanel)
|
||||
{
|
||||
Name = SelectAllCheckboxName,
|
||||
|
|
@ -127,17 +125,16 @@ namespace ParquetViewer
|
|||
locationY += DynamicFieldCheckboxYIncrement;
|
||||
}
|
||||
|
||||
bool isUnsupportedFieldType = !IsSupportedFieldType(field, out var unsupportedReason);
|
||||
var fieldCheckbox = new CheckboxWithTooltip(this.fieldsPanel)
|
||||
{
|
||||
Name = string.Concat("checkbox_", field.Name),
|
||||
Text = string.Concat(field.Name, isUnsupportedFieldType ? $" {Resources.Strings.UnsupportedFieldText}" : string.Empty),
|
||||
Tag = field.Name,
|
||||
Checked = preSelectedFields?.Contains(field.Name) == true,
|
||||
Name = string.Concat("checkbox_", field),
|
||||
Text = field,
|
||||
Tag = field,
|
||||
Checked = preSelectedFields?.Contains(field) == true,
|
||||
Location = new Point(locationX, locationY),
|
||||
DisabledForeColor = _disabledTextColor,
|
||||
AutoSize = true,
|
||||
Enabled = !isUnsupportedFieldType
|
||||
Enabled = true
|
||||
};
|
||||
fieldCheckbox.CheckedChanged += (object? checkboxSender, EventArgs checkboxEventArgs) =>
|
||||
{
|
||||
|
|
@ -178,11 +175,6 @@ namespace ParquetViewer
|
|||
};
|
||||
checkboxControls.Add(fieldCheckbox);
|
||||
|
||||
if (isUnsupportedFieldType)
|
||||
{
|
||||
fieldCheckbox.SetTooltip(unsupportedReason!);
|
||||
}
|
||||
|
||||
locationY += DynamicFieldCheckboxYIncrement;
|
||||
}
|
||||
|
||||
|
|
@ -222,67 +214,6 @@ namespace ParquetViewer
|
|||
this.fieldsPanel.Controls.Clear();
|
||||
}
|
||||
|
||||
public static bool IsSupportedFieldType(Field field)
|
||||
=> IsSupportedFieldType(field, out var _);
|
||||
|
||||
public static bool IsSupportedFieldType(Field field, [NotNullWhen(false)] out string? unsupportedReason)
|
||||
{
|
||||
if (field.SchemaType == SchemaType.Data)
|
||||
{
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (field.SchemaType == SchemaType.List && field is ListField lf)
|
||||
{
|
||||
//We don't support lists of maps
|
||||
if (lf.Item.SchemaType == SchemaType.Map)
|
||||
{
|
||||
unsupportedReason = Resources.Errors.NestedListOfTypeNotSupportedMessageFormat.Format(SchemaType.List.ToString(), lf.Item.SchemaType.ToString());
|
||||
return false;
|
||||
}
|
||||
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (field.SchemaType == SchemaType.Map && field is MapField mf)
|
||||
{
|
||||
if (mf.Key.SchemaType != SchemaType.Data)
|
||||
{
|
||||
unsupportedReason = Resources.Errors.NestedListOfTypeNotSupportedMessageFormat.Format(SchemaType.Map.ToString(), mf.Key.SchemaType.ToString());
|
||||
return false;
|
||||
}
|
||||
else if (mf.Value.SchemaType != SchemaType.Data)
|
||||
{
|
||||
unsupportedReason = Resources.Errors.NestedListOfTypeNotSupportedMessageFormat.Format(SchemaType.Map.ToString(), mf.Value.SchemaType.ToString());
|
||||
return false;
|
||||
}
|
||||
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (field.SchemaType == SchemaType.Struct && field is StructField sf)
|
||||
{
|
||||
foreach (var structField in sf.Fields)
|
||||
{
|
||||
if (!IsSupportedFieldType(structField, out unsupportedReason))
|
||||
{
|
||||
unsupportedReason = Resources.Errors.StructWithUnsupportedFieldErrorMessageFormat.Format(field.Name, structField.Name)
|
||||
+ Environment.NewLine + unsupportedReason;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
unsupportedReason = Resources.Errors.UnknownFieldTypeErrorMessage;
|
||||
return false;
|
||||
}
|
||||
|
||||
private void allFieldsRadioButton_CheckedChanged(object sender, EventArgs e)
|
||||
{
|
||||
if (((RadioButton)sender).Checked)
|
||||
|
|
@ -319,7 +250,7 @@ namespace ParquetViewer
|
|||
this.NewSelectedFields.Clear();
|
||||
if (this.allFieldsRadioButton.Checked || (this.fieldsPanel.Controls.Find(SelectAllCheckboxName, true).FirstOrDefault() as CheckBox)?.Checked == true)
|
||||
{
|
||||
this.NewSelectedFields.AddRange(this.AvailableFields.Where(IsSupportedFieldType).Select(f => f.Name));
|
||||
this.NewSelectedFields.AddRange(this.AvailableFields);
|
||||
}
|
||||
else if (this.PreSelectedFields.Count > 0)
|
||||
{
|
||||
|
|
@ -328,9 +259,9 @@ namespace ParquetViewer
|
|||
else
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorMessage,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorTitle,
|
||||
MessageBoxButtons.OK,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorMessage,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorTitle,
|
||||
MessageBoxButtons.OK,
|
||||
MessageBoxIcon.Warning);
|
||||
return;
|
||||
}
|
||||
|
|
@ -353,19 +284,19 @@ namespace ParquetViewer
|
|||
{
|
||||
if (!string.IsNullOrWhiteSpace(this.filterColumnsTextbox.Text))
|
||||
{
|
||||
IEnumerable<Field> filteredFields;
|
||||
IEnumerable<string> filteredFields;
|
||||
var filteredColumnsNames = this.filterColumnsTextbox.Text.Split(',').ToList();
|
||||
|
||||
if (filteredColumnsNames.Count == 1)
|
||||
{
|
||||
var filter = filteredColumnsNames[0];
|
||||
filteredFields = this.AvailableFields.Where(w => w.Name.Contains(filter, StringComparison.InvariantCultureIgnoreCase));
|
||||
filteredFields = this.AvailableFields.Where(w => w.Contains(filter, StringComparison.InvariantCultureIgnoreCase));
|
||||
}
|
||||
else
|
||||
{
|
||||
char[] charsToTrim = { '"', ' ', '\'' };
|
||||
filteredColumnsNames = filteredColumnsNames.Select(s => s.Trim(charsToTrim)).ToList();
|
||||
filteredFields = this.AvailableFields.Where(w => filteredColumnsNames.Contains(w.Name));
|
||||
filteredFields = this.AvailableFields.Where(w => filteredColumnsNames.Contains(w));
|
||||
}
|
||||
|
||||
this.RenderFieldsCheckboxes(filteredFields.ToList(), this.PreSelectedFields);
|
||||
|
|
@ -412,4 +343,4 @@ namespace ParquetViewer
|
|||
this.rememberMyChoiceCheckBox.DisabledForeColor = this._disabledTextColor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -14,6 +14,7 @@ namespace ParquetViewer.Helpers
|
|||
public static class Constants
|
||||
{
|
||||
public const string WikiURL = "https://github.com/mukunku/ParquetViewer/wiki";
|
||||
public const string DuckDBSqlSyntaxURL = "https://duckdb.org/docs/stable/sql/query_syntax/select";
|
||||
}
|
||||
|
||||
public static class User
|
||||
|
|
@ -175,4 +176,4 @@ namespace ParquetViewer.Helpers
|
|||
PARQUET,
|
||||
XLSX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -28,10 +28,10 @@ namespace ParquetViewer.Helpers
|
|||
{ typeof(string), "NVARCHAR({0}) {1}NULL" },
|
||||
{ typeof(TimeSpan), "INT {1}NULL" },
|
||||
{ typeof(byte[]), "VARBINARY {1}NULL" },
|
||||
{ typeof(ListValue), "sql_variant {1}NULL /*LIST*/" },
|
||||
{ typeof(MapValue), "sql_variant {1}NULL /*MAP*/" },
|
||||
{ typeof(StructValue), "sql_variant {1}NULL /*STRUCT*/" },
|
||||
{ typeof(ByteArrayValue), "VARBINARY({0}) {1}NULL" },
|
||||
{ typeof(IListValue), "sql_variant {1}NULL /*LIST*/" },
|
||||
{ typeof(IMapValue), "sql_variant {1}NULL /*MAP*/" },
|
||||
{ typeof(IStructValue), "sql_variant {1}NULL /*STRUCT*/" },
|
||||
{ typeof(IByteArrayValue), "VARBINARY({0}) {1}NULL" },
|
||||
};
|
||||
|
||||
public string? TablePrefix { get; set; }
|
||||
|
|
@ -82,9 +82,19 @@ namespace ParquetViewer.Helpers
|
|||
|
||||
public static string GetTypeFor(DataColumn column)
|
||||
{
|
||||
var item = TypeMap[column.DataType] as string
|
||||
?? throw new NotSupportedException($"No type mapping is provided for {column.DataType.Name}");
|
||||
bool useMaxKeyword = column.DataType == typeof(string) || column.DataType == typeof(ByteArrayValue);
|
||||
Type columnType = column.DataType;
|
||||
if (columnType.ImplementsInterface<IListValue>())
|
||||
columnType = typeof(IListValue);
|
||||
else if (columnType.ImplementsInterface<IMapValue>())
|
||||
columnType = typeof(IMapValue);
|
||||
else if (columnType.ImplementsInterface<IStructValue>())
|
||||
columnType = typeof(IStructValue);
|
||||
else if (columnType.ImplementsInterface<IByteArrayValue>())
|
||||
columnType = typeof(IByteArrayValue);
|
||||
|
||||
var item = TypeMap[columnType] as string
|
||||
?? throw new NotSupportedException(string.Format("No type mapping is provided for {0}", column.DataType.Name));
|
||||
bool useMaxKeyword = column.DataType == typeof(string) || column.DataType.ImplementsInterface<IByteArrayValue>();
|
||||
return string.Format(item, useMaxKeyword ? "MAX" : column.MaxLength.ToString(), column.AllowDBNull ? string.Empty : "NOT ");
|
||||
}
|
||||
|
||||
|
|
@ -166,4 +176,4 @@ namespace ParquetViewer.Helpers
|
|||
return stringBuilder.ToString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -109,4 +109,4 @@ namespace ParquetViewer.Helpers
|
|||
writer.Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
using Microsoft.Win32;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
|
@ -9,7 +10,6 @@ using System.Drawing;
|
|||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ParquetViewer.Helpers
|
||||
|
|
@ -17,7 +17,9 @@ namespace ParquetViewer.Helpers
|
|||
public static class ExtensionMethods
|
||||
{
|
||||
private const string DefaultDateTimeFormat = "g";
|
||||
private const string DefaultDateOnlyFormat = "d";
|
||||
public const string ISO8601DateTimeFormat = "yyyy-MM-ddTHH:mm:ss.FFFFFFF";
|
||||
public const string ISO8601DateOnlyFormat = "yyyy-MM-dd";
|
||||
|
||||
/// <summary>
|
||||
/// Returns a list of all column names within a given datatable
|
||||
|
|
@ -47,6 +49,15 @@ namespace ParquetViewer.Helpers
|
|||
_ => string.Empty
|
||||
};
|
||||
|
||||
public static string GetDateOnlyFormat(this DateFormat dateFormat) => dateFormat switch
|
||||
{
|
||||
DateFormat.ISO8601 => ISO8601DateOnlyFormat,
|
||||
DateFormat.Default => DefaultDateOnlyFormat,
|
||||
DateFormat.Custom => AppSettings.CustomDateFormat is not null ?
|
||||
UtilityMethods.StripTimeComponentsFromDateFormat(AppSettings.CustomDateFormat) : DefaultDateOnlyFormat,
|
||||
_ => string.Empty
|
||||
};
|
||||
|
||||
public static string GetExtension(this FileType fileType)
|
||||
=> Enum.IsDefined(fileType)
|
||||
? $".{fileType.ToString().ToLowerInvariant()}"
|
||||
|
|
@ -56,7 +67,7 @@ namespace ParquetViewer.Helpers
|
|||
|
||||
public static Size RenderedSize(this PictureBox pictureBox)
|
||||
{
|
||||
var wfactor = (double)pictureBox.Image.Width / pictureBox.ClientSize.Width;
|
||||
var wfactor = (double)pictureBox.Image!.Width / pictureBox.ClientSize.Width;
|
||||
var hfactor = (double)pictureBox.Image.Height / pictureBox.ClientSize.Height;
|
||||
|
||||
var resizeFactor = Math.Max(wfactor, hfactor);
|
||||
|
|
@ -86,12 +97,6 @@ namespace ParquetViewer.Helpers
|
|||
public static bool IsSimple(this Type type)
{
    // A type counts as "simple" when its registered TypeConverter can build it from a string.
    var converter = TypeDescriptor.GetConverter(type);
    return converter.CanConvertFrom(typeof(string));
}
|
||||
|
||||
/// <summary>
/// Determines whether the type implements the generic math interface <see cref="INumber{TSelf}"/>.
/// </summary>
/// <param name="type">The type to inspect.</param>
/// <returns>True if any implemented interface is a closed form of <c>INumber&lt;&gt;</c>.</returns>
public static bool IsNumber(this Type type)
{
    foreach (var implemented in type.GetInterfaces())
    {
        if (implemented.IsGenericType && implemented.GetGenericTypeDefinition() == typeof(INumber<>))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
public static T ToEnum<T>(this int value, T @default) where T : struct, Enum
|
||||
{
|
||||
if (Enum.IsDefined(typeof(T), value))
|
||||
|
|
@ -109,54 +114,6 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Copies a window of values from one column of a <see cref="DataTable"/> into a typed array.
/// </summary>
/// <param name="dataTable">Table to read from.</param>
/// <param name="type">Element type of the returned array.</param>
/// <param name="columnName">Name of the column to read.</param>
/// <param name="skipCount">Number of leading rows to skip; must not be negative.</param>
/// <param name="fetchCount">Maximum number of rows to read; must be greater than zero.</param>
/// <returns>
/// An array holding at most <paramref name="fetchCount"/> values, in row order, starting after the
/// skipped rows. The array is empty when <paramref name="skipCount"/> reaches or exceeds the row count.
/// </returns>
/// <exception cref="ArgumentNullException">The table or type is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">The skip count is negative or the fetch count is not positive.</exception>
/// <exception cref="ArgumentException">The column does not exist in the table.</exception>
/// <exception cref="NotSupportedException">A list, map, or struct value is encountered.</exception>
public static Array GetColumnValues(this DataTable dataTable, Type type, string columnName, int skipCount, int fetchCount)
{
    ArgumentNullException.ThrowIfNull(dataTable);
    ArgumentNullException.ThrowIfNull(type);
    ArgumentOutOfRangeException.ThrowIfLessThan(skipCount, 0);
    ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(fetchCount, 0);

    if (!dataTable.Columns.Contains(columnName))
        throw new ArgumentException($"Column `{columnName}` does not exist in the datatable");

    // Clamp at zero: skipping past the end of the table yields an empty window rather than
    // a negative array length (which Array.CreateInstance rejects).
    var recordCountAfterSkip = Math.Max(0, dataTable.Rows.Count - skipCount);
    var recordCountToRead = Math.Min(fetchCount, recordCountAfterSkip);
    var values = Array.CreateInstance(type, recordCountToRead);

    for (var index = 0; index < recordCountToRead; index++)
    {
        object? value = dataTable.Rows[skipCount + index][columnName];

        if (value == DBNull.Value)
            value = null; // database nulls are stored as null elements
        else if (value is ByteArrayValue byteArray)
            value = byteArray.Data; // unwrap to the underlying data
        else if (value is ListValue || value is MapValue || value is StructValue)
            throw new NotSupportedException("List, Map, and Struct types are currently not supported.");

        values.SetValue(value, index);
    }

    return values;
}
|
||||
|
||||
/// <summary>
/// Returns a version of the type that can hold null.
/// </summary>
/// <param name="sourceType">The type to convert.</param>
/// <returns>
/// <paramref name="sourceType"/> itself for reference types and for <see cref="Nullable{T}"/>;
/// otherwise <c>Nullable&lt;sourceType&gt;</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sourceType"/> is null.</exception>
public static Type GetNullableVersion(this Type sourceType)
{
    if (sourceType == null)
    {
        throw new ArgumentNullException(nameof(sourceType));
    }

    // Reference types can already be null.
    if (!sourceType.IsValueType)
    {
        return sourceType;
    }

    // Already wrapped in Nullable<>.
    if (sourceType.IsGenericType && sourceType.GetGenericTypeDefinition() == typeof(Nullable<>))
    {
        return sourceType;
    }

    return typeof(Nullable<>).MakeGenericType(sourceType);
}
|
||||
|
||||
/// <summary>
|
||||
/// Converts a float to a string without using the scientific notation, if possible
|
||||
/// </summary>
|
||||
|
|
@ -232,8 +189,8 @@ namespace ParquetViewer.Helpers
|
|||
return enumerable;
|
||||
}
|
||||
|
||||
/// <remarks>Can't put this into ByteArrayValue itself as that assembly doesn't reference System.Drawing</remarks>
|
||||
public static bool ToImage(this ByteArrayValue byteArrayValue, out Image? image)
|
||||
/// <remarks>Can't put this into IByteArrayValue itself as that assembly doesn't reference System.Drawing</remarks>
|
||||
public static bool ToImage(this IByteArrayValue byteArrayValue, [NotNullWhen(true)] out Image? image)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(byteArrayValue);
|
||||
|
||||
|
|
@ -259,7 +216,33 @@ namespace ParquetViewer.Helpers
|
|||
catch { /*swallow*/ }
|
||||
}
|
||||
|
||||
public static string Format(this string formatString, params object?[] args)
|
||||
/// <summary>
/// Checks whether an instance of <paramref name="type"/> can be assigned to <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The interface (or base type) to test against.</typeparam>
/// <param name="type">The type to inspect; may be null.</param>
/// <returns>False when <paramref name="type"/> is null; otherwise the result of <see cref="Type.IsAssignableFrom(Type)"/>.</returns>
public static bool ImplementsInterface<T>(this Type? type)
    => type is not null && typeof(T).IsAssignableFrom(type);
|
||||
|
||||
/// <summary>
/// Extension-method form of <see cref="string.Format(string, object[])"/>.
/// </summary>
/// <param name="formatString">Composite format string.</param>
/// <param name="args">Values substituted into the format items.</param>
/// <returns>The formatted string.</returns>
public static string Format(this string formatString, params object?[] args)
{
    return string.Format(formatString, args);
}
|
||||
|
||||
/// <summary>
/// Detects the Hugging Face image struct layout.
/// https://huggingface.co/docs/hub/en/datasets-image#parquet-format
/// </summary>
/// <param name="structValue">The struct to inspect.</param>
/// <param name="data">The contents of the "bytes" field on success; otherwise null.</param>
/// <returns>True if this is a struct with exactly the "bytes" and "path" fields and "bytes" holds a byte array value</returns>
public static bool IsHuggingFaceFormat(this IStructValue structValue, [NotNullWhen(true)] out byte[]? data)
{
    data = null;

    // The layout is exactly two fields, named "bytes" and "path".
    var fieldNames = structValue.Data.ColumnNames;
    if (fieldNames.Count != 2 || !fieldNames.Contains("bytes") || !fieldNames.Contains("path"))
    {
        return false;
    }

    if (structValue.Data.GetValue("bytes") is not ByteArrayValue byteArrayValue)
    {
        return false;
    }

    data = byteArrayValue.Data;
    return true;
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
using Apache.Arrow.Ipc;
|
||||
using Parquet.Meta;
|
||||
using ParquetViewer.Engine;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
|
||||
|
|
@ -25,156 +26,65 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
|
||||
public static string ThriftMetadataToJSON(Engine.ParquetEngine parquetEngine, long recordCount, int fieldCount)
|
||||
public static string ThriftMetadataToJSON(IParquetEngine parquetEngine, long recordCount, int fieldCount)
|
||||
{
|
||||
try
|
||||
{
|
||||
object ProcessSchemaTree(Engine.ParquetSchemaElement parquetSchemaElement)
|
||||
{
|
||||
return new
|
||||
{
|
||||
parquetSchemaElement.Path,
|
||||
Type = parquetSchemaElement.SchemaElement.Type.ToString(),
|
||||
parquetSchemaElement.SchemaElement.TypeLength,
|
||||
LogicalType = LogicalTypeToJSONObject(parquetSchemaElement.SchemaElement.LogicalType),
|
||||
RepetitionType = parquetSchemaElement.SchemaElement.RepetitionType.ToString(),
|
||||
ConvertedType = parquetSchemaElement.SchemaElement.ConvertedType.ToString(),
|
||||
Children = parquetSchemaElement.Children.Select(pse => ProcessSchemaTree(pse)).ToArray()
|
||||
};
|
||||
}
|
||||
|
||||
var jsonObject = new
|
||||
{
|
||||
parquetEngine.ThriftMetadata.Version,
|
||||
parquetEngine.Metadata.ParquetVersion,
|
||||
NumRows = recordCount,
|
||||
NumRowGroups = parquetEngine.ThriftMetadata.RowGroups?.Count ?? -1, //What about partitioned files?
|
||||
NumRowGroups = parquetEngine.Metadata.RowGroupCount, //We assume partitioned files all have the same row group count
|
||||
NumFields = fieldCount,
|
||||
parquetEngine.ThriftMetadata.CreatedBy,
|
||||
Schema = ProcessSchemaTree(parquetEngine.ParquetSchemaTree),
|
||||
RowGroups = (parquetEngine.ThriftMetadata.RowGroups ?? Enumerable.Empty<RowGroup>()).Select(rowGroup => new
|
||||
parquetEngine.Metadata.CreatedBy,
|
||||
Schema = new
|
||||
{
|
||||
parquetEngine.Metadata.SchemaTree.Path,
|
||||
RepetitionType = parquetEngine.Metadata.SchemaTree.RepetitionType?.ToString().ToUpper(),
|
||||
Children = ProcessChildren(parquetEngine.Metadata.SchemaTree)
|
||||
},
|
||||
RowGroups = (parquetEngine.Metadata.RowGroups ?? Enumerable.Empty<IRowGroupMetadata>()).Select(rowGroup => new
|
||||
{
|
||||
rowGroup.Ordinal,
|
||||
rowGroup.NumRows,
|
||||
SortingColumns = (rowGroup.SortingColumns ?? Enumerable.Empty<SortingColumn>()).Select(sortingColumn => new
|
||||
{
|
||||
sortingColumn.ColumnIdx,
|
||||
sortingColumn.Descending,
|
||||
sortingColumn.NullsFirst
|
||||
}).ToArray(),
|
||||
rowGroup.RowCount,
|
||||
rowGroup.SortingColumns,
|
||||
rowGroup.Columns,
|
||||
rowGroup.FileOffset,
|
||||
rowGroup.TotalByteSize,
|
||||
rowGroup.TotalCompressedSize
|
||||
}).ToArray()
|
||||
};
|
||||
|
||||
return JsonSerializer.Serialize(jsonObject, new JsonSerializerOptions { WriteIndented = true });
|
||||
IEnumerable<object> ProcessChildren(IParquetSchemaElement schemaElement)
|
||||
{
|
||||
foreach (var child in (schemaElement.Children ?? Enumerable.Empty<IParquetSchemaElement>()))
|
||||
{
|
||||
yield return new
|
||||
{
|
||||
child.Path,
|
||||
child.Type,
|
||||
child.TypeLength,
|
||||
child.LogicalType,
|
||||
RepetitionType = child.RepetitionType?.ToString().ToUpper(),
|
||||
child.ConvertedType,
|
||||
child.Scale,
|
||||
child.Precision,
|
||||
child.NumChildren,
|
||||
Children = child.Children.Select(ProcessChildren)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return JsonSerializer.Serialize(jsonObject, new JsonSerializerOptions
|
||||
{
|
||||
WriteIndented = true,
|
||||
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
|
||||
});
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
return $"Something went wrong while processing the schema:{Environment.NewLine}{Environment.NewLine}{ex}";
|
||||
}
|
||||
|
||||
// Projects a Thrift logical type onto an anonymous object for JSON output.
// The first populated member wins, checked in the order listed below; parameterised
// types (DECIMAL, TIME, TIMESTAMP, INTEGER) also emit their parameters.
static object? LogicalTypeToJSONObject(LogicalType? logicalType)
{
    return logicalType switch
    {
        null => null,
        { STRING: not null } => new { Name = nameof(logicalType.STRING) },
        { MAP: not null } => new { Name = nameof(logicalType.MAP) },
        { LIST: not null } => new { Name = nameof(logicalType.LIST) },
        { ENUM: not null } => new { Name = nameof(logicalType.ENUM) },
        { DECIMAL: { } decimalType } => new
        {
            Name = nameof(logicalType.DECIMAL),
            decimalType.Scale,
            decimalType.Precision
        },
        { DATE: not null } => new { Name = nameof(logicalType.DATE) },
        { TIME: { } timeType } => new
        {
            Name = nameof(logicalType.TIME),
            timeType.IsAdjustedToUTC,
            Unit = TimeUnitToString(timeType.Unit)
        },
        { TIMESTAMP: { } timestampType } => new
        {
            Name = nameof(logicalType.TIMESTAMP),
            timestampType.IsAdjustedToUTC,
            Unit = TimeUnitToString(timestampType.Unit)
        },
        { INTEGER: { } integerType } => new
        {
            Name = nameof(logicalType.INTEGER),
            integerType.BitWidth,
            integerType.IsSigned
        },
        { JSON: not null } => new { Name = nameof(logicalType.JSON) },
        { BSON: not null } => new { Name = nameof(logicalType.BSON) },
        { UUID: not null } => new { Name = nameof(logicalType.UUID) },
        // A populated UNKNOWN member reports the runtime type name of its value.
        { UNKNOWN: { } unknownType } => new { Name = $"{unknownType.GetType().Name}" },
        // No member populated at all.
        _ => new { Name = nameof(logicalType.UNKNOWN) }
    };
}
|
||||
|
||||
// Returns the name of the populated time unit member (MILLIS, MICROS or NANOS, checked in
// that order), or an empty string when the unit is null or none of them is set.
static string TimeUnitToString(TimeUnit? timeUnit)
{
    return timeUnit switch
    {
        { MILLIS: not null } => nameof(timeUnit.MILLIS),
        { MICROS: not null } => nameof(timeUnit.MICROS),
        { NANOS: not null } => nameof(timeUnit.NANOS),
        _ => string.Empty
    };
}
|
||||
}
|
||||
|
||||
public static string TryFormatJSON(string possibleJSON)
|
||||
|
|
@ -191,4 +101,4 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -130,4 +130,4 @@ namespace ParquetViewer.Helpers
|
|||
public override string ToString()
{
    // Four dot-separated components, in the order Major, Minor, Patch, Build.
    return $"{this.Major}.{this.Minor}.{this.Patch}.{this.Build}";
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -169,5 +169,11 @@ namespace ParquetViewer.Helpers
|
|||
// Middle and end stops of the pressed-menu-item gradient both use the theme's selection back color.
public override Color MenuItemPressedGradientMiddle
{
    get { return this._theme.SelectionBackColor; }
}

public override Color MenuItemPressedGradientEnd
{
    get { return this._theme.SelectionBackColor; }
}
|
||||
#endregion
|
||||
|
||||
#region Dropdown hover colors
// All three gradient stops share the theme's selection back color.
public override Color ButtonSelectedGradientBegin
{
    get { return this._theme.SelectionBackColor; }
}

public override Color ButtonSelectedGradientMiddle
{
    get { return this._theme.SelectionBackColor; }
}

public override Color ButtonSelectedGradientEnd
{
    get { return this._theme.SelectionBackColor; }
}
#endregion
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -98,5 +98,22 @@ namespace ParquetViewer.Helpers
|
|||
//Terminate this instance
|
||||
Application.Exit();
|
||||
}
|
||||
|
||||
/// <summary>
/// Best effort attempt at stripping time components from a date format string.
/// </summary>
/// <remarks>
/// Removes the time-related custom format specifiers (H, h, m, s, f, F, t, z, K) and the ":"
/// time separator. Quoted literals ('...' or "...") and backslash-escaped characters are copied
/// through untouched, so literal text is never corrupted. Runs of spaces are then collapsed and
/// dangling separators (including an unquoted "T") are trimmed from both ends.
/// A quoted literal that only made sense next to the removed time part (e.g. 'at') is kept as is.
/// </remarks>
/// <param name="dateFormat">Date format with potential time components</param>
/// <returns>Date format with no time components; may be empty if the format contained only time components</returns>
/// <exception cref="ArgumentNullException"><paramref name="dateFormat"/> is null.</exception>
public static string StripTimeComponentsFromDateFormat(string dateFormat)
{
    ArgumentNullException.ThrowIfNull(dateFormat);

    // Time specifiers plus the time separator itself.
    const string timeSpecifiers = "HhmsfFtzK:";

    var builder = new System.Text.StringBuilder(dateFormat.Length);
    for (var i = 0; i < dateFormat.Length; i++)
    {
        var current = dateFormat[i];

        if (current == '\\' && i + 1 < dateFormat.Length)
        {
            // Escaped character: keep the escape and the character it protects.
            builder.Append(current).Append(dateFormat[++i]);
        }
        else if (current == '\'' || current == '"')
        {
            // Quoted literal: copy verbatim up to the matching quote
            // (or to the end of the string if the quote is never closed).
            var closing = dateFormat.IndexOf(current, i + 1);
            if (closing < 0)
                closing = dateFormat.Length - 1;

            builder.Append(dateFormat, i, closing - i + 1);
            i = closing;
        }
        else if (timeSpecifiers.IndexOf(current) < 0)
        {
            builder.Append(current);
        }
    }

    var stripped = builder.ToString();

    // Removing specifiers can leave several spaces in a row; collapse them to one.
    while (stripped.Contains("  "))
    {
        stripped = stripped.Replace("  ", " ");
    }

    // Drop separators left dangling at either end, e.g. the "T" and "." of an ISO-like format.
    return stripped.Trim('/', '-', '.', ' ', ',', '_', 'T');
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -148,4 +148,4 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
100
src/ParquetViewer/MainForm.Designer.cs
generated
100
src/ParquetViewer/MainForm.Designer.cs
generated
|
|
@ -35,7 +35,7 @@ namespace ParquetViewer
|
|||
{
|
||||
components = new System.ComponentModel.Container();
|
||||
System.ComponentModel.ComponentResourceManager resources = new System.ComponentModel.ComponentResourceManager(typeof(MainForm));
|
||||
DataGridViewCellStyle dataGridViewCellStyle1 = new DataGridViewCellStyle();
|
||||
DataGridViewCellStyle dataGridViewCellStyle2 = new DataGridViewCellStyle();
|
||||
mainTableLayoutPanel = new TableLayoutPanel();
|
||||
recordsToLabel = new Label();
|
||||
recordCountTextBox = new DelayedOnChangedTextBox();
|
||||
|
|
@ -68,6 +68,7 @@ namespace ParquetViewer
|
|||
toolsToolStripMenuItem = new ToolStripMenuItem();
|
||||
getSQLCreateTableScriptToolStripMenuItem = new ToolStripMenuItem();
|
||||
metadataViewerToolStripMenuItem = new ToolStripMenuItem();
|
||||
openQueryEditorToolToolStripMenuItem = new ToolStripMenuItem();
|
||||
helpToolStripMenuItem = new ToolStripMenuItem();
|
||||
userGuideToolStripMenuItem = new ToolStripMenuItem();
|
||||
shareAnonymousUsageDataToolStripMenuItem = new ToolStripMenuItem();
|
||||
|
|
@ -107,17 +108,20 @@ namespace ParquetViewer
|
|||
mainTableLayoutPanel.Controls.Add(mainGridView, 0, 1);
|
||||
mainTableLayoutPanel.Controls.Add(loadAllRowsButton, 10, 0);
|
||||
mainTableLayoutPanel.Name = "mainTableLayoutPanel";
|
||||
loadAllRowsButtonTooltip.SetToolTip(mainTableLayoutPanel, resources.GetString("mainTableLayoutPanel.ToolTip"));
|
||||
//
|
||||
// recordsToLabel
|
||||
//
|
||||
resources.ApplyResources(recordsToLabel, "recordsToLabel");
|
||||
recordsToLabel.Name = "recordsToLabel";
|
||||
loadAllRowsButtonTooltip.SetToolTip(recordsToLabel, resources.GetString("recordsToLabel.ToolTip"));
|
||||
//
|
||||
// recordCountTextBox
|
||||
//
|
||||
resources.ApplyResources(recordCountTextBox, "recordCountTextBox");
|
||||
recordCountTextBox.DelayedTextChangedTimeout = 1000;
|
||||
recordCountTextBox.Name = "recordCountTextBox";
|
||||
loadAllRowsButtonTooltip.SetToolTip(recordCountTextBox, resources.GetString("recordCountTextBox.ToolTip"));
|
||||
recordCountTextBox.DelayedTextChanged += recordsToTextBox_TextChanged;
|
||||
recordCountTextBox.KeyPress += recordsToTextBox_KeyPress;
|
||||
//
|
||||
|
|
@ -125,12 +129,14 @@ namespace ParquetViewer
|
|||
//
|
||||
resources.ApplyResources(showRecordsFromLabel, "showRecordsFromLabel");
|
||||
showRecordsFromLabel.Name = "showRecordsFromLabel";
|
||||
loadAllRowsButtonTooltip.SetToolTip(showRecordsFromLabel, resources.GetString("showRecordsFromLabel.ToolTip"));
|
||||
//
|
||||
// offsetTextBox
|
||||
//
|
||||
resources.ApplyResources(offsetTextBox, "offsetTextBox");
|
||||
offsetTextBox.DelayedTextChangedTimeout = 1000;
|
||||
offsetTextBox.Name = "offsetTextBox";
|
||||
loadAllRowsButtonTooltip.SetToolTip(offsetTextBox, resources.GetString("offsetTextBox.ToolTip"));
|
||||
offsetTextBox.DelayedTextChanged += offsetTextBox_TextChanged;
|
||||
offsetTextBox.KeyPress += offsetTextBox_KeyPress;
|
||||
//
|
||||
|
|
@ -140,6 +146,7 @@ namespace ParquetViewer
|
|||
runQueryButton.ForeColor = System.Drawing.Color.DarkRed;
|
||||
runQueryButton.Image = Resources.Icons.exclamation_icon;
|
||||
runQueryButton.Name = "runQueryButton";
|
||||
loadAllRowsButtonTooltip.SetToolTip(runQueryButton, resources.GetString("runQueryButton.ToolTip"));
|
||||
runQueryButton.UseVisualStyleBackColor = true;
|
||||
runQueryButton.Click += runQueryButton_Click;
|
||||
//
|
||||
|
|
@ -150,6 +157,7 @@ namespace ParquetViewer
|
|||
searchFilterLabel.LinkColor = System.Drawing.Color.Navy;
|
||||
searchFilterLabel.Name = "searchFilterLabel";
|
||||
searchFilterLabel.TabStop = true;
|
||||
loadAllRowsButtonTooltip.SetToolTip(searchFilterLabel, resources.GetString("searchFilterLabel.ToolTip"));
|
||||
searchFilterLabel.LinkClicked += searchFilterLabel_Click;
|
||||
//
|
||||
// searchFilterTextBox
|
||||
|
|
@ -157,6 +165,7 @@ namespace ParquetViewer
|
|||
resources.ApplyResources(searchFilterTextBox, "searchFilterTextBox");
|
||||
mainTableLayoutPanel.SetColumnSpan(searchFilterTextBox, 2);
|
||||
searchFilterTextBox.Name = "searchFilterTextBox";
|
||||
loadAllRowsButtonTooltip.SetToolTip(searchFilterTextBox, resources.GetString("searchFilterTextBox.ToolTip"));
|
||||
searchFilterTextBox.Enter += searchFilterTextBox_Enter;
|
||||
searchFilterTextBox.KeyPress += searchFilterTextBox_KeyPress;
|
||||
searchFilterTextBox.Leave += searchFilterTextBox_Leave;
|
||||
|
|
@ -166,26 +175,28 @@ namespace ParquetViewer
|
|||
resources.ApplyResources(clearFilterButton, "clearFilterButton");
|
||||
clearFilterButton.ForeColor = System.Drawing.Color.Black;
|
||||
clearFilterButton.Name = "clearFilterButton";
|
||||
loadAllRowsButtonTooltip.SetToolTip(clearFilterButton, resources.GetString("clearFilterButton.ToolTip"));
|
||||
clearFilterButton.UseVisualStyleBackColor = true;
|
||||
clearFilterButton.Click += clearFilterButton_Click;
|
||||
//
|
||||
// mainGridView
|
||||
//
|
||||
resources.ApplyResources(mainGridView, "mainGridView");
|
||||
mainGridView.AllowUserToAddRows = false;
|
||||
mainGridView.AllowUserToDeleteRows = false;
|
||||
mainGridView.AllowUserToOrderColumns = true;
|
||||
resources.ApplyResources(mainGridView, "mainGridView");
|
||||
mainGridView.ClipboardCopyMode = DataGridViewClipboardCopyMode.EnableWithoutHeaderText;
|
||||
mainGridView.ColumnHeadersBorderStyle = DataGridViewHeaderBorderStyle.Single;
|
||||
dataGridViewCellStyle1.Alignment = DataGridViewContentAlignment.MiddleLeft;
|
||||
dataGridViewCellStyle1.BackColor = System.Drawing.SystemColors.ControlLight;
|
||||
dataGridViewCellStyle1.Font = new System.Drawing.Font("Segoe UI Semibold", 9F, System.Drawing.FontStyle.Bold);
|
||||
dataGridViewCellStyle1.ForeColor = System.Drawing.SystemColors.WindowText;
|
||||
dataGridViewCellStyle1.SelectionBackColor = System.Drawing.SystemColors.Highlight;
|
||||
dataGridViewCellStyle1.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
|
||||
dataGridViewCellStyle1.WrapMode = DataGridViewTriState.True;
|
||||
mainGridView.ColumnHeadersDefaultCellStyle = dataGridViewCellStyle1;
|
||||
dataGridViewCellStyle2.Alignment = DataGridViewContentAlignment.MiddleLeft;
|
||||
dataGridViewCellStyle2.BackColor = System.Drawing.SystemColors.ControlLight;
|
||||
dataGridViewCellStyle2.Font = new System.Drawing.Font("Segoe UI Semibold", 9F, System.Drawing.FontStyle.Bold);
|
||||
dataGridViewCellStyle2.ForeColor = System.Drawing.SystemColors.WindowText;
|
||||
dataGridViewCellStyle2.SelectionBackColor = System.Drawing.SystemColors.Highlight;
|
||||
dataGridViewCellStyle2.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
|
||||
dataGridViewCellStyle2.WrapMode = DataGridViewTriState.True;
|
||||
mainGridView.ColumnHeadersDefaultCellStyle = dataGridViewCellStyle2;
|
||||
mainGridView.ColumnHeadersHeightSizeMode = DataGridViewColumnHeadersHeightSizeMode.DisableResizing;
|
||||
mainGridView.ColumnNameEscapeFormat = "[{0}]";
|
||||
mainTableLayoutPanel.SetColumnSpan(mainGridView, 11);
|
||||
mainGridView.CopyAsWhereIcon = (System.Drawing.Image)resources.GetObject("mainGridView.CopyAsWhereIcon");
|
||||
mainGridView.CopyToClipboardIcon = (System.Drawing.Image)resources.GetObject("mainGridView.CopyToClipboardIcon");
|
||||
|
|
@ -195,6 +206,7 @@ namespace ParquetViewer
|
|||
mainTableLayoutPanel.SetRowSpan(mainGridView, 2);
|
||||
mainGridView.ShowCellToolTips = false;
|
||||
mainGridView.ShowCopyAsWhereContextMenuItem = true;
|
||||
loadAllRowsButtonTooltip.SetToolTip(mainGridView, resources.GetString("mainGridView.ToolTip"));
|
||||
mainGridView.DataBindingComplete += mainGridView_DataBindingComplete;
|
||||
//
|
||||
// loadAllRowsButton
|
||||
|
|
@ -216,16 +228,17 @@ namespace ParquetViewer
|
|||
//
|
||||
// mainMenuStrip
|
||||
//
|
||||
resources.ApplyResources(mainMenuStrip, "mainMenuStrip");
|
||||
mainMenuStrip.BackColor = System.Drawing.SystemColors.Control;
|
||||
mainMenuStrip.Items.AddRange(new ToolStripItem[] { fileToolStripMenuItem, editToolStripMenuItem, toolsToolStripMenuItem, helpToolStripMenuItem });
|
||||
resources.ApplyResources(mainMenuStrip, "mainMenuStrip");
|
||||
mainMenuStrip.Name = "mainMenuStrip";
|
||||
loadAllRowsButtonTooltip.SetToolTip(mainMenuStrip, resources.GetString("mainMenuStrip.ToolTip"));
|
||||
//
|
||||
// fileToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(fileToolStripMenuItem, "fileToolStripMenuItem");
|
||||
fileToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { newToolStripMenuItem, openToolStripMenuItem, openFolderToolStripMenuItem, toolStripSeparator, saveAsToolStripMenuItem, toolStripSeparator1, exitToolStripMenuItem });
|
||||
fileToolStripMenuItem.Name = "fileToolStripMenuItem";
|
||||
resources.ApplyResources(fileToolStripMenuItem, "fileToolStripMenuItem");
|
||||
//
|
||||
// newToolStripMenuItem
|
||||
//
|
||||
|
|
@ -241,14 +254,14 @@ namespace ParquetViewer
|
|||
//
|
||||
// openFolderToolStripMenuItem
|
||||
//
|
||||
openFolderToolStripMenuItem.Name = "openFolderToolStripMenuItem";
|
||||
resources.ApplyResources(openFolderToolStripMenuItem, "openFolderToolStripMenuItem");
|
||||
openFolderToolStripMenuItem.Name = "openFolderToolStripMenuItem";
|
||||
openFolderToolStripMenuItem.Click += openFolderToolStripMenuItem_Click;
|
||||
//
|
||||
// toolStripSeparator
|
||||
//
|
||||
toolStripSeparator.Name = "toolStripSeparator";
|
||||
resources.ApplyResources(toolStripSeparator, "toolStripSeparator");
|
||||
toolStripSeparator.Name = "toolStripSeparator";
|
||||
//
|
||||
// saveAsToolStripMenuItem
|
||||
//
|
||||
|
|
@ -258,20 +271,20 @@ namespace ParquetViewer
|
|||
//
|
||||
// toolStripSeparator1
|
||||
//
|
||||
toolStripSeparator1.Name = "toolStripSeparator1";
|
||||
resources.ApplyResources(toolStripSeparator1, "toolStripSeparator1");
|
||||
toolStripSeparator1.Name = "toolStripSeparator1";
|
||||
//
|
||||
// exitToolStripMenuItem
|
||||
//
|
||||
exitToolStripMenuItem.Name = "exitToolStripMenuItem";
|
||||
resources.ApplyResources(exitToolStripMenuItem, "exitToolStripMenuItem");
|
||||
exitToolStripMenuItem.Name = "exitToolStripMenuItem";
|
||||
exitToolStripMenuItem.Click += exitToolStripMenuItem_Click;
|
||||
//
|
||||
// editToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(editToolStripMenuItem, "editToolStripMenuItem");
|
||||
editToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { changeFieldsMenuStripButton, changeDateFormatToolStripMenuItem, alwaysLoadAllRecordsToolStripMenuItem, darkModeToolStripMenuItem });
|
||||
editToolStripMenuItem.Name = "editToolStripMenuItem";
|
||||
resources.ApplyResources(editToolStripMenuItem, "editToolStripMenuItem");
|
||||
//
|
||||
// changeFieldsMenuStripButton
|
||||
//
|
||||
|
|
@ -281,50 +294,50 @@ namespace ParquetViewer
|
|||
//
|
||||
// changeDateFormatToolStripMenuItem
|
||||
//
|
||||
changeDateFormatToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { defaultToolStripMenuItem, iSO8601ToolStripMenuItem, customDateFormatToolStripMenuItem });
|
||||
resources.ApplyResources(changeDateFormatToolStripMenuItem, "changeDateFormatToolStripMenuItem");
|
||||
changeDateFormatToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { defaultToolStripMenuItem, iSO8601ToolStripMenuItem, customDateFormatToolStripMenuItem });
|
||||
changeDateFormatToolStripMenuItem.Name = "changeDateFormatToolStripMenuItem";
|
||||
//
|
||||
// defaultToolStripMenuItem
|
||||
//
|
||||
defaultToolStripMenuItem.Name = "defaultToolStripMenuItem";
|
||||
resources.ApplyResources(defaultToolStripMenuItem, "defaultToolStripMenuItem");
|
||||
defaultToolStripMenuItem.Name = "defaultToolStripMenuItem";
|
||||
defaultToolStripMenuItem.Tag = "0";
|
||||
defaultToolStripMenuItem.Click += DateFormatMenuItem_Click;
|
||||
//
|
||||
// iSO8601ToolStripMenuItem
|
||||
//
|
||||
iSO8601ToolStripMenuItem.Name = "iSO8601ToolStripMenuItem";
|
||||
resources.ApplyResources(iSO8601ToolStripMenuItem, "iSO8601ToolStripMenuItem");
|
||||
iSO8601ToolStripMenuItem.Name = "iSO8601ToolStripMenuItem";
|
||||
iSO8601ToolStripMenuItem.Tag = "2";
|
||||
iSO8601ToolStripMenuItem.Click += DateFormatMenuItem_Click;
|
||||
//
|
||||
// customDateFormatToolStripMenuItem
|
||||
//
|
||||
customDateFormatToolStripMenuItem.Name = "customDateFormatToolStripMenuItem";
|
||||
resources.ApplyResources(customDateFormatToolStripMenuItem, "customDateFormatToolStripMenuItem");
|
||||
customDateFormatToolStripMenuItem.Name = "customDateFormatToolStripMenuItem";
|
||||
customDateFormatToolStripMenuItem.Tag = "6";
|
||||
customDateFormatToolStripMenuItem.Click += DateFormatMenuItem_Click;
|
||||
//
|
||||
// alwaysLoadAllRecordsToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(alwaysLoadAllRecordsToolStripMenuItem, "alwaysLoadAllRecordsToolStripMenuItem");
|
||||
alwaysLoadAllRecordsToolStripMenuItem.Checked = true;
|
||||
alwaysLoadAllRecordsToolStripMenuItem.CheckState = CheckState.Checked;
|
||||
alwaysLoadAllRecordsToolStripMenuItem.Name = "alwaysLoadAllRecordsToolStripMenuItem";
|
||||
resources.ApplyResources(alwaysLoadAllRecordsToolStripMenuItem, "alwaysLoadAllRecordsToolStripMenuItem");
|
||||
alwaysLoadAllRecordsToolStripMenuItem.Click += alwaysLoadAllRecordsToolStripMenuItem_Click;
|
||||
//
|
||||
// darkModeToolStripMenuItem
|
||||
//
|
||||
darkModeToolStripMenuItem.Name = "darkModeToolStripMenuItem";
|
||||
resources.ApplyResources(darkModeToolStripMenuItem, "darkModeToolStripMenuItem");
|
||||
darkModeToolStripMenuItem.Name = "darkModeToolStripMenuItem";
|
||||
darkModeToolStripMenuItem.Click += darkModeToolStripMenuItem_Click;
|
||||
//
|
||||
// toolsToolStripMenuItem
|
||||
//
|
||||
toolsToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { getSQLCreateTableScriptToolStripMenuItem, metadataViewerToolStripMenuItem });
|
||||
toolsToolStripMenuItem.Name = "toolsToolStripMenuItem";
|
||||
resources.ApplyResources(toolsToolStripMenuItem, "toolsToolStripMenuItem");
|
||||
toolsToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { getSQLCreateTableScriptToolStripMenuItem, metadataViewerToolStripMenuItem, openQueryEditorToolToolStripMenuItem });
|
||||
toolsToolStripMenuItem.Name = "toolsToolStripMenuItem";
|
||||
//
|
||||
// getSQLCreateTableScriptToolStripMenuItem
|
||||
//
|
||||
|
|
@ -339,57 +352,63 @@ namespace ParquetViewer
|
|||
metadataViewerToolStripMenuItem.Name = "metadataViewerToolStripMenuItem";
|
||||
metadataViewerToolStripMenuItem.Click += MetadataViewerToolStripMenuItem_Click;
|
||||
//
|
||||
// openQueryEditorToolToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(openQueryEditorToolToolStripMenuItem, "openQueryEditorToolToolStripMenuItem");
|
||||
openQueryEditorToolToolStripMenuItem.Name = "openQueryEditorToolToolStripMenuItem";
|
||||
openQueryEditorToolToolStripMenuItem.Click += openQueryEditorToolToolStripMenuItem_Click;
|
||||
//
|
||||
// helpToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(helpToolStripMenuItem, "helpToolStripMenuItem");
|
||||
helpToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { userGuideToolStripMenuItem, shareAnonymousUsageDataToolStripMenuItem, languageToolStripMenuItem, aboutToolStripMenuItem });
|
||||
helpToolStripMenuItem.Name = "helpToolStripMenuItem";
|
||||
resources.ApplyResources(helpToolStripMenuItem, "helpToolStripMenuItem");
|
||||
//
|
||||
// userGuideToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(userGuideToolStripMenuItem, "userGuideToolStripMenuItem");
|
||||
userGuideToolStripMenuItem.Image = Resources.Icons.external_link_icon;
|
||||
userGuideToolStripMenuItem.Name = "userGuideToolStripMenuItem";
|
||||
resources.ApplyResources(userGuideToolStripMenuItem, "userGuideToolStripMenuItem");
|
||||
userGuideToolStripMenuItem.Click += userGuideToolStripMenuItem_Click;
|
||||
//
|
||||
// shareAnonymousUsageDataToolStripMenuItem
|
||||
//
|
||||
shareAnonymousUsageDataToolStripMenuItem.Name = "shareAnonymousUsageDataToolStripMenuItem";
|
||||
resources.ApplyResources(shareAnonymousUsageDataToolStripMenuItem, "shareAnonymousUsageDataToolStripMenuItem");
|
||||
shareAnonymousUsageDataToolStripMenuItem.Name = "shareAnonymousUsageDataToolStripMenuItem";
|
||||
shareAnonymousUsageDataToolStripMenuItem.CheckedChanged += shareAnonymousUsageDataToolStripMenuItem_CheckedChanged;
|
||||
shareAnonymousUsageDataToolStripMenuItem.Click += shareAnonymousUsageDataToolStripMenuItem_Click;
|
||||
//
|
||||
// languageToolStripMenuItem
|
||||
//
|
||||
resources.ApplyResources(languageToolStripMenuItem, "languageToolStripMenuItem");
|
||||
languageToolStripMenuItem.DropDownItems.AddRange(new ToolStripItem[] { englishToolStripMenuItem, turkishToolStripMenuItem });
|
||||
languageToolStripMenuItem.Image = Resources.Icons.localization_icon;
|
||||
languageToolStripMenuItem.Name = "languageToolStripMenuItem";
|
||||
resources.ApplyResources(languageToolStripMenuItem, "languageToolStripMenuItem");
|
||||
//
|
||||
// englishToolStripMenuItem
|
||||
//
|
||||
englishToolStripMenuItem.Name = "englishToolStripMenuItem";
|
||||
resources.ApplyResources(englishToolStripMenuItem, "englishToolStripMenuItem");
|
||||
englishToolStripMenuItem.Name = "englishToolStripMenuItem";
|
||||
englishToolStripMenuItem.Tag = "en-US";
|
||||
englishToolStripMenuItem.Click += languageToolStripMenuItem_Click;
|
||||
//
|
||||
// turkishToolStripMenuItem
|
||||
//
|
||||
turkishToolStripMenuItem.Name = "turkishToolStripMenuItem";
|
||||
resources.ApplyResources(turkishToolStripMenuItem, "turkishToolStripMenuItem");
|
||||
turkishToolStripMenuItem.Name = "turkishToolStripMenuItem";
|
||||
turkishToolStripMenuItem.Tag = "tr-TR";
|
||||
turkishToolStripMenuItem.Click += languageToolStripMenuItem_Click;
|
||||
//
|
||||
// aboutToolStripMenuItem
|
||||
//
|
||||
aboutToolStripMenuItem.Name = "aboutToolStripMenuItem";
|
||||
resources.ApplyResources(aboutToolStripMenuItem, "aboutToolStripMenuItem");
|
||||
aboutToolStripMenuItem.Name = "aboutToolStripMenuItem";
|
||||
aboutToolStripMenuItem.Click += aboutToolStripMenuItem_Click;
|
||||
//
|
||||
// showingRecordCountStatusBarLabel
|
||||
//
|
||||
showingRecordCountStatusBarLabel.Name = "showingRecordCountStatusBarLabel";
|
||||
resources.ApplyResources(showingRecordCountStatusBarLabel, "showingRecordCountStatusBarLabel");
|
||||
showingRecordCountStatusBarLabel.Name = "showingRecordCountStatusBarLabel";
|
||||
//
|
||||
// actualShownRecordCountLabel
|
||||
//
|
||||
|
|
@ -398,19 +417,19 @@ namespace ParquetViewer
|
|||
//
|
||||
// recordsTextStatusBarLabel
|
||||
//
|
||||
recordsTextStatusBarLabel.Name = "recordsTextStatusBarLabel";
|
||||
resources.ApplyResources(recordsTextStatusBarLabel, "recordsTextStatusBarLabel");
|
||||
recordsTextStatusBarLabel.Name = "recordsTextStatusBarLabel";
|
||||
//
|
||||
// springStatusBarLabel
|
||||
//
|
||||
springStatusBarLabel.Name = "springStatusBarLabel";
|
||||
resources.ApplyResources(springStatusBarLabel, "springStatusBarLabel");
|
||||
springStatusBarLabel.Name = "springStatusBarLabel";
|
||||
springStatusBarLabel.Spring = true;
|
||||
//
|
||||
// showingStatusBarLabel
|
||||
//
|
||||
showingStatusBarLabel.Name = "showingStatusBarLabel";
|
||||
resources.ApplyResources(showingStatusBarLabel, "showingStatusBarLabel");
|
||||
showingStatusBarLabel.Name = "showingStatusBarLabel";
|
||||
showingStatusBarLabel.Click += showingStatusBarLabel_Click;
|
||||
//
|
||||
// recordCountStatusBarLabel
|
||||
|
|
@ -420,8 +439,8 @@ namespace ParquetViewer
|
|||
//
|
||||
// outOfStatusBarLabel
|
||||
//
|
||||
outOfStatusBarLabel.Name = "outOfStatusBarLabel";
|
||||
resources.ApplyResources(outOfStatusBarLabel, "outOfStatusBarLabel");
|
||||
outOfStatusBarLabel.Name = "outOfStatusBarLabel";
|
||||
//
|
||||
// totalRowCountStatusBarLabel
|
||||
//
|
||||
|
|
@ -430,10 +449,11 @@ namespace ParquetViewer
|
|||
//
|
||||
// mainStatusStrip
|
||||
//
|
||||
mainStatusStrip.Items.AddRange(new ToolStripItem[] { showingRecordCountStatusBarLabel, actualShownRecordCountLabel, recordsTextStatusBarLabel, springStatusBarLabel, showingStatusBarLabel, recordCountStatusBarLabel, outOfStatusBarLabel, totalRowCountStatusBarLabel });
|
||||
resources.ApplyResources(mainStatusStrip, "mainStatusStrip");
|
||||
mainStatusStrip.Items.AddRange(new ToolStripItem[] { showingRecordCountStatusBarLabel, actualShownRecordCountLabel, recordsTextStatusBarLabel, springStatusBarLabel, showingStatusBarLabel, recordCountStatusBarLabel, outOfStatusBarLabel, totalRowCountStatusBarLabel });
|
||||
mainStatusStrip.Name = "mainStatusStrip";
|
||||
mainStatusStrip.ShowItemToolTips = true;
|
||||
loadAllRowsButtonTooltip.SetToolTip(mainStatusStrip, resources.GetString("mainStatusStrip.ToolTip"));
|
||||
//
|
||||
// exportFileDialog
|
||||
//
|
||||
|
|
@ -448,8 +468,8 @@ namespace ParquetViewer
|
|||
//
|
||||
// MainForm
|
||||
//
|
||||
AllowDrop = true;
|
||||
resources.ApplyResources(this, "$this");
|
||||
AllowDrop = true;
|
||||
AutoScaleMode = AutoScaleMode.Font;
|
||||
Controls.Add(mainStatusStrip);
|
||||
Controls.Add(mainTableLayoutPanel);
|
||||
|
|
@ -458,6 +478,7 @@ namespace ParquetViewer
|
|||
KeyPreview = true;
|
||||
MainMenuStrip = mainMenuStrip;
|
||||
Name = "MainForm";
|
||||
loadAllRowsButtonTooltip.SetToolTip(this, resources.GetString("$this.ToolTip"));
|
||||
Load += MainForm_Load;
|
||||
DragDrop += MainForm_DragDrop;
|
||||
DragEnter += MainForm_DragEnter;
|
||||
|
|
@ -527,6 +548,7 @@ namespace ParquetViewer
|
|||
private ToolStripMenuItem languageToolStripMenuItem;
|
||||
private ToolStripMenuItem englishToolStripMenuItem;
|
||||
private ToolStripMenuItem turkishToolStripMenuItem;
|
||||
private ToolStripMenuItem openQueryEditorToolToolStripMenuItem;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ using ParquetViewer.Engine.Types;
|
|||
using ParquetViewer.Exceptions;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using System.Drawing;
|
||||
using System.Globalization;
|
||||
|
|
@ -45,7 +46,7 @@ namespace ParquetViewer
|
|||
private void recordsToTextBox_TextChanged(object sender, EventArgs? e)
|
||||
{
|
||||
var textbox = (TextBox)sender;
|
||||
if (int.TryParse(textbox.Text, out var recordCount))
|
||||
if (int.TryParse(textbox.Text, out var recordCount) && recordCount >= 0)
|
||||
this.CurrentMaxRowCount = recordCount;
|
||||
else
|
||||
textbox.Text = this.CurrentMaxRowCount.ToString();
|
||||
|
|
@ -173,8 +174,8 @@ namespace ParquetViewer
|
|||
|
||||
//Treat list, map, and struct types as strings by casting them automatically
|
||||
foreach (var complexField in this.mainGridView.Columns.OfType<DataGridViewColumn>()
|
||||
.Where(c => c.ValueType == typeof(ListValue) || c.ValueType == typeof(MapValue)
|
||||
|| c.ValueType == typeof(StructValue) || c.ValueType == typeof(ByteArrayValue))
|
||||
.Where(c => c.ValueType.ImplementsInterface<IListValue>() || c.ValueType.ImplementsInterface<IMapValue>()
|
||||
|| c.ValueType.ImplementsInterface<IStructValue>() || c.ValueType.ImplementsInterface<IByteArrayValue>())
|
||||
.Select(c => c.Name))
|
||||
{
|
||||
//This isn't perfect but it should handle most cases
|
||||
|
|
@ -287,5 +288,34 @@ namespace ParquetViewer
|
|||
AppSettings.UserSelectedCulture = newCultureInfo;
|
||||
UtilityMethods.RestartApplication();
|
||||
}
|
||||
|
||||
private QueryEditor? _openQueryEditor = null;
|
||||
private string? _queryEditorSavedQueryText = null;
|
||||
/// <summary>
/// Opens the query editor window, or surfaces the already-open one.
/// Only one editor instance is kept alive at a time; the text of the last
/// closed editor is remembered and restored into the next one.
/// </summary>
/// <param name="sender">The menu item that raised the click (unused).</param>
/// <param name="e">Event data (unused).</param>
private void openQueryEditorToolToolStripMenuItem_Click(object sender, EventArgs e)
{
    var existingEditor = this._openQueryEditor;
    if (existingEditor != null && !existingEditor.IsDisposed)
    {
        //BringToFront() only changes z-order; a minimized window must be restored explicitly
        if (existingEditor.WindowState == FormWindowState.Minimized)
        {
            existingEditor.WindowState = FormWindowState.Normal;
        }

        existingEditor.BringToFront();
        existingEditor.Activate(); //give the editor keyboard focus as well
        return;
    }

    var editor = new QueryEditor(this.SelectedFields, this.OpenFileOrFolderPath, this.CurrentOffset, this.CurrentMaxRowCount);
    this._openQueryEditor = editor;

    //Capture the local instance instead of reading the nullable field again inside the handler
    editor.FormClosed += (s, args) =>
    {
        //Remember the user's query in case they accidentally close the window
        this._queryEditorSavedQueryText = editor.QueryText;
        editor.Dispose();

        if (ReferenceEquals(this._openQueryEditor, editor))
        {
            this._openQueryEditor = null;
        }
    };

    if (!string.IsNullOrWhiteSpace(this._queryEditorSavedQueryText))
    {
        editor.QueryText = this._queryEditorSavedQueryText;
    }

    //Offset the editor slightly from the main window so both remain visible
    editor.StartPosition = FormStartPosition.Manual;
    editor.Location = this.Location + new Size(30, 30);
    editor.Show(); //don't assign parent so the window can be handled separately by the user
    MenuBarClickEvent.FireAndForget(MenuBarClickEvent.ActionId.QueryEditor);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
using MiniExcelLibs;
|
||||
using ParquetViewer.Analytics;
|
||||
using ParquetViewer.Engine;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using ParquetViewer.Exceptions;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
|
|
@ -55,7 +55,8 @@ namespace ParquetViewer
|
|||
this.exportFileDialog.Filter = "CSV file (*.csv)|*.csv|JSON file (*.json)|*.json|Excel '93 file (*.xls)|*.xls|Excel '07 file (*.xlsx)|*.xlsx";
|
||||
this.exportFileDialog.FilterIndex = (int)defaultFileType + 1;
|
||||
|
||||
if (this._openParquetEngine?.ParquetSchemaTree?.Children.All(s => s.FieldType == Engine.ParquetSchemaElement.FieldTypeId.Primitive) == true)
|
||||
if (this._openParquetEngine?.Metadata.SchemaTree?.Children.All(s => s.IsPrimitive) == true
|
||||
&& this._openParquetEngine is not Engine.DuckDB.ParquetEngine)
|
||||
{
|
||||
this.exportFileDialog.Filter += "|Parquet file (*.parquet)|*.parquet";
|
||||
}
|
||||
|
|
@ -67,55 +68,13 @@ namespace ParquetViewer
|
|||
|
||||
var fileExtension = Path.GetExtension(filePath);
|
||||
FileType? selectedFileType = UtilityMethods.ExtensionToFileType(fileExtension);
|
||||
if (selectedFileType is null)
|
||||
throw new ArgumentOutOfRangeException(fileExtension);
|
||||
|
||||
var stopWatch = Stopwatch.StartNew();
|
||||
loadingIcon = this.ShowLoadingIcon(Resources.Strings.ExportingDataLabelText, this.MainDataSource.DefaultView.Count * this.MainDataSource.Columns.Count);
|
||||
if (selectedFileType == FileType.CSV)
|
||||
{
|
||||
await WriteDataToCSVFile(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.XLS)
|
||||
{
|
||||
const int MAX_XLS_COLUMN_COUNT = 256; //.xls format has a hard limit on 256 columns
|
||||
if (this.MainDataSource!.Columns.Count > MAX_XLS_COLUMN_COUNT)
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.TooManyColumnsXlsErrorMessageFormat.Format(MAX_XLS_COLUMN_COUNT, this.MainDataSource.Columns.Count),
|
||||
Resources.Errors.TooManyColumnsErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
await WriteDataToExcel93File(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.XLSX)
|
||||
{
|
||||
const int MAX_XLSX_COLUMN_COUNT = 16384; //.xlsx format has a hard limit on 16384 columns
|
||||
if (this.MainDataSource!.Columns.Count > MAX_XLSX_COLUMN_COUNT)
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.TooManyColumnsXlsxErrorMessageFormat.Format(MAX_XLSX_COLUMN_COUNT, this.MainDataSource.Columns.Count),
|
||||
Resources.Errors.TooManyColumnsErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
await WriteDataToExcel2007File(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.JSON)
|
||||
{
|
||||
await WriteDataToJSONFile(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.PARQUET)
|
||||
{
|
||||
await this.WriteDataToParquetFile(filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception(string.Format(Resources.Errors.UnsupportedExportTypeFormat, fileExtension));
|
||||
}
|
||||
await ExportResultsImpl(this.MainDataSource!, selectedFileType.Value, this._openParquetEngine,
|
||||
filePath, loadingIcon.CancellationToken, loadingIcon, this.OpenFileOrFolderPath);
|
||||
|
||||
if (loadingIcon.CancellationToken.IsCancellationRequested)
|
||||
{
|
||||
|
|
@ -127,15 +86,15 @@ namespace ParquetViewer
|
|||
long fileSizeInBytes = new FileInfo(filePath).Length;
|
||||
|
||||
FileExportEvent.FireAndForget(
|
||||
selectedFileType.Value,
|
||||
selectedFileType.Value,
|
||||
fileSizeInBytes,
|
||||
this.mainGridView.RowCount,
|
||||
this.mainGridView.ColumnCount,
|
||||
this.mainGridView.RowCount,
|
||||
this.mainGridView.ColumnCount,
|
||||
stopWatch.ElapsedMilliseconds);
|
||||
|
||||
MessageBox.Show(this,
|
||||
Resources.Strings.ExportSuccessfulMessageFormat.Format(Math.Round((fileSizeInBytes / 1024.0) / 1024.0, 2)),
|
||||
Resources.Strings.ExportSuccessfulTitle,
|
||||
Resources.Strings.ExportSuccessfulTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Information);
|
||||
}
|
||||
}
|
||||
|
|
@ -149,10 +108,10 @@ namespace ParquetViewer
|
|||
catch (XlsCellLengthException ex)
|
||||
{
|
||||
CleanupFile(filePath);
|
||||
|
||||
|
||||
if (MessageBox.Show(this,
|
||||
Resources.Strings.SwitchFromXlsToXlsxMessageFormat.Format(ex.MaxLength, ex.FileType.GetExtension(), FileType.XLSX.GetExtension()),
|
||||
Resources.Strings.SwitchFromXlsToXlsxMessageTitle,
|
||||
Resources.Strings.SwitchFromXlsToXlsxMessageTitle,
|
||||
MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation) == DialogResult.OK)
|
||||
{
|
||||
rerunType = FileType.XLSX;
|
||||
|
|
@ -184,10 +143,66 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
|
||||
private async Task WriteDataToExcel2007File(DataTable mainDataSource, string path, CancellationToken cancellationToken, IProgress<int> progress)
|
||||
|
||||
/// <summary>
/// Dispatches an export of <paramref name="dataTable"/> to the writer matching
/// <paramref name="selectedFileType"/>.
/// </summary>
/// <param name="dataTable">The data to export.</param>
/// <param name="selectedFileType">Target file format.</param>
/// <param name="engine">Engine used for Parquet exports; must be non-null for <see cref="FileType.PARQUET"/>.</param>
/// <param name="filePath">Destination file path.</param>
/// <param name="cancellationToken">Token forwarded to the writer.</param>
/// <param name="progress">Progress sink forwarded to the writer.</param>
/// <param name="sourceFileOrFolderPath">Path of the opened file/folder; used to derive the XLSX sheet name.</param>
/// <returns>The writer's task, or a completed task when the column limit check fails.</returns>
/// <exception cref="ArgumentNullException">Parquet export requested without an engine.</exception>
/// <exception cref="Exception">The file type has no writer.</exception>
private static Task ExportResultsImpl(DataTable dataTable, FileType selectedFileType, IParquetEngine? engine,
    string filePath, CancellationToken cancellationToken, IProgress<int> progress, string? sourceFileOrFolderPath)
{
    if (selectedFileType == FileType.CSV)
    {
        return WriteDataToCSVFile(dataTable, filePath, cancellationToken, progress);
    }

    if (selectedFileType == FileType.XLS)
    {
        const int MAX_XLS_COLUMN_COUNT = 256; //.xls format has a hard limit on 256 columns
        if (dataTable.Columns.Count > MAX_XLS_COLUMN_COUNT)
        {
            MessageBox.Show(
                Resources.Errors.TooManyColumnsXlsErrorMessageFormat.Format(MAX_XLS_COLUMN_COUNT, dataTable.Columns.Count),
                Resources.Errors.TooManyColumnsErrorTitle,
                MessageBoxButtons.OK, MessageBoxIcon.Error);

            //NOTE(review): nothing is written here, yet the caller appears to continue as if the
            //export ran (it reads the exported file's size afterwards) - confirm this is handled.
            return Task.CompletedTask;
        }

        return WriteDataToExcel93File(dataTable, filePath, cancellationToken, progress);
    }

    if (selectedFileType == FileType.XLSX)
    {
        const int MAX_XLSX_COLUMN_COUNT = 16384; //.xlsx format has a hard limit on 16384 columns
        if (dataTable.Columns.Count > MAX_XLSX_COLUMN_COUNT)
        {
            MessageBox.Show(
                Resources.Errors.TooManyColumnsXlsxErrorMessageFormat.Format(MAX_XLSX_COLUMN_COUNT, dataTable.Columns.Count),
                Resources.Errors.TooManyColumnsErrorTitle,
                MessageBoxButtons.OK, MessageBoxIcon.Error);

            //NOTE(review): same caveat as the .xls branch above.
            return Task.CompletedTask;
        }

        //GetFileNameWithoutExtension returns null only for a null path; it returns an EMPTY string
        //for paths that end in a directory separator (e.g. a drive root), so check for both.
        var sheetName = Path.GetFileNameWithoutExtension(sourceFileOrFolderPath);
        if (string.IsNullOrWhiteSpace(sheetName))
        {
            sheetName = "Sheet1";
        }

        return WriteDataToExcel2007File(dataTable, filePath, sheetName, cancellationToken, progress);
    }

    if (selectedFileType == FileType.JSON)
    {
        return WriteDataToJSONFile(dataTable, filePath, cancellationToken, progress);
    }

    if (selectedFileType == FileType.PARQUET)
    {
        ArgumentNullException.ThrowIfNull(engine);

        //Recorded in the exported file's custom metadata
        var engineTypeName = engine is Engine.ParquetNET.ParquetEngine ? "ParquetNET" :
            engine is Engine.DuckDB.ParquetEngine ? "DuckDB" :
            "Unknown";

        return WriteDataToParquetFile(engine, dataTable, filePath, cancellationToken, progress, engineTypeName);
    }

    throw new Exception(string.Format(Resources.Errors.UnsupportedExportTypeFormat, selectedFileType.ToString()));
}
|
||||
|
||||
private static async Task WriteDataToExcel2007File(DataTable mainDataSource, string path, string sheetName, CancellationToken cancellationToken, IProgress<int> progress)
|
||||
{
|
||||
const int MAX_XLSX_SHEET_NAME_LENGTH = 31;
|
||||
var sheetName = Path.GetFileNameWithoutExtension(this.OpenFileOrFolderPath) ?? "Sheet1";
|
||||
|
||||
//sanitize sheet name
|
||||
sheetName = Regex.Replace(sheetName, "[^a-zA-Z0-9 _\\-()]", string.Empty).Left(MAX_XLSX_SHEET_NAME_LENGTH);
|
||||
|
|
@ -223,6 +238,7 @@ namespace ParquetViewer
|
|||
writer.WriteLine(rowBuilder.ToString());
|
||||
|
||||
string dateFormat = AppSettings.DateTimeDisplayFormat.GetDateFormat();
|
||||
string dateOnlyFormat = AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat();
|
||||
foreach (DataRowView row in dataTable.DefaultView)
|
||||
{
|
||||
rowBuilder.Clear();
|
||||
|
|
@ -248,6 +264,10 @@ namespace ParquetViewer
|
|||
{
|
||||
rowBuilder.Append(UtilityMethods.CleanCSVValue(dt.ToString(dateFormat)));
|
||||
}
|
||||
else if (value is DateOnly dateOnly)
|
||||
{
|
||||
rowBuilder.Append(UtilityMethods.CleanCSVValue(dateOnly.ToString(dateOnlyFormat)));
|
||||
}
|
||||
else
|
||||
{
|
||||
var stringValue = value!.ToString()!; //we never have `null` only `DBNull.Value`
|
||||
|
|
@ -265,6 +285,7 @@ namespace ParquetViewer
|
|||
=> Task.Run(() =>
|
||||
{
|
||||
string dateFormat = AppSettings.DateTimeDisplayFormat.GetDateFormat();
|
||||
string dateOnlyFormat = AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat();
|
||||
using var fs = new FileStream(path, FileMode.OpenOrCreate);
|
||||
var excelWriter = new ExcelWriter(fs);
|
||||
excelWriter.BeginWrite();
|
||||
|
|
@ -302,6 +323,10 @@ namespace ParquetViewer
|
|||
{
|
||||
excelWriter.WriteCell(i + 1, j, dt.ToString(dateFormat));
|
||||
}
|
||||
else if (value is DateOnly dateOnly)
|
||||
{
|
||||
excelWriter.WriteCell(i + 1, j, dateOnly.ToString(dateOnlyFormat));
|
||||
}
|
||||
else
|
||||
{
|
||||
var stringValue = value.ToString();
|
||||
|
|
@ -353,7 +378,7 @@ namespace ParquetViewer
|
|||
jsonWriter.WritePropertyName(columnName);
|
||||
|
||||
object? value = row.Row.ItemArray[i];
|
||||
StructValue.WriteValue(jsonWriter, value!, false);
|
||||
Engine.Helpers.WriteValue(jsonWriter, value!, false);
|
||||
progress.Report(1);
|
||||
}
|
||||
jsonWriter.WriteEndObject();
|
||||
|
|
@ -361,22 +386,11 @@ namespace ParquetViewer
|
|||
jsonWriter.WriteEndArray();
|
||||
}, cancellationToken);
|
||||
|
||||
private Task WriteDataToParquetFile(string path, CancellationToken cancellationToken, IProgress<int> progress)
|
||||
private static Task WriteDataToParquetFile(IParquetEngine engine, DataTable dataTable, string path,
|
||||
CancellationToken cancellationToken, IProgress<int> progress, string engineName)
|
||||
=> Task.Run(async () =>
|
||||
{
|
||||
var fields = new List<Parquet.Schema.Field>(this.MainDataSource!.Columns.Count);
|
||||
foreach (DataColumn column in this.MainDataSource.Columns)
|
||||
{
|
||||
fields.Add(this._openParquetEngine!.Schema!.Fields
|
||||
.Where(field => field.Name.Equals(column.ColumnName, StringComparison.InvariantCulture))
|
||||
.First());
|
||||
}
|
||||
var parquetSchema = new Parquet.Schema.ParquetSchema(fields);
|
||||
|
||||
using var fs = new FileStream(path, FileMode.OpenOrCreate);
|
||||
using var parquetWriter = await Parquet.ParquetWriter.CreateAsync(parquetSchema, fs, cancellationToken: cancellationToken);
|
||||
parquetWriter.CompressionLevel = System.IO.Compression.CompressionLevel.Optimal;
|
||||
parquetWriter.CustomMetadata = new Dictionary<string, string>
|
||||
var customMetadata = new Dictionary<string, string>
|
||||
{
|
||||
{
|
||||
"ParquetViewer", @"
|
||||
|
|
@ -384,38 +398,12 @@ namespace ParquetViewer
|
|||
""CreatedWith"": ""ParquetViewer"",
|
||||
""Version"": """ + Env.AssemblyVersion.ToString() + @""",
|
||||
""Website"": ""https://github.com/mukunku/ParquetViewer"",
|
||||
""CreationDate"": """ + DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ssZ") + @"""
|
||||
""CreationDate"": """ + DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ssZ") + @""",
|
||||
""Engine"": """ + engineName + @"""
|
||||
}"
|
||||
}
|
||||
};
|
||||
|
||||
const int MAX_ROWS_PER_ROWGROUP = 100_000; //Without batching we sometimes get "OverflowException: Array dimensions exceeded supported range" from Parquet.NET
|
||||
var batchIndex = 0;
|
||||
var isLastBatch = false;
|
||||
while (!isLastBatch)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
using var rowGroup = parquetWriter.CreateRowGroup();
|
||||
foreach (var dataField in parquetSchema.DataFields)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
var type = dataField.IsNullable ? dataField.ClrType.GetNullableVersion() : dataField.ClrType;
|
||||
var values = this.MainDataSource.GetColumnValues(type, dataField.Name, batchIndex * MAX_ROWS_PER_ROWGROUP, MAX_ROWS_PER_ROWGROUP);
|
||||
var dataColumn = new Parquet.Data.DataColumn(dataField, values);
|
||||
await rowGroup.WriteColumnAsync(dataColumn, cancellationToken);
|
||||
progress.Report(values.Length); //No way to report progress for each row, so do it by column
|
||||
isLastBatch = values.Length < MAX_ROWS_PER_ROWGROUP;
|
||||
}
|
||||
batchIndex++;
|
||||
}
|
||||
await engine.WriteDataToParquetFileAsync(dataTable, path, cancellationToken, progress, customMetadata);
|
||||
}, cancellationToken);
|
||||
|
||||
private static void HandleAllFilesSkippedException(AllFilesSkippedException ex)
|
||||
|
|
@ -464,10 +452,16 @@ namespace ParquetViewer
|
|||
sb.AppendLine(Resources.Errors.MultipleSchemasDetectedEntriesErrorMessageFormat.Format(schemaIndex++));
|
||||
for (var i = 0; i < topCount; i++)
|
||||
{
|
||||
if (i == schema.Fields.Count)
|
||||
if (i == schema.Count)
|
||||
break;
|
||||
|
||||
sb.AppendLine($" {schema.Fields.ElementAt(i).Name}");
|
||||
sb.AppendLine($" {schema.ElementAt(i)}");
|
||||
}
|
||||
|
||||
if (schemaIndex > maxSchemasLimit)
|
||||
{
|
||||
sb.AppendLine("...");
|
||||
break;
|
||||
}
|
||||
|
||||
if (schemaIndex > maxSchemasLimit)
|
||||
|
|
@ -484,17 +478,19 @@ namespace ParquetViewer
|
|||
ShowError(Resources.Errors.MalformedFieldErrorMessageFormat.Format(ex.Message));
|
||||
}
|
||||
|
||||
private static void HandleDecimalOverflowException(DecimalOverflowException ex)
|
||||
private static void HandleDecimalOverflowException(DecimalOverflowException ex)
|
||||
=> ShowError(
|
||||
Resources.Errors.DecimalValueTooLargeErrorMessageFormat.Format(
|
||||
ex.FieldName,
|
||||
ex.Precision,
|
||||
ex.Scale,
|
||||
DecimalOverflowException.MAX_DECIMAL_PRECISION,
|
||||
DecimalOverflowException.MAX_DECIMAL_SCALE),
|
||||
Resources.Errors.DecimalValueTooLargeErrorTitle);
|
||||
(ex.HasDetailedInfo ? Resources.Errors.DecimalValueTooLargeErrorMessageFormat
|
||||
: Resources.Errors.DecimalValueUnknownSizeTooLargeErrorMessageFormat)
|
||||
.Format(
|
||||
ex.FieldName,
|
||||
ex.Precision,
|
||||
ex.Scale,
|
||||
DecimalOverflowException.MAX_DECIMAL_PRECISION,
|
||||
DecimalOverflowException.MAX_DECIMAL_SCALE),
|
||||
Resources.Errors.DecimalValueTooLargeErrorTitle);
|
||||
|
||||
private static void ShowError(string message, string? title = null)
|
||||
private static void ShowError(string message, string? title = null)
|
||||
=> MessageBox.Show(message, title ?? Resources.Errors.GenericErrorMessage, MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -41,4 +41,4 @@ namespace ParquetViewer
|
|||
this.mainMenuStrip.Renderer = theme.ToolStripRenderer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -231,4 +231,4 @@ namespace ParquetViewer
|
|||
RefreshExperimentalFeatureToolStrips();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
using ParquetViewer.Analytics;
|
||||
using ParquetViewer.Controls;
|
||||
using ParquetViewer.Engine;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Exceptions;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
|
@ -49,6 +51,7 @@ namespace ParquetViewer
|
|||
this.mainGridView.ClearQuickPeekForms();
|
||||
this.mainGridView.ClearColumnFormatOverrides();
|
||||
this.ResetGetSQLCreateTableScriptToolStripMenuItemToolTipText();
|
||||
this._queryEditorSavedQueryText = null;
|
||||
|
||||
if (string.IsNullOrWhiteSpace(this._openFileOrFolderPath))
|
||||
{
|
||||
|
|
@ -138,7 +141,7 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
|
||||
private Engine.ParquetEngine? _openParquetEngine = null;
|
||||
private IParquetEngine? _openParquetEngine = null;
|
||||
#endregion
|
||||
|
||||
public MainForm()
|
||||
|
|
@ -199,13 +202,13 @@ namespace ParquetViewer
|
|||
{
|
||||
try
|
||||
{
|
||||
this._openParquetEngine = await Engine.ParquetEngine.OpenFileOrFolderAsync(this.OpenFileOrFolderPath, default);
|
||||
this._openParquetEngine = await Engine.ParquetNET.ParquetEngine.OpenFileOrFolderAsync(this.OpenFileOrFolderPath, default);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
if (this._openParquetEngine == null)
|
||||
{
|
||||
//cancel file open
|
||||
//cancel the file open
|
||||
this.OpenFileOrFolderPath = null;
|
||||
}
|
||||
|
||||
|
|
@ -217,9 +220,9 @@ namespace ParquetViewer
|
|||
{
|
||||
HandleSomeFilesSkippedException(sfse);
|
||||
}
|
||||
else if (ex is FileReadException fre)
|
||||
else if (ex is Engine.Exceptions.FileReadException fre)
|
||||
{
|
||||
HandleFileReadException(fre);
|
||||
MainForm.HandleFileReadException(fre);
|
||||
}
|
||||
else if (ex is MultipleSchemasFoundException msfe)
|
||||
{
|
||||
|
|
@ -238,10 +241,10 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
|
||||
Parquet.Schema.ParquetSchema? schema = null;
|
||||
List<string>? fields = null;
|
||||
try
|
||||
{
|
||||
schema = this._openParquetEngine.Schema;
|
||||
fields = this._openParquetEngine.Fields;
|
||||
}
|
||||
catch (ArgumentException ex) when (ex.Message.StartsWith("at least one field is required"))
|
||||
{ /*swallow: This exception is thrown from Parquet.Net when the schema has no fields*/ }
|
||||
|
|
@ -250,12 +253,11 @@ namespace ParquetViewer
|
|||
throw new Parquet.ParquetException(Resources.Errors.ParquetSchemaReadErrorMessage, ex);
|
||||
}
|
||||
|
||||
var fields = schema?.Fields;
|
||||
if (fields?.Count > 0)
|
||||
{
|
||||
if (AppSettings.AlwaysSelectAllFields && !forceOpenDialog)
|
||||
{
|
||||
return fields.Where(FieldsToLoadForm.IsSupportedFieldType).Select(f => f.Name).ToList();
|
||||
return fields;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -278,6 +280,35 @@ namespace ParquetViewer
|
|||
}
|
||||
|
||||
/// <summary>
/// Loads the open file into the grid using the current engine. If that fails and the
/// current engine is not already DuckDB, retries once with a DuckDB engine and, on
/// success, makes it the active engine.
/// </summary>
/// <remarks>
/// This is an <c>async void</c> method, so exceptions that escape it are not observable
/// by the caller through a task.
/// </remarks>
private async void LoadFileToGridview()
{
    if (this._openParquetEngine is null)
        return;

    try
    {
        await this.LoadFileToGridviewImpl(this._openParquetEngine);
    }
    catch (Exception unhandledEx)
    {
        //Try DuckDB if Parquet.NET fails
        if (this._openParquetEngine is Engine.DuckDB.ParquetEngine)
            throw;

        IParquetEngine? duckDbEngine = null;
        try
        {
            duckDbEngine = await Engine.DuckDB.ParquetEngine.OpenFileOrFolderAsync(this.OpenFileOrFolderPath!, default);
            await LoadFileToGridviewImpl(duckDbEngine);
            this.SwapEngines(duckDbEngine);
        }
        catch (Exception duckDbEx)
        {
            //Don't leak the fallback engine when it could not be used;
            //skip the dispose if it already became the active engine.
            if (!ReferenceEquals(this._openParquetEngine, duckDbEngine))
            {
                duckDbEngine?.DisposeSafely();
            }

            //If DuckDB fails too, bail
            throw new RowsReadException(unhandledEx, duckDbEx);
        }
    }
}
|
||||
|
||||
private async Task LoadFileToGridviewImpl(IParquetEngine engine)
|
||||
{
|
||||
var stopwatch = Stopwatch.StartNew(); var loadTime = TimeSpan.Zero; var indexTime = TimeSpan.Zero;
|
||||
LoadingIcon? loadingIcon = null;
|
||||
|
|
@ -296,12 +327,12 @@ namespace ParquetViewer
|
|||
return;
|
||||
}
|
||||
|
||||
long cellCount = this.SelectedFields.Count * Math.Min(this.CurrentMaxRowCount, this._openParquetEngine!.RecordCount - this.CurrentOffset);
|
||||
long cellCount = this.SelectedFields.Count * Math.Min(this.CurrentMaxRowCount, engine.RecordCount - this.CurrentOffset);
|
||||
loadingIcon = this.ShowLoadingIcon(Resources.Strings.LoadingDataLabelText, cellCount);
|
||||
|
||||
var intermediateResult = await Task.Run(async () =>
|
||||
{
|
||||
return await this._openParquetEngine.ReadRowsAsync(this.SelectedFields, this.CurrentOffset, this.CurrentMaxRowCount, loadingIcon.CancellationToken, loadingIcon);
|
||||
return await engine.ReadRowsAsync(this.SelectedFields, this.CurrentOffset, this.CurrentMaxRowCount, loadingIcon.CancellationToken, loadingIcon);
|
||||
}, loadingIcon.CancellationToken);
|
||||
|
||||
loadTime = stopwatch.Elapsed;
|
||||
|
|
@ -318,7 +349,7 @@ namespace ParquetViewer
|
|||
indexTime = stopwatch.Elapsed - loadTime;
|
||||
|
||||
this.recordCountStatusBarLabel.Text = string.Format(Resources.Strings.LoadedRecordCountRangeFormat, this.CurrentOffset, this.CurrentOffset + finalResult.Rows.Count);
|
||||
this.totalRowCountStatusBarLabel.Text = finalResult.ExtendedProperties[Engine.ParquetEngine.TotalRecordCountExtendedPropertyKey]!.ToString();
|
||||
this.totalRowCountStatusBarLabel.Text = engine.RecordCount.ToString();
|
||||
this.actualShownRecordCountLabel.Text = finalResult.Rows.Count.ToString();
|
||||
|
||||
this.MainDataSource = finalResult;
|
||||
|
|
@ -332,9 +363,9 @@ namespace ParquetViewer
|
|||
{
|
||||
HandleSomeFilesSkippedException(ex);
|
||||
}
|
||||
catch (FileReadException ex)
|
||||
catch (Engine.Exceptions.FileReadException ex)
|
||||
{
|
||||
HandleFileReadException(ex);
|
||||
MainForm.HandleFileReadException(ex);
|
||||
}
|
||||
catch (MultipleSchemasFoundException ex)
|
||||
{
|
||||
|
|
@ -364,18 +395,23 @@ namespace ParquetViewer
|
|||
this.showingStatusBarLabel.ToolTipText = $"Total time: {totalTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$" Load time: {loadTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$" Index time: {indexTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$" Render time: {renderTime:mm\\:ss\\.ff}" + Environment.NewLine;
|
||||
$" Render time: {renderTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$"Engine: {(engine is Engine.ParquetNET.ParquetEngine ? "ParquetNET" : "DuckDB")}";
|
||||
|
||||
loadingIcon?.Dispose();
|
||||
|
||||
if (wasSuccessful)
|
||||
{
|
||||
var engineType = this._openParquetEngine is Engine.DuckDB.ParquetEngine
|
||||
? FileOpenEvent.ParquetEngineTypeId.DuckDB
|
||||
: FileOpenEvent.ParquetEngineTypeId.ParquetNET;
|
||||
|
||||
FileOpenEvent.FireAndForget(
|
||||
Directory.Exists(this.OpenFileOrFolderPath),
|
||||
this._openParquetEngine!.NumberOfPartitions,
|
||||
this._openParquetEngine.RecordCount,
|
||||
this._openParquetEngine.ThriftMetadata.RowGroups.Count,
|
||||
this._openParquetEngine.Fields.Count,
|
||||
engine.NumberOfPartitions,
|
||||
engine.RecordCount,
|
||||
engine.Metadata.RowGroups.Count,
|
||||
engine.Fields.Count,
|
||||
this.MainDataSource!.Columns.Cast<DataColumn>().Select(column => column.DataType.Name).Distinct().Order().ToArray(),
|
||||
this.CurrentOffset,
|
||||
this.CurrentMaxRowCount,
|
||||
|
|
@ -383,7 +419,8 @@ namespace ParquetViewer
|
|||
(long)totalTime.TotalMilliseconds,
|
||||
(long)loadTime.TotalMilliseconds,
|
||||
(long)indexTime.TotalMilliseconds,
|
||||
(long)renderTime.TotalMilliseconds);
|
||||
(long)renderTime.TotalMilliseconds,
|
||||
engineType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -492,5 +529,11 @@ namespace ParquetViewer
|
|||
this.englishToolStripMenuItem.Checked = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces the currently open engine with <paramref name="newEngine"/>,
/// disposing the previous one.
/// </summary>
/// <param name="newEngine">The engine that becomes the active one.</param>
private void SwapEngines(IParquetEngine newEngine)
{
    //Swapping an engine with itself must not dispose the instance we keep using
    if (ReferenceEquals(this._openParquetEngine, newEngine))
        return;

    this._openParquetEngine.DisposeSafely();
    this._openParquetEngine = newEngine;
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -120,12 +120,27 @@
|
|||
<data name="recordsToLabel.Text" xml:space="preserve">
|
||||
<value>Kayıt Sayısı:</value>
|
||||
</data>
|
||||
<data name="recordsToLabel.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="recordCountTextBox.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="showRecordsFromLabel.Text" xml:space="preserve">
|
||||
<value>Kayıt Atla:</value>
|
||||
</data>
|
||||
<data name="showRecordsFromLabel.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="offsetTextBox.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="runQueryButton.Text" xml:space="preserve">
|
||||
<value>İşle</value>
|
||||
</data>
|
||||
<data name="runQueryButton.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<assembly alias="System.Drawing" name="System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a" />
|
||||
<data name="searchFilterLabel.Font" type="System.Drawing.Font, System.Drawing">
|
||||
<value>Microsoft Sans Serif, 9pt, style=Underline</value>
|
||||
|
|
@ -139,9 +154,18 @@
|
|||
<data name="searchFilterLabel.Text" xml:space="preserve">
|
||||
<value>Sorgu Filtresi (?):</value>
|
||||
</data>
|
||||
<data name="searchFilterLabel.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="searchFilterTextBox.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="clearFilterButton.Text" xml:space="preserve">
|
||||
<value>Temizle</value>
|
||||
</data>
|
||||
<data name="clearFilterButton.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="mainGridView.CopyAsWhereIcon" type="System.Drawing.Bitmap, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||
<value>
|
||||
iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABGdBTUEAALGPC/xhBQAAAAlwSFlzAAAO
|
||||
|
|
@ -179,9 +203,18 @@
|
|||
SletYXyhW54FgKMFd4Y/+A3rSyMPm3Pf6gAAAABJRU5ErkJggg==
|
||||
</value>
|
||||
</data>
|
||||
<data name="mainGridView.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="loadAllRowsButton.ToolTip" xml:space="preserve">
|
||||
<value>Tüm kayıtları yükle (Ctrl+E)</value>
|
||||
</data>
|
||||
<data name="mainTableLayoutPanel.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="mainMenuStrip.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="fileToolStripMenuItem.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>51, 20</value>
|
||||
</data>
|
||||
|
|
@ -381,6 +414,12 @@
|
|||
<data name="metadataViewerToolStripMenuItem.Text" xml:space="preserve">
|
||||
<value>Metadata Önizleyicisi</value>
|
||||
</data>
|
||||
<data name="openQueryEditorToolToolStripMenuItem.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>251, 22</value>
|
||||
</data>
|
||||
<data name="openQueryEditorToolToolStripMenuItem.Text" xml:space="preserve">
|
||||
<value>Sorgu Editörü (Beta)</value>
|
||||
</data>
|
||||
<data name="helpToolStripMenuItem.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>56, 20</value>
|
||||
</data>
|
||||
|
|
@ -441,7 +480,13 @@
|
|||
<data name="outOfStatusBarLabel.Text" xml:space="preserve">
|
||||
<value>Toplam:</value>
|
||||
</data>
|
||||
<data name="mainStatusStrip.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
<data name="$this.Text" xml:space="preserve">
|
||||
<value>Yeni Parquet Dosyası</value>
|
||||
</data>
|
||||
<data name="$this.ToolTip" xml:space="preserve">
|
||||
<value />
|
||||
</data>
|
||||
</root>
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue