From d5781870a8309853a746940a85a1cee8f04f7f94 Mon Sep 17 00:00:00 2001 From: Brandon H Date: Tue, 7 Jan 2020 14:33:35 -0800 Subject: [PATCH] Initial commit. --- .gitattributes | 63 ++++ .gitignore | 356 ++++++++++++++++++ AzureFunctions-BatchedFileValidation.sln | 65 ++++ .../AzureFunctions.v1.Durable.csproj | 32 ++ .../Properties/launchSettings.json | 7 + AzureFunctions.v1.Durable/host.json | 2 + AzureFunctions.v1/AzureFunctions.v1.csproj | 31 ++ AzureFunctions.v1/host.json | 2 + .../AzureFunctions.v2.Durable.csproj | 27 ++ .../DurableContextExtensions.cs | 41 ++ .../FunctionEnsureAllFiles.cs | 69 ++++ .../FunctionValidateFileSet.cs | 31 ++ AzureFunctions.v2.Durable/Orchestrator.cs | 75 ++++ AzureFunctions.v2.Durable/host.json | 2 + .../AzureFunctions.v2.DurableEntities.csproj | 24 ++ .../BatchEntity.cs | 57 +++ .../Orchestrator.cs | 49 +++ AzureFunctions.v2.DurableEntities/README.md | 6 + AzureFunctions.v2.DurableEntities/host.json | 8 + AzureFunctions.v2/AzureFunctions.v2.csproj | 19 + AzureFunctions.v2/CustomerBlobAttributes.cs | 63 ++++ AzureFunctions.v2/Extensions.cs | 48 +++ AzureFunctions.v2/FunctionEnsureAllFiles.cs | 112 ++++++ AzureFunctions.v2/FunctionValidateFileSet.cs | 275 ++++++++++++++ AzureFunctions.v2/Helpers.cs | 286 ++++++++++++++ AzureFunctions.v2/LockTableEntity.cs | 60 +++ AzureFunctions.v2/host.json | 2 + CODE_OF_CONDUCT.md | 9 + LICENSE | 21 ++ LogicApps/DeploymentHelper.cs | 115 ++++++ LogicApps/deploy.ps1 | 107 ++++++ LogicApps/deploy.sh | 122 ++++++ LogicApps/deployer.rb | 71 ++++ LogicApps/parameters.json | 12 + LogicApps/template.json | 227 +++++++++++ README.md | 135 +++++++ SECURITY.md | 41 ++ sampledata/cust1_20171010_1112_type1.csv | 5 + sampledata/cust1_20171010_1112_type10.csv | 8 + sampledata/cust1_20171010_1112_type2.csv | 6 + sampledata/cust1_20171010_1112_type3.csv | 2 + sampledata/cust1_20171010_1112_type4.csv | 3 + sampledata/cust1_20171010_1112_type5.csv | 2 + sampledata/cust1_20171010_1112_type7.csv | 6 + sampledata/cust1_20171010_1112_type8.csv | 3 + sampledata/cust1_20171010_1112_type9.csv | 2 + sampledata/cust1_20171011_1112_type1.csv | 5 + sampledata/cust1_20171011_1112_type10.csv | 8 + sampledata/cust1_20171011_1112_type2.csv | 6 + sampledata/cust1_20171011_1112_type3.csv | 2 + sampledata/cust1_20171011_1112_type4.csv | 3 + sampledata/cust1_20171011_1112_type5.csv | 2 + sampledata/cust1_20171011_1112_type7.csv | 6 + sampledata/cust1_20171011_1112_type8.csv | 3 + sampledata/cust1_20171011_1112_type9.csv | 2 + 55 files changed, 2746 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 AzureFunctions-BatchedFileValidation.sln create mode 100644 AzureFunctions.v1.Durable/AzureFunctions.v1.Durable.csproj create mode 100644 AzureFunctions.v1.Durable/Properties/launchSettings.json create mode 100644 AzureFunctions.v1.Durable/host.json create mode 100644 AzureFunctions.v1/AzureFunctions.v1.csproj create mode 100644 AzureFunctions.v1/host.json create mode 100644 AzureFunctions.v2.Durable/AzureFunctions.v2.Durable.csproj create mode 100644 AzureFunctions.v2.Durable/DurableContextExtensions.cs create mode 100644 AzureFunctions.v2.Durable/FunctionEnsureAllFiles.cs create mode 100644 AzureFunctions.v2.Durable/FunctionValidateFileSet.cs create mode 100644 AzureFunctions.v2.Durable/Orchestrator.cs create mode 100644 AzureFunctions.v2.Durable/host.json create mode 100644 AzureFunctions.v2.DurableEntities/AzureFunctions.v2.DurableEntities.csproj create mode 100644 AzureFunctions.v2.DurableEntities/BatchEntity.cs create mode 100644 AzureFunctions.v2.DurableEntities/Orchestrator.cs create mode 100644 AzureFunctions.v2.DurableEntities/README.md create mode 100644 AzureFunctions.v2.DurableEntities/host.json create mode 100644 AzureFunctions.v2/AzureFunctions.v2.csproj create mode 100644 AzureFunctions.v2/CustomerBlobAttributes.cs create mode 100644 AzureFunctions.v2/Extensions.cs create mode 100644 AzureFunctions.v2/FunctionEnsureAllFiles.cs create mode 100644 AzureFunctions.v2/FunctionValidateFileSet.cs create mode 100644 AzureFunctions.v2/Helpers.cs create mode 100644 AzureFunctions.v2/LockTableEntity.cs create mode 100644 AzureFunctions.v2/host.json create mode 100644 CODE_OF_CONDUCT.md create mode 100644 LICENSE create mode 100644 LogicApps/DeploymentHelper.cs create mode 100644 LogicApps/deploy.ps1 create mode 100644 LogicApps/deploy.sh create mode 100644 LogicApps/deployer.rb create mode 100644 LogicApps/parameters.json create mode 100644 LogicApps/template.json create mode 100644 README.md create mode 100644 SECURITY.md create mode 100644 sampledata/cust1_20171010_1112_type1.csv create mode 100644 sampledata/cust1_20171010_1112_type10.csv create mode 100644 sampledata/cust1_20171010_1112_type2.csv create mode 100644 sampledata/cust1_20171010_1112_type3.csv create mode 100644 sampledata/cust1_20171010_1112_type4.csv create mode 100644 sampledata/cust1_20171010_1112_type5.csv create mode 100644 sampledata/cust1_20171010_1112_type7.csv create mode 100644 sampledata/cust1_20171010_1112_type8.csv create mode 100644 sampledata/cust1_20171010_1112_type9.csv create mode 100644 sampledata/cust1_20171011_1112_type1.csv create mode 100644 sampledata/cust1_20171011_1112_type10.csv create mode 100644 sampledata/cust1_20171011_1112_type2.csv create mode 100644 sampledata/cust1_20171011_1112_type3.csv create mode 100644 sampledata/cust1_20171011_1112_type4.csv create mode 100644 sampledata/cust1_20171011_1112_type5.csv create mode 100644 sampledata/cust1_20171011_1112_type7.csv create mode 100644 sampledata/cust1_20171011_1112_type8.csv create mode 100644 sampledata/cust1_20171011_1112_type9.csv diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1ff0c42 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,63 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is need for earlier builds of msysgit that does not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. +############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..688e5f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,356 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# Azure Functions localsettings file +local.settings.json + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +#*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ diff --git a/AzureFunctions-BatchedFileValidation.sln b/AzureFunctions-BatchedFileValidation.sln new file mode 100644 index 0000000..8d32bc8 --- /dev/null +++ b/AzureFunctions-BatchedFileValidation.sln @@ -0,0 +1,65 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29411.108 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FunctionsV1", "FunctionsV1", "{F8CE7BED-6111-476F-A5AF-2562039E2091}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FunctionsV2", "FunctionsV2", "{CFFE2BAD-8736-42FA-89FE-7774BF32588A}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v1", "AzureFunctions.v1\AzureFunctions.v1.csproj", "{5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v1.Durable", "AzureFunctions.v1.Durable\AzureFunctions.v1.Durable.csproj", "{D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v2", "AzureFunctions.v2\AzureFunctions.v2.csproj", "{6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v2.Durable", "AzureFunctions.v2.Durable\AzureFunctions.v2.Durable.csproj", "{57193360-9F20-40C5-BD96-61E8724BDA1C}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{4708BF93-EC9C-4DE7-BC4D-99D89D81D744}" + ProjectSection(SolutionItems) = preProject + README.md = README.md + EndProjectSection +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AzureFunctions.v2.DurableEntities", "AzureFunctions.v2.DurableEntities\AzureFunctions.v2.DurableEntities.csproj", "{0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57}.Release|Any CPU.Build.0 = Release|Any CPU + {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A}.Release|Any CPU.Build.0 = Release|Any CPU + {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B}.Release|Any CPU.Build.0 = Release|Any CPU + {57193360-9F20-40C5-BD96-61E8724BDA1C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {57193360-9F20-40C5-BD96-61E8724BDA1C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {57193360-9F20-40C5-BD96-61E8724BDA1C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {57193360-9F20-40C5-BD96-61E8724BDA1C}.Release|Any CPU.Build.0 = Release|Any CPU + {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {5C74E325-7AAE-4263-AC8C-CBC8F7C36B57} = {F8CE7BED-6111-476F-A5AF-2562039E2091} + {D4A24AAF-FEDC-4E00-B01C-28B52E8B7E8A} = {F8CE7BED-6111-476F-A5AF-2562039E2091} + {6755D0BB-65A3-4D19-85F6-507AE9AB1E7B} = {CFFE2BAD-8736-42FA-89FE-7774BF32588A} + {57193360-9F20-40C5-BD96-61E8724BDA1C} = {CFFE2BAD-8736-42FA-89FE-7774BF32588A} + {0B8C8EB8-7D1C-4509-A0F7-6C9DDEC20201} = {CFFE2BAD-8736-42FA-89FE-7774BF32588A} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {65B203C2-3EEE-4F9C-B4EB-AB8B67A71F67} + EndGlobalSection +EndGlobal diff --git a/AzureFunctions.v1.Durable/AzureFunctions.v1.Durable.csproj b/AzureFunctions.v1.Durable/AzureFunctions.v1.Durable.csproj new file mode 100644 index 0000000..7403331 --- /dev/null +++ b/AzureFunctions.v1.Durable/AzureFunctions.v1.Durable.csproj @@ -0,0 +1,32 @@ + + + net471 + + + + + + + + + + + + + + PreserveNewest + + + + + + + + + + + + PreserveNewest + + + diff --git a/AzureFunctions.v1.Durable/Properties/launchSettings.json b/AzureFunctions.v1.Durable/Properties/launchSettings.json new file mode 100644 index 0000000..509c807 --- /dev/null +++ b/AzureFunctions.v1.Durable/Properties/launchSettings.json @@ -0,0 +1,7 @@ +{ + "profiles": { + "AzureFunctions.Durable.v1": { + "commandName": "Project" + } + } +} \ No newline at end of file diff --git a/AzureFunctions.v1.Durable/host.json b/AzureFunctions.v1.Durable/host.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/AzureFunctions.v1.Durable/host.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/AzureFunctions.v1/AzureFunctions.v1.csproj b/AzureFunctions.v1/AzureFunctions.v1.csproj new file mode 100644 index 0000000..1182991 --- /dev/null +++ b/AzureFunctions.v1/AzureFunctions.v1.csproj @@ -0,0 +1,31 @@ + + + net471 + + + + + + + + + Never + + + + + PreserveNewest + + + + + + + + + + + PreserveNewest + + + diff --git a/AzureFunctions.v1/host.json b/AzureFunctions.v1/host.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/AzureFunctions.v1/host.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/AzureFunctions.v2.Durable/AzureFunctions.v2.Durable.csproj b/AzureFunctions.v2.Durable/AzureFunctions.v2.Durable.csproj new file mode 100644 index 0000000..287cb11 --- /dev/null +++ b/AzureFunctions.v2.Durable/AzureFunctions.v2.Durable.csproj @@ -0,0 +1,27 @@ + + + netstandard2.0 + v2 + + + + + + + + + + PreserveNewest + + + + + + + + + + PreserveNewest + + + \ No newline at end of file diff --git a/AzureFunctions.v2.Durable/DurableContextExtensions.cs b/AzureFunctions.v2.Durable/DurableContextExtensions.cs new file mode 100644 index 0000000..5816183 --- /dev/null +++ b/AzureFunctions.v2.Durable/DurableContextExtensions.cs @@ -0,0 +1,41 @@ +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Extensions.Logging; +using Newtonsoft.Json.Linq; + +namespace FileValidation +{ + static class DurableContextExtensions + { +#if FUNCTIONS_V1 + public static void Log(this DurableOrchestrationContext context, ILogger log, string messsage, bool onlyIfNotReplaying = true) + { + if (!onlyIfNotReplaying || !context.IsReplaying) + { + log.LogWarning(messsage); + } + } + + public static void Log(this DurableOrchestrationClient _, ILogger log, string messsage) => log.LogWarning(messsage); + + public static JToken GetInputAsJson(this DurableActivityContextBase ctx) => ctx.GetInput(); + + public static JToken GetInputAsJson(this DurableOrchestrationContextBase ctx) => ctx.GetInput(); +#else + public static void Log(this IDurableOrchestrationContext context, ILogger log, string messsage, bool onlyIfNotReplaying = true) + { + if (!onlyIfNotReplaying || !context.IsReplaying) + { + log.LogWarning(messsage); + } + } + + public static void Log(this IDurableClient _, ILogger log, string messsage) => log.LogWarning(messsage); + + public static JToken GetInputAsJson(this IDurableActivityContext ctx) => ctx.GetInput(); + + public static JToken GetInputAsJson(this IDurableOrchestrationContext ctx) => ctx.GetInput(); +#endif + + } +} diff --git a/AzureFunctions.v2.Durable/FunctionEnsureAllFiles.cs b/AzureFunctions.v2.Durable/FunctionEnsureAllFiles.cs new file mode 100644 index 0000000..bab3686 --- /dev/null +++ b/AzureFunctions.v2.Durable/FunctionEnsureAllFiles.cs @@ -0,0 +1,69 @@ +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Extensions.Logging; + +namespace FileValidation +{ + public static class FunctionEnsureAllFiles + { + [FunctionName("EnsureAllFiles")] +#if FUNCTIONS_V1 + public static async Task Run([OrchestrationTrigger]DurableOrchestrationContext context, ILogger log) +#else + public static async Task Run([OrchestrationTrigger]IDurableOrchestrationContext context, ILogger log) +#endif + { + if (!context.IsReplaying) + { + context.Log(log, $@"EnsureAllFiles STARTING - InstanceId: {context.InstanceId}"); + } + else + { + context.Log(log, $@"EnsureAllFiles REPLAYING"); + } + + dynamic eventGridSoleItem = context.GetInputAsJson(); + + CustomerBlobAttributes newCustomerFile = Helpers.ParseEventGridPayload(eventGridSoleItem, log); + if (newCustomerFile == null) + { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) + return; + } + + var expectedFiles = Helpers.GetExpectedFilesForCustomer(); + var filesStillWaitingFor = new HashSet(expectedFiles); + var filename = newCustomerFile.Filename; + + while (filesStillWaitingFor.Any()) + { + filesStillWaitingFor.Remove(Path.GetFileNameWithoutExtension(filename).Split('_').Last()); + if (filesStillWaitingFor.Count == 0) + { + break; + } + + context.Log(log, $@"Still waiting for more files... Still need {string.Join(", ", filesStillWaitingFor)} for customer {newCustomerFile.CustomerName}, batch {newCustomerFile.BatchPrefix}"); + + filename = await context.WaitForExternalEvent(@"newfile"); + context.Log(log, $@"Got new file via event: {filename}"); + } + + // Verify that this prefix isn't already in the lock table for processings + context.Log(log, @"Got all the files! Moving on..."); + + // call next step in functions with the prefix so it knows what to go grab + await context.CallActivityAsync(@"ValidateFileSet", new { prefix = $@"{newCustomerFile.ContainerName}/inbound/{newCustomerFile.BatchPrefix}", fileTypes = expectedFiles }); + } + + class BlobFilenameVsDatabaseFileMaskComparer : IEqualityComparer + { + public bool Equals(string x, string y) => y.Contains(x); + + public int GetHashCode(string obj) => obj.GetHashCode(); + } + } +} diff --git a/AzureFunctions.v2.Durable/FunctionValidateFileSet.cs b/AzureFunctions.v2.Durable/FunctionValidateFileSet.cs new file mode 100644 index 0000000..2088143 --- /dev/null +++ b/AzureFunctions.v2.Durable/FunctionValidateFileSet.cs @@ -0,0 +1,31 @@ +using System; +using System.Threading.Tasks; +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Extensions.Logging; +using Microsoft.WindowsAzure.Storage; + +namespace FileValidation +{ + public static class FunctionValidateFileSet + { + [FunctionName(@"ValidateFileSet")] +#if FUNCTIONS_V1 + public static async Task Run([ActivityTrigger]DurableActivityContext context, ILogger log) +#else + public static async Task Run([ActivityTrigger]IDurableActivityContext context, ILogger log) +#endif + { + log.LogTrace(@"ValidateFileSet run."); + if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out _)) + { + throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); + } + + var payload = context.GetInputAsJson(); + var prefix = payload["prefix"].ToString(); // This is the entire path w/ prefix for the file set + + return await Helpers.DoValidationAsync(prefix, log); + } + } +} diff --git a/AzureFunctions.v2.Durable/Orchestrator.cs b/AzureFunctions.v2.Durable/Orchestrator.cs new file mode 100644 index 0000000..f576919 --- /dev/null +++ b/AzureFunctions.v2.Durable/Orchestrator.cs @@ -0,0 +1,75 @@ +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http; +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Azure.WebJobs.Extensions.Http; +using Microsoft.Extensions.Logging; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace FileValidation +{ + public static class Orchestrator + { + [FunctionName("Orchestrator")] +#if FUNCTIONS_V1 + public static async System.Threading.Tasks.Task RunAsync([HttpTrigger(AuthorizationLevel.Function, "post", Route = null)]HttpRequestMessage req, [OrchestrationClient]DurableOrchestrationClient starter, ILogger log) +#else + public static async System.Threading.Tasks.Task RunAsync([HttpTrigger(AuthorizationLevel.Function, "post", Route = null)]HttpRequestMessage req, [DurableClient]IDurableClient starter, ILogger log) +#endif + { + var inputToFunction = JToken.ReadFrom(new JsonTextReader(new StreamReader(await req.Content.ReadAsStreamAsync()))); + dynamic eventGridSoleItem = (inputToFunction as JArray)?.SingleOrDefault(); + if (eventGridSoleItem == null) + { + return req.CreateCompatibleResponse(HttpStatusCode.BadRequest, @"Expecting only one item in the Event Grid message"); + } + + if (eventGridSoleItem.eventType == @"Microsoft.EventGrid.SubscriptionValidationEvent") + { + log.LogTrace(@"Event Grid Validation event received."); + return req.CreateCompatibleResponse(HttpStatusCode.OK, $"{{ \"validationResponse\" : \"{((dynamic)inputToFunction)[0].data.validationCode}\" }}"); + } + + CustomerBlobAttributes newCustomerFile = Helpers.ParseEventGridPayload(eventGridSoleItem, log); + if (newCustomerFile == null) + { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) + return req.CreateCompatibleResponse(HttpStatusCode.NoContent); + } + + string customerName = newCustomerFile.CustomerName, name = newCustomerFile.Filename; + starter.Log(log, $@"Processing new file. customer: {customerName}, filename: {name}"); + + // get the prefix for the name so we can check for others in the same container with in the customer blob storage account + var prefix = newCustomerFile.BatchPrefix; + + var instanceForPrefix = await starter.GetStatusAsync(prefix); + if (instanceForPrefix == null) + { + starter.Log(log, $@"New instance needed for prefix '{prefix}'. Starting..."); + var retval = await starter.StartNewAsync(@"EnsureAllFiles", prefix, eventGridSoleItem); + starter.Log(log, $@"Started. {retval}"); + } + else + { + starter.Log(log, $@"Instance already waiting. Current status: {instanceForPrefix.RuntimeStatus}. Firing 'newfile' event..."); + + if (instanceForPrefix.RuntimeStatus != OrchestrationRuntimeStatus.Running) + { + await starter.TerminateAsync(prefix, @"bounce"); + var retval = await starter.StartNewAsync(@"EnsureAllFiles", prefix, eventGridSoleItem); + starter.Log(log, $@"Restarted listener for {prefix}. {retval}"); + } + else + { + await starter.RaiseEventAsync(prefix, @"newfile", newCustomerFile.Filename); + } + } + + + return starter.CreateCheckStatusResponse(req, prefix); + } + } +} diff --git a/AzureFunctions.v2.Durable/host.json b/AzureFunctions.v2.Durable/host.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/AzureFunctions.v2.Durable/host.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/AzureFunctions.v2.DurableEntities/AzureFunctions.v2.DurableEntities.csproj b/AzureFunctions.v2.DurableEntities/AzureFunctions.v2.DurableEntities.csproj new file mode 100644 index 0000000..0fce5f0 --- /dev/null +++ b/AzureFunctions.v2.DurableEntities/AzureFunctions.v2.DurableEntities.csproj @@ -0,0 +1,24 @@ + + + netcoreapp2.1 + v2 + + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + Never + + + \ No newline at end of file diff --git a/AzureFunctions.v2.DurableEntities/BatchEntity.cs b/AzureFunctions.v2.DurableEntities/BatchEntity.cs new file mode 100644 index 0000000..902bf0c --- /dev/null +++ b/AzureFunctions.v2.DurableEntities/BatchEntity.cs @@ -0,0 +1,57 @@ +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Extensions.Logging; +using Newtonsoft.Json; + +namespace FileValidation +{ + [JsonObject(MemberSerialization.OptIn)] + public class BatchEntity : IBatchEntity + { + private readonly string _id; + private readonly ILogger _logger; + + public BatchEntity(string id, ILogger logger) + { + _id = id; + _logger = logger; + } + + [JsonProperty] + public List ReceivedFileTypes { get; set; } = new List(); + + [FunctionName(nameof(BatchEntity))] + public static Task Run([EntityTrigger]IDurableEntityContext ctx, ILogger logger) => ctx.DispatchAsync(ctx.EntityKey, logger); + + public async Task NewFile(string fileUri) + { + var newCustomerFile = CustomerBlobAttributes.Parse(fileUri); + _logger.LogInformation($@"Got new file via event: {newCustomerFile.Filename}"); + this.ReceivedFileTypes.Add(newCustomerFile.Filetype); + + _logger.LogTrace($@"Actor '{_id}' got file '{newCustomerFile.Filetype}'"); + + var filesStillWaitingFor = Helpers.GetExpectedFilesForCustomer().Except(this.ReceivedFileTypes); + if (filesStillWaitingFor.Any()) + { + _logger.LogInformation($@"Still waiting for more files... Still need {string.Join(", ", filesStillWaitingFor)} for customer {newCustomerFile.CustomerName}, batch {newCustomerFile.BatchPrefix}"); + } + else + { + _logger.LogInformation(@"Got all the files! Moving on..."); + + // call next step in functions with the prefix so it knows what to go grab + await Helpers.DoValidationAsync($@"{newCustomerFile.ContainerName}/inbound/{newCustomerFile.BatchPrefix}", _logger); + } + } + } + + public interface IBatchEntity + { + Task NewFile(string fileUri); + } + +} diff --git a/AzureFunctions.v2.DurableEntities/Orchestrator.cs b/AzureFunctions.v2.DurableEntities/Orchestrator.cs new file mode 100644 index 0000000..d668531 --- /dev/null +++ b/AzureFunctions.v2.DurableEntities/Orchestrator.cs @@ -0,0 +1,49 @@ +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Azure.WebJobs.Extensions.Http; +using Microsoft.Extensions.Logging; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http; + +namespace FileValidation +{ + public static class Orchestrator + { + [FunctionName("Orchestrator")] + public static async System.Threading.Tasks.Task RunAsync([HttpTrigger(AuthorizationLevel.Function, "post", Route = null)]HttpRequestMessage req, [DurableClient]IDurableClient starter, ILogger log) + { + var inputToFunction = JToken.ReadFrom(new JsonTextReader(new StreamReader(await req.Content.ReadAsStreamAsync()))); + dynamic eventGridSoleItem = (inputToFunction as JArray)?.SingleOrDefault(); + if (eventGridSoleItem == null) + { + return req.CreateResponse(HttpStatusCode.BadRequest, @"Expecting only one item in the Event Grid message"); + } + + if (eventGridSoleItem.eventType == @"Microsoft.EventGrid.SubscriptionValidationEvent") + { + log.LogTrace(@"Event Grid Validation event received."); + return req.CreateCompatibleResponse(HttpStatusCode.OK, $"{{ \"validationResponse\" : \"{((dynamic)inputToFunction)[0].data.validationCode}\" }}"); + } + + CustomerBlobAttributes newCustomerFile = Helpers.ParseEventGridPayload(eventGridSoleItem, log); + if (newCustomerFile == null) + { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) + return req.CreateResponse(HttpStatusCode.NoContent); + } + + string customerName = newCustomerFile.CustomerName, name = newCustomerFile.Filename, containerName = newCustomerFile.ContainerName; + log.LogInformation($@"Processing new file. customer: {customerName}, filename: {name}"); + + // get the prefix for the name so we can check for others in the same container with in the customer blob storage account + var prefix = newCustomerFile.BatchPrefix; + await starter.SignalEntityAsync(prefix, b => b.NewFile(newCustomerFile.FullUrl)); + + return req.CreateResponse(HttpStatusCode.Accepted); + + } + } +} \ No newline at end of file diff --git a/AzureFunctions.v2.DurableEntities/README.md b/AzureFunctions.v2.DurableEntities/README.md new file mode 100644 index 0000000..5c630c2 --- /dev/null +++ b/AzureFunctions.v2.DurableEntities/README.md @@ -0,0 +1,6 @@ +# File validation using Durable Entities +To learn more about Durable Entities, check out the documentation [here](https://docs.microsoft.com/en-us/azure/azure-functions/durable/durable-functions-entities). + +In this sample, you'll see how we can treat the "batch" which is being validated as a virtual actor using Durable Entities. It's then up to the entity itself to determine when all files are present, tracking state along the way. + +As you'll see, it greatly simplifies the amount of Orchestration code we had to write in the other examples. \ No newline at end of file diff --git a/AzureFunctions.v2.DurableEntities/host.json b/AzureFunctions.v2.DurableEntities/host.json new file mode 100644 index 0000000..b02b929 --- /dev/null +++ b/AzureFunctions.v2.DurableEntities/host.json @@ -0,0 +1,8 @@ +{ + "version": "2.0", + "logging": { + "logLevel": { + "default": "Trace" + } + } +} \ No newline at end of file diff --git a/AzureFunctions.v2/AzureFunctions.v2.csproj b/AzureFunctions.v2/AzureFunctions.v2.csproj new file mode 100644 index 0000000..1034fd7 --- /dev/null +++ b/AzureFunctions.v2/AzureFunctions.v2.csproj @@ -0,0 +1,19 @@ + + + netstandard2.0 + v2 + + + + + + + + PreserveNewest + + + PreserveNewest + Never + + + \ No newline at end of file diff --git a/AzureFunctions.v2/CustomerBlobAttributes.cs b/AzureFunctions.v2/CustomerBlobAttributes.cs new file mode 100644 index 0000000..690b9a3 --- /dev/null +++ b/AzureFunctions.v2/CustomerBlobAttributes.cs @@ -0,0 +1,63 @@ +using System; +using System.Globalization; +using System.Text.RegularExpressions; + +namespace FileValidation +{ + public class CustomerBlobAttributes + { + static readonly Regex blobUrlRegexExtract = new Regex(@"^\S*/([^/]+)/inbound/((([^_]+)_([\d]+_[\d]+))_([\w]+))\.csv$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline); + + static readonly Regex blobUrlRegexExtractWithSubfolder = new Regex(@"^\S*/([^/]+)/([^/]+)/((([^_]+)_([\d]+_[\d]+))_([\w]+))\.csv$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline); + + public string FullUrl { get; private set; } + public string Filename { get; private set; } + public string BatchPrefix { get; private set; } + public DateTime BatchDateTime { get; private set; } + public string Filetype { get; private set; } + public string CustomerName { get; private set; } + public string ContainerName { get; private set; } + public string Subfolder { get; private set; } + + public static CustomerBlobAttributes Parse(string fullUri, bool detectSubfolder = false) + { + if (detectSubfolder) + { + var regexMatch = blobUrlRegexExtractWithSubfolder.Match(fullUri); + if (regexMatch.Success) + { + return new CustomerBlobAttributes + { + FullUrl = regexMatch.Groups[0].Value, + ContainerName = regexMatch.Groups[1].Value, + Subfolder = regexMatch.Groups[2].Value, + Filename = regexMatch.Groups[3].Value, + BatchPrefix = regexMatch.Groups[4].Value, + CustomerName = regexMatch.Groups[5].Value, + BatchDateTime = DateTime.ParseExact(regexMatch.Groups[6].Value, @"yyyyMMdd_HHmm", CultureInfo.InvariantCulture), + Filetype = regexMatch.Groups[7].Value + }; + } + } + else + { + var regexMatch = blobUrlRegexExtract.Match(fullUri); + if (regexMatch.Success) + { + return new CustomerBlobAttributes + { + FullUrl = regexMatch.Groups[0].Value, + ContainerName = regexMatch.Groups[1].Value, + Filename = regexMatch.Groups[2].Value, + BatchPrefix = regexMatch.Groups[3].Value, + CustomerName = regexMatch.Groups[4].Value, + BatchDateTime = DateTime.ParseExact(regexMatch.Groups[5].Value, @"yyyyMMdd_HHmm", CultureInfo.InvariantCulture), + Filetype = regexMatch.Groups[6].Value + }; + } + } + + return null; + } + } +} diff --git a/AzureFunctions.v2/Extensions.cs b/AzureFunctions.v2/Extensions.cs new file mode 100644 index 0000000..8bb10c0 --- /dev/null +++ b/AzureFunctions.v2/Extensions.cs @@ -0,0 +1,48 @@ +using System.Collections.Generic; +using System.Net; +using System.Net.Http; +using Microsoft.WindowsAzure.Storage.Blob; +using Microsoft.WindowsAzure.Storage.Table; + +namespace FileValidation +{ + static class StorageExtensions + { + public static async System.Threading.Tasks.Task> ExecuteQueryAsync(this CloudTable table, TableQuery query) where T : ITableEntity, new() + { + TableContinuationToken token = null; + var retVal = new List(); + do + { + var results = await table.ExecuteQuerySegmentedAsync(query, token); + retVal.AddRange(results.Results); + token = results.ContinuationToken; + } while (token != null); + + return retVal; + } + + + public static async System.Threading.Tasks.Task> ListBlobsAsync(this CloudBlobClient blobClient, string prefix) + { + BlobContinuationToken token = null; + var retVal = new List(); + do + { + var results = await blobClient.ListBlobsSegmentedAsync(prefix, token); + retVal.AddRange(results.Results); + token = results.ContinuationToken; + } while (token != null); + + return retVal; + } + + } + + static class HttpExtensions + { + public static HttpResponseMessage CreateCompatibleResponse(this HttpRequestMessage _, HttpStatusCode code) => new HttpResponseMessage(code); + + public static HttpResponseMessage CreateCompatibleResponse(this HttpRequestMessage _, HttpStatusCode code, string stringContent) => new HttpResponseMessage(code) { Content = new StringContent(stringContent) }; + } +} diff --git a/AzureFunctions.v2/FunctionEnsureAllFiles.cs b/AzureFunctions.v2/FunctionEnsureAllFiles.cs new file mode 100644 index 0000000..5918595 --- /dev/null +++ b/AzureFunctions.v2/FunctionEnsureAllFiles.cs @@ -0,0 +1,112 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Threading.Tasks; +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.Http; +using Microsoft.Extensions.Logging; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Table; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace FileValidation +{ + public static class FunctionEnsureAllFiles + { + [FunctionName("EnsureAllFiles")] + public static async Task Run([HttpTrigger(AuthorizationLevel.Function, @"post")]HttpRequestMessage req, ILogger log) + { + var payloadFromEventGrid = JToken.ReadFrom(new JsonTextReader(new StreamReader(await req.Content.ReadAsStreamAsync()))); + dynamic eventGridSoleItem = (payloadFromEventGrid as JArray)?.SingleOrDefault(); + if (eventGridSoleItem == null) + { + return req.CreateErrorResponse(HttpStatusCode.BadRequest, $@"Expecting only one item in the Event Grid message"); + } + + if (eventGridSoleItem.eventType == @"Microsoft.EventGrid.SubscriptionValidationEvent") + { + log.LogTrace(@"Event Grid Validation event received."); + return new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new StringContent($"{{ \"validationResponse\" : \"{((dynamic)payloadFromEventGrid)[0].data.validationCode}\" }}") + }; + } + + var newCustomerFile = Helpers.ParseEventGridPayload(eventGridSoleItem, log); + if (newCustomerFile == null) + { // The request either wasn't valid (filename couldn't be parsed) or not applicable (put in to a folder other than /inbound) + return req.CreateResponse(System.Net.HttpStatusCode.NoContent); + } + + // get the prefix for the name so we can check for others in the same container with in the customer blob storage account + var prefix = newCustomerFile.BatchPrefix; + + if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var blobStorage)) + { + throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); + } + + var blobClient = blobStorage.CreateCloudBlobClient(); + var matches = await blobClient.ListBlobsAsync(prefix: $@"{newCustomerFile.ContainerName}/inbound/{prefix}"); + var matchNames = matches.Select(m => Path.GetFileNameWithoutExtension(blobClient.GetBlobReferenceFromServerAsync(m.StorageUri.PrimaryUri).GetAwaiter().GetResult().Name).Split('_').Last()).ToList(); + + var expectedFiles = Helpers.GetExpectedFilesForCustomer(); + var filesStillWaitingFor = expectedFiles.Except(matchNames, new BlobFilenameVsDatabaseFileMaskComparer()); + + if (!filesStillWaitingFor.Any()) + { + // Verify that this prefix isn't already in the lock table for processings + var lockTable = await Helpers.GetLockTableAsync(); + var entriesMatchingPrefix = await LockTableEntity.GetLockRecordAsync(prefix, lockTable); + if (entriesMatchingPrefix != null) + { + log.LogInformation($@"Skipping. We've already queued the batch with prefix '{prefix}' for processing"); + return req.CreateResponse(HttpStatusCode.NoContent); + } + + log.LogInformation(@"Got all the files! Moving on..."); + try + { + await lockTable.ExecuteAsync(TableOperation.Insert(new LockTableEntity(prefix))); + } + catch (StorageException) + { + log.LogInformation($@"Skipping. We've already queued the batch with prefix '{prefix}' for processing"); + return req.CreateResponse(HttpStatusCode.NoContent); + } + + using (var c = new HttpClient()) + { + var jsonObjectForValidator = +$@"{{ + ""prefix"" : ""{newCustomerFile.ContainerName}/inbound/{prefix}"", + ""fileTypes"" : [ + {string.Join(", ", expectedFiles.Select(e => $@"""{e}"""))} + ] +}}"; + // call next step in functions with the prefix so it knows what to go grab + await c.PostAsync($@"{Environment.GetEnvironmentVariable(@"ValidateFunctionUrl")}", new StringContent(jsonObjectForValidator)); + + return req.CreateResponse(HttpStatusCode.OK); + } + } + else + { + log.LogInformation($@"Still waiting for more files... Have {matches.Count()} file(s) from this customer ({newCustomerFile.CustomerName}) for batch {newCustomerFile.BatchPrefix}. Still need {string.Join(", ", filesStillWaitingFor)}"); + + return req.CreateResponse(HttpStatusCode.NoContent); + } + } + + class BlobFilenameVsDatabaseFileMaskComparer : IEqualityComparer + { + public bool Equals(string x, string y) => y.Contains(x); + + public int GetHashCode(string obj) => obj.GetHashCode(); + } + } +} diff --git a/AzureFunctions.v2/FunctionValidateFileSet.cs b/AzureFunctions.v2/FunctionValidateFileSet.cs new file mode 100644 index 0000000..2c29465 --- /dev/null +++ b/AzureFunctions.v2/FunctionValidateFileSet.cs @@ -0,0 +1,275 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Threading.Tasks; +using Microsoft.Azure.WebJobs; +using Microsoft.Azure.WebJobs.Extensions.Http; +using Microsoft.Extensions.Logging; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Blob; +using Microsoft.WindowsAzure.Storage.Table; +using Newtonsoft.Json.Linq; + +namespace FileValidation +{ + public static class FunctionValidateFileSet + { + [FunctionName(@"ValidateFileSet")] + public static async Task Run([HttpTrigger(AuthorizationLevel.Function, @"post", Route = @"Validate")]HttpRequestMessage req, ILogger log) + { + log.LogTrace(@"ValidateFileSet run."); + if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var storageAccount)) + { + throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); + } + + var payload = JObject.Parse(await req.Content.ReadAsStringAsync()); + + var prefix = payload["prefix"].ToString(); // This is the entire path w/ prefix for the file set + log.LogTrace($@"prefix: {prefix}"); + + var filePrefix = prefix.Substring(prefix.LastIndexOf('/') + 1); + log.LogTrace($@"filePrefix: {filePrefix}"); + + var lockTable = await Helpers.GetLockTableAsync(); + if (!await ShouldProceedAsync(lockTable, prefix, filePrefix, log)) + { + return req.CreateResponse(HttpStatusCode.OK); + } + + var blobClient = storageAccount.CreateCloudBlobClient(); + var targetBlobs = await blobClient.ListBlobsAsync(WebUtility.UrlDecode(prefix)); + + var customerName = filePrefix.Split('_').First().Split('-').Last(); + + var errors = new List(); + var filesToProcess = payload["fileTypes"].Values(); + + foreach (var blobDetails in targetBlobs) + { + var blob = await blobClient.GetBlobReferenceFromServerAsync(blobDetails.StorageUri.PrimaryUri); + + var fileParts = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); + if (!filesToProcess.Contains(fileParts.Filetype, StringComparer.OrdinalIgnoreCase)) + { + log.LogTrace($@"{blob.Name} skipped. Isn't in the list of file types to process ({string.Join(", ", filesToProcess)}) for bottler '{customerName}'"); + continue; + } + + var lowerFileType = fileParts.Filetype.ToLowerInvariant(); + log.LogInformation($@"Validating {lowerFileType}..."); + + uint numColumns = 0; + switch (lowerFileType) + { + case @"type5": // salestype + numColumns = 2; + break; + case @"type10": // mixedpack + case @"type4": // shipfrom + numColumns = 3; + break; + case @"type1": // channel + case @"type2": // customer + numColumns = 4; + break; + case @"type9": // itemdetail + case @"type3": // shipto + numColumns = 14; + break; + case @"type6": // salesdetail + numColumns = 15; + break; + case @"type8": // product + numColumns = 21; + break; + case @"type7": // sales + numColumns = 23; + break; + default: + throw new ArgumentOutOfRangeException(nameof(prefix), $@"Unhandled file type: {fileParts.Filetype}"); + } + + errors.AddRange(await ValidateCsvStructureAsync(blob, numColumns, lowerFileType)); + } + try + { + await LockTableEntity.UpdateAsync(filePrefix, LockTableEntity.BatchState.Done, lockTable); + } + catch (StorageException) + { + log.LogWarning($@"That's weird. The lock for prefix {prefix} wasn't there. Shouldn't happen!"); + return req.CreateResponse(HttpStatusCode.OK); + } + + if (errors.Any()) + { + log.LogError($@"Errors found in batch {filePrefix}: {string.Join(@", ", errors)}"); + + // move files to 'invalid-set' folder + await MoveBlobsAsync(log, blobClient, targetBlobs, @"invalid-set"); + + return req.CreateErrorResponse(HttpStatusCode.BadRequest, string.Join(@", ", errors)); + } + else + { + // move these files to 'valid-set' folder + await MoveBlobsAsync(log, blobClient, targetBlobs, @"valid-set"); + + log.LogInformation($@"Set {filePrefix} successfully validated and queued for further processing."); + + return req.CreateResponse(HttpStatusCode.OK); + } + } + + private static async Task ShouldProceedAsync(CloudTable bottlerFilesTable, string prefix, string filePrefix, ILogger log) + { + try + { + var lockRecord = await LockTableEntity.GetLockRecordAsync(filePrefix, bottlerFilesTable); + if (lockRecord?.State == LockTableEntity.BatchState.Waiting) + { + // Update the lock record to mark it as in progress + lockRecord.State = LockTableEntity.BatchState.InProgress; + await bottlerFilesTable.ExecuteAsync(TableOperation.Replace(lockRecord)); + return true; + } + else + { + log.LogInformation($@"Validate for {prefix} skipped. State was {lockRecord?.State.ToString() ?? @"[null]"}."); + } + } + catch (StorageException) + { + log.LogInformation($@"Validate for {prefix} skipped (StorageException. Somebody else picked it up already."); + } + + return false; + } + + private static async Task MoveBlobsAsync(ILogger log, CloudBlobClient blobClient, IEnumerable targetBlobs, string folderName) + { + foreach (var b in targetBlobs) + { + var blobRef = await blobClient.GetBlobReferenceFromServerAsync(b.StorageUri.PrimaryUri); + var sourceBlob = b.Container.GetBlockBlobReference(blobRef.Name); + + var targetBlob = blobRef.Container + .GetDirectoryReference($@"{folderName}") + .GetBlockBlobReference(Path.GetFileName(blobRef.Name)); + + string sourceLeaseGuid = Guid.NewGuid().ToString(), targetLeaseGuid = Guid.NewGuid().ToString(); + var sourceLeaseId = await sourceBlob.AcquireLeaseAsync(TimeSpan.FromSeconds(60), sourceLeaseGuid); + var targetLeaseId = await targetBlob.AcquireLeaseAsync(TimeSpan.FromSeconds(60), targetLeaseGuid); + + await targetBlob.StartCopyAsync(sourceBlob); + + while (targetBlob.CopyState.Status == CopyStatus.Pending) + { + ; // spinlock until the copy completes + } + + var copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; + if (!copySucceeded) + { + log.LogError($@"Error copying {sourceBlob.Name} to {folderName} folder. Retrying once..."); + + await targetBlob.StartCopyAsync(sourceBlob); + + while (targetBlob.CopyState.Status == CopyStatus.Pending) + { + ; // spinlock until the copy completes + } + + copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; + if (!copySucceeded) + { + log.LogError($@"Error retrying copy of {sourceBlob.Name} to {folderName} folder. File not moved."); + } + } + + if (copySucceeded) + { +#if DEBUG + try + { +#endif + await sourceBlob.DeleteAsync(); +#if DEBUG + } + catch (StorageException ex) + { + log.LogError($@"Error deleting blob {sourceBlob.Name}", ex); + } +#endif + + await targetBlob.ReleaseLeaseAsync(new AccessCondition { LeaseId = targetLeaseId }); + await sourceBlob.ReleaseLeaseAsync(new AccessCondition { LeaseId = sourceLeaseId }); + } + } + } + + private static async Task> ValidateCsvStructureAsync(ICloudBlob blob, uint requiredNumberOfColumnsPerLine, string filetypeDescription) + { + var errs = new List(); + try + { + using (var blobReader = new StreamReader(await blob.OpenReadAsync(new AccessCondition(), new BlobRequestOptions(), new OperationContext()))) + { + var fileAttributes = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); + + for (var lineNumber = 0; !blobReader.EndOfStream; lineNumber++) + { + var errorPrefix = $@"{filetypeDescription} file '{fileAttributes.Filename}' Record {lineNumber}"; + var line = blobReader.ReadLine(); + var fields = line.Split(','); + if (fields.Length != requiredNumberOfColumnsPerLine) + { + errs.Add($@"{errorPrefix} is malformed. Should have {requiredNumberOfColumnsPerLine} values; has {fields.Length}"); + continue; + } + + for (var i = 0; i < fields.Length; i++) + { + errorPrefix = $@"{errorPrefix} Field {i}"; + var field = fields[i]; + // each field must be enclosed in double quotes + if (field[0] != '"' || field.Last() != '"') + { + errs.Add($@"{errorPrefix}: value ({field}) is not enclosed in double quotes ("")"); + continue; + } + } + } + + // Validate file is UTF-8 encoded + if (!blobReader.CurrentEncoding.BodyName.Equals("utf-8", StringComparison.OrdinalIgnoreCase)) + { + errs.Add($@"{blob.Name} is not UTF-8 encoded"); + } + } + } + catch (StorageException storEx) + { + SwallowStorage404(storEx); + } + return errs; + } + + private static void SwallowStorage404(StorageException storEx) + { + var webEx = storEx.InnerException as WebException; + if ((webEx.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound) + { + // Ignore + } + else + { + throw storEx; + } + } + } +} diff --git a/AzureFunctions.v2/Helpers.cs b/AzureFunctions.v2/Helpers.cs new file mode 100644 index 0000000..0a37777 --- /dev/null +++ b/AzureFunctions.v2/Helpers.cs @@ -0,0 +1,286 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Blob; +using Microsoft.WindowsAzure.Storage.Table; + +namespace FileValidation +{ + static class Helpers + { + public static async System.Threading.Tasks.Task GetLockTableAsync(CloudStorageAccount storageAccount = null) + { + CloudTable customerFilesTable; + if (storageAccount == null) + { + if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"AzureWebJobsStorage"), out var sa)) + { + throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); + } + else + { + storageAccount = sa; + } + } + + try + { + customerFilesTable = storageAccount.CreateCloudTableClient().GetTableReference(@"FileProcessingLocks"); + } + catch (Exception ex) + { + throw new Exception($@"Error creating table client for locks: {ex}", ex); + } + + while (true) + { + try + { + await customerFilesTable.CreateIfNotExistsAsync(); + break; + } + catch { } + } + + return customerFilesTable; + } + + public static CustomerBlobAttributes ParseEventGridPayload(dynamic eventGridItem, ILogger log) + { + if (eventGridItem.eventType == @"Microsoft.Storage.BlobCreated" + && eventGridItem.data.api == @"PutBlob" + && eventGridItem.data.contentType == @"text/csv") + { + try + { + var retVal = CustomerBlobAttributes.Parse((string)eventGridItem.data.url); + if (retVal != null && !retVal.ContainerName.Equals(retVal.CustomerName)) + { + throw new ArgumentException($@"File '{retVal.Filename}' uploaded to container '{retVal.ContainerName}' doesn't have the right prefix: the first token in the filename ({retVal.CustomerName}) must be the customer name, which should match the container name", nameof(eventGridItem)); + } + + return retVal; + } + catch (Exception ex) + { + log.LogError(@"Error parsing Event Grid payload", ex); + } + } + + return null; + } + + public static IEnumerable GetExpectedFilesForCustomer() => new[] { @"type1", @"type2", @"type3", @"type4", @"type5", @"type7", @"type8", @"type9", @"type10" }; + + public static async Task DoValidationAsync(string prefix, ILogger logger = null) + { + logger?.LogTrace(@"ValidateFileSet run."); + if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var storageAccount)) + { + throw new Exception(@"Can't create a storage account accessor from app setting connection string, sorry!"); + } + + logger?.LogTrace($@"prefix: {prefix}"); + + var filePrefix = prefix.Substring(prefix.LastIndexOf('/') + 1); + logger?.LogTrace($@"filePrefix: {filePrefix}"); + + var blobClient = storageAccount.CreateCloudBlobClient(); + var targetBlobs = await blobClient.ListBlobsAsync(WebUtility.UrlDecode(prefix)); + var customerName = filePrefix.Split('_').First().Split('-').Last(); + + var errors = new List(); + var expectedFiles = Helpers.GetExpectedFilesForCustomer(); + + foreach (var blobDetails in targetBlobs) + { + var blob = await blobClient.GetBlobReferenceFromServerAsync(blobDetails.StorageUri.PrimaryUri); + + var fileParts = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); + if (!expectedFiles.Contains(fileParts.Filetype, StringComparer.OrdinalIgnoreCase)) + { + logger?.LogTrace($@"{blob.Name} skipped. Isn't in the list of file types to process ({string.Join(", ", expectedFiles)}) for customer '{customerName}'"); + continue; + } + + var lowerFileType = fileParts.Filetype.ToLowerInvariant(); + uint numColumns = 0; + switch (lowerFileType) + { + case @"type5": // salestype + numColumns = 2; + break; + case @"type10": // mixed + case @"type4": // shipfrom + numColumns = 3; + break; + case @"type1": // channel + case @"type2": // customer + numColumns = 4; + break; + case @"type9": // itemdetail + numColumns = 5; + break; + case @"type3": // shipto + numColumns = 14; + break; + case @"type6": // salesdetail + numColumns = 15; + break; + case @"type8": // product + numColumns = 21; + break; + case @"type7": // sales + numColumns = 23; + break; + default: + throw new ArgumentOutOfRangeException(nameof(prefix), $@"Unhandled file type: {fileParts.Filetype}"); + } + + errors.AddRange(await ValidateCsvStructureAsync(blob, numColumns, lowerFileType)); + } + + if (errors.Any()) + { + logger.LogError($@"Errors found in batch {filePrefix}: {string.Join(@", ", errors)}"); + + // move files to 'invalid-set' folder + await Helpers.MoveBlobsAsync(blobClient, targetBlobs, @"invalid-set", logger); + return false; + } + else + { + // move these files to 'valid-set' folder + await Helpers.MoveBlobsAsync(blobClient, targetBlobs, @"valid-set", logger); + + logger.LogInformation($@"Set {filePrefix} successfully validated and queued for further processing."); + return true; + } + } + + private static async Task> ValidateCsvStructureAsync(ICloudBlob blob, uint requiredNumberOfColumnsPerLine, string filetypeDescription) + { + var errs = new List(); + try + { + using (var blobReader = new StreamReader(await blob.OpenReadAsync(new AccessCondition(), new BlobRequestOptions(), new OperationContext()))) + { + var fileAttributes = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath); + + for (var lineNumber = 0; !blobReader.EndOfStream; lineNumber++) + { + var errorPrefix = $@"{filetypeDescription} file '{fileAttributes.Filename}' Record {lineNumber}"; + var line = blobReader.ReadLine(); + var fields = line.Split(','); + if (fields.Length != requiredNumberOfColumnsPerLine) + { + errs.Add($@"{errorPrefix} is malformed. Should have {requiredNumberOfColumnsPerLine} values; has {fields.Length}"); + continue; + } + + for (var i = 0; i < fields.Length; i++) + { + errorPrefix = $@"{errorPrefix} Field {i}"; + var field = fields[i]; + // each field must be enclosed in double quotes + if (field[0] != '"' || field.Last() != '"') + { + errs.Add($@"{errorPrefix}: value ({field}) is not enclosed in double quotes ("")"); + continue; + } + } + } + + // Validate file is UTF-8 encoded + if (!blobReader.CurrentEncoding.BodyName.Equals("utf-8", StringComparison.OrdinalIgnoreCase)) + { + errs.Add($@"{blob.Name} is not UTF-8 encoded"); + } + } + } + catch (StorageException storEx) + { + SwallowStorage404(storEx); + } + return errs; + } + + private static void SwallowStorage404(StorageException storEx) + { + var webEx = storEx.InnerException as WebException; + if ((webEx.Response as HttpWebResponse)?.StatusCode == HttpStatusCode.NotFound) + { + // Ignore + } + else + { + throw storEx; + } + } + + public static async Task MoveBlobsAsync(CloudBlobClient blobClient, IEnumerable targetBlobs, string folderName, ILogger logger = null) + { + foreach (var b in targetBlobs) + { + var blobRef = await blobClient.GetBlobReferenceFromServerAsync(b.StorageUri.PrimaryUri); + var sourceBlob = b.Container.GetBlockBlobReference(blobRef.Name); + + var targetBlob = blobRef.Container + .GetDirectoryReference($@"{folderName}") + .GetBlockBlobReference(Path.GetFileName(blobRef.Name)); + + string sourceLeaseGuid = Guid.NewGuid().ToString(), targetLeaseGuid = Guid.NewGuid().ToString(); + var sourceLeaseId = await sourceBlob.AcquireLeaseAsync(TimeSpan.FromSeconds(60), sourceLeaseGuid); + + await targetBlob.StartCopyAsync(sourceBlob); + + while (targetBlob.CopyState.Status == CopyStatus.Pending) + { + ; // spinlock until the copy completes + } + + var copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; + if (!copySucceeded) + { + logger?.LogError($@"Error copying {sourceBlob.Name} to {folderName} folder. Retrying once..."); + + await targetBlob.StartCopyAsync(sourceBlob); + + while (targetBlob.CopyState.Status == CopyStatus.Pending) + { + ; // spinlock until the copy completes + } + + copySucceeded = targetBlob.CopyState.Status == CopyStatus.Success; + if (!copySucceeded) + { + logger?.LogError($@"Error retrying copy of {sourceBlob.Name} to {folderName} folder. File not moved."); + } + } + + if (copySucceeded) + { +#if DEBUG + try + { +#endif + await sourceBlob.ReleaseLeaseAsync(new AccessCondition { LeaseId = sourceLeaseId }); + await sourceBlob.DeleteAsync(); +#if DEBUG + } + catch (StorageException ex) + { + logger?.LogError($@"Error deleting blob {sourceBlob.Name}", ex); + } +#endif + + } + } + } + } +} \ No newline at end of file diff --git a/AzureFunctions.v2/LockTableEntity.cs b/AzureFunctions.v2/LockTableEntity.cs new file mode 100644 index 0000000..f6da9ae --- /dev/null +++ b/AzureFunctions.v2/LockTableEntity.cs @@ -0,0 +1,60 @@ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.WindowsAzure.Storage; +using Microsoft.WindowsAzure.Storage.Table; + +namespace FileValidation +{ + class LockTableEntity : TableEntity + { + public LockTableEntity() : base() { } + + public LockTableEntity(string prefix) : base(prefix, prefix) { } + + [IgnoreProperty] + public string Prefix + { + get => this.PartitionKey; + set + { + this.PartitionKey = value; + this.RowKey = value; + } + } + + [IgnoreProperty] + public BatchState State { get; set; } = BatchState.Waiting; + + public string DbState + { + get => this.State.ToString(); + set => this.State = (BatchState)Enum.Parse(typeof(BatchState), value); + } + + public enum BatchState + { + Waiting, InProgress, Done + } + + public static async Task GetLockRecordAsync(string filePrefix, CloudTable customerFilesTable = null, CloudStorageAccount customerFilesTableStorageAccount = null) + { + customerFilesTable = customerFilesTable ?? await Helpers.GetLockTableAsync(customerFilesTableStorageAccount); + + return (await customerFilesTable.ExecuteQueryAsync( + new TableQuery() + .Where(TableQuery.GenerateFilterCondition(@"PartitionKey", QueryComparisons.Equal, filePrefix)))) + .SingleOrDefault(); + } + + public static async Task UpdateAsync(string filePrefix, BatchState state, CloudTable customerFilesTable = null, CloudStorageAccount customerFilesTableStorageAccount = null) + { + var entity = await GetLockRecordAsync(filePrefix, customerFilesTable); + entity.State = state; + + customerFilesTable = customerFilesTable ?? await Helpers.GetLockTableAsync(customerFilesTableStorageAccount); + + await customerFilesTable.ExecuteAsync(TableOperation.Replace(entity)); + } + } +} diff --git a/AzureFunctions.v2/host.json b/AzureFunctions.v2/host.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/AzureFunctions.v2/host.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..f9ba8cf --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,9 @@ +# Microsoft Open Source Code of Conduct + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). + +Resources: + +- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) +- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9e841e7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/LogicApps/DeploymentHelper.cs b/LogicApps/DeploymentHelper.cs new file mode 100644 index 0000000..8cd9d0f --- /dev/null +++ b/LogicApps/DeploymentHelper.cs @@ -0,0 +1,115 @@ +// Requires the following Azure NuGet packages and related dependencies: +// package id="Microsoft.Azure.Management.Authorization" version="2.0.0" +// package id="Microsoft.Azure.Management.ResourceManager" version="1.4.0-preview" +// package id="Microsoft.Rest.ClientRuntime.Azure.Authentication" version="2.2.8-preview" + +using Microsoft.Azure.Management.ResourceManager; +using Microsoft.Azure.Management.ResourceManager.Models; +using Microsoft.Rest.Azure.Authentication; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using System; +using System.IO; + +namespace PortalGenerated +{ + /// + /// This is a helper class for deploying an Azure Resource Manager template + /// More info about template deployments can be found here https://go.microsoft.com/fwLink/?LinkID=733371 + /// + class DeploymentHelper + { + string subscriptionId = "your-subscription-id"; + string clientId = "your-service-principal-clientId"; + string clientSecret = "your-service-principal-client-secret"; + string resourceGroupName = "resource-group-name"; + string deploymentName = "deployment-name"; + string resourceGroupLocation = "resource-group-location"; // must be specified for creating a new resource group + string pathToTemplateFile = "path-to-template.json-on-disk"; + string pathToParameterFile = "path-to-parameters.json-on-disk"; + string tenantId = "tenant-id"; + + public async void Run() + { + // Try to obtain the service credentials + var serviceCreds = await ApplicationTokenProvider.LoginSilentAsync(tenantId, clientId, clientSecret); + + // Read the template and parameter file contents + JObject templateFileContents = GetJsonFileContents(pathToTemplateFile); + JObject parameterFileContents = GetJsonFileContents(pathToParameterFile); + + // Create the resource manager client + var resourceManagementClient = new ResourceManagementClient(serviceCreds); + resourceManagementClient.SubscriptionId = subscriptionId; + + // Create or check that resource group exists + EnsureResourceGroupExists(resourceManagementClient, resourceGroupName, resourceGroupLocation); + + // Start a deployment + DeployTemplate(resourceManagementClient, resourceGroupName, deploymentName, templateFileContents, parameterFileContents); + } + + /// + /// Reads a JSON file from the specified path + /// + /// The full path to the JSON file + /// The JSON file contents + private JObject GetJsonFileContents(string pathToJson) + { + JObject templatefileContent = new JObject(); + using (StreamReader file = File.OpenText(pathToJson)) + { + using (JsonTextReader reader = new JsonTextReader(file)) + { + templatefileContent = (JObject)JToken.ReadFrom(reader); + return templatefileContent; + } + } + } + + /// + /// Ensures that a resource group with the specified name exists. If it does not, will attempt to create one. + /// + /// The resource manager client. + /// The name of the resource group. + /// The resource group location. Required when creating a new resource group. + private static void EnsureResourceGroupExists(ResourceManagementClient resourceManagementClient, string resourceGroupName, string resourceGroupLocation) + { + if (resourceManagementClient.ResourceGroups.CheckExistence(resourceGroupName) != true) + { + Console.WriteLine(string.Format("Creating resource group '{0}' in location '{1}'", resourceGroupName, resourceGroupLocation)); + var resourceGroup = new ResourceGroup(); + resourceGroup.Location = resourceGroupLocation; + resourceManagementClient.ResourceGroups.CreateOrUpdate(resourceGroupName, resourceGroup); + } + else + { + Console.WriteLine(string.Format("Using existing resource group '{0}'", resourceGroupName)); + } + } + + /// + /// Starts a template deployment. + /// + /// The resource manager client. + /// The name of the resource group. + /// The name of the deployment. + /// The template file contents. + /// The parameter file contents. + private static void DeployTemplate(ResourceManagementClient resourceManagementClient, string resourceGroupName, string deploymentName, JObject templateFileContents, JObject parameterFileContents) + { + Console.WriteLine(string.Format("Starting template deployment '{0}' in resource group '{1}'", deploymentName, resourceGroupName)); + var deployment = new Deployment(); + + deployment.Properties = new DeploymentProperties + { + Mode = DeploymentMode.Incremental, + Template = templateFileContents, + Parameters = parameterFileContents["parameters"].ToObject() + }; + + var deploymentResult = resourceManagementClient.Deployments.CreateOrUpdate(resourceGroupName, deploymentName, deployment); + Console.WriteLine(string.Format("Deployment status: {0}", deploymentResult.Properties.ProvisioningState)); + } + } +} \ No newline at end of file diff --git a/LogicApps/deploy.ps1 b/LogicApps/deploy.ps1 new file mode 100644 index 0000000..1dd501b --- /dev/null +++ b/LogicApps/deploy.ps1 @@ -0,0 +1,107 @@ +<# + .SYNOPSIS + Deploys a template to Azure + + .DESCRIPTION + Deploys an Azure Resource Manager template + + .PARAMETER subscriptionId + The subscription id where the template will be deployed. + + .PARAMETER resourceGroupName + The resource group where the template will be deployed. Can be the name of an existing or a new resource group. + + .PARAMETER resourceGroupLocation + Optional, a resource group location. If specified, will try to create a new resource group in this location. If not specified, assumes resource group is existing. + + .PARAMETER deploymentName + The deployment name. + + .PARAMETER templateFilePath + Optional, path to the template file. Defaults to template.json. + + .PARAMETER parametersFilePath + Optional, path to the parameters file. Defaults to parameters.json. If file is not found, will prompt for parameter values based on template. +#> + +param( + [Parameter(Mandatory=$True)] + [string] + $subscriptionId, + + [Parameter(Mandatory=$True)] + [string] + $resourceGroupName, + + [string] + $resourceGroupLocation, + + [Parameter(Mandatory=$True)] + [string] + $deploymentName, + + [string] + $templateFilePath = "template.json", + + [string] + $parametersFilePath = "parameters.json" +) + +<# +.SYNOPSIS + Registers RPs +#> +Function RegisterRP { + Param( + [string]$ResourceProviderNamespace + ) + + Write-Host "Registering resource provider '$ResourceProviderNamespace'"; + Register-AzureRmResourceProvider -ProviderNamespace $ResourceProviderNamespace; +} + +#****************************************************************************** +# Script body +# Execution begins here +#****************************************************************************** +$ErrorActionPreference = "Stop" + +# sign in +Write-Host "Logging in..."; +Login-AzureRmAccount; + +# select subscription +Write-Host "Selecting subscription '$subscriptionId'"; +Select-AzureRmSubscription -SubscriptionID $subscriptionId; + +# Register RPs +$resourceProviders = @("microsoft.logic"); +if($resourceProviders.length) { + Write-Host "Registering resource providers" + foreach($resourceProvider in $resourceProviders) { + RegisterRP($resourceProvider); + } +} + +#Create or check for existing resource group +$resourceGroup = Get-AzureRmResourceGroup -Name $resourceGroupName -ErrorAction SilentlyContinue +if(!$resourceGroup) +{ + Write-Host "Resource group '$resourceGroupName' does not exist. To create a new resource group, please enter a location."; + if(!$resourceGroupLocation) { + $resourceGroupLocation = Read-Host "resourceGroupLocation"; + } + Write-Host "Creating resource group '$resourceGroupName' in location '$resourceGroupLocation'"; + New-AzureRmResourceGroup -Name $resourceGroupName -Location $resourceGroupLocation +} +else{ + Write-Host "Using existing resource group '$resourceGroupName'"; +} + +# Start the deployment +Write-Host "Starting deployment..."; +if(Test-Path $parametersFilePath) { + New-AzureRmResourceGroupDeployment -ResourceGroupName $resourceGroupName -TemplateFile $templateFilePath -TemplateParameterFile $parametersFilePath; +} else { + New-AzureRmResourceGroupDeployment -ResourceGroupName $resourceGroupName -TemplateFile $templateFilePath; +} \ No newline at end of file diff --git a/LogicApps/deploy.sh b/LogicApps/deploy.sh new file mode 100644 index 0000000..e950bda --- /dev/null +++ b/LogicApps/deploy.sh @@ -0,0 +1,122 @@ +#!/bin/bash +set -euo pipefail +IFS=$'\n\t' + +# -e: immediately exit if any command has a non-zero exit status +# -o: prevents errors in a pipeline from being masked +# IFS new value is less likely to cause confusing bugs when looping arrays or arguments (e.g. $@) + +usage() { echo "Usage: $0 -i -g -n -l " 1>&2; exit 1; } + +declare subscriptionId="" +declare resourceGroupName="" +declare deploymentName="" +declare resourceGroupLocation="" + +# Initialize parameters specified from command line +while getopts ":i:g:n:l:" arg; do + case "${arg}" in + i) + subscriptionId=${OPTARG} + ;; + g) + resourceGroupName=${OPTARG} + ;; + n) + deploymentName=${OPTARG} + ;; + l) + resourceGroupLocation=${OPTARG} + ;; + esac +done +shift $((OPTIND-1)) + +#Prompt for parameters is some required parameters are missing +if [[ -z "$subscriptionId" ]]; then + echo "Your subscription ID can be looked up with the CLI using: az account show --out json " + echo "Enter your subscription ID:" + read subscriptionId + [[ "${subscriptionId:?}" ]] +fi + +if [[ -z "$resourceGroupName" ]]; then + echo "This script will look for an existing resource group, otherwise a new one will be created " + echo "You can create new resource groups with the CLI using: az group create " + echo "Enter a resource group name" + read resourceGroupName + [[ "${resourceGroupName:?}" ]] +fi + +if [[ -z "$deploymentName" ]]; then + echo "Enter a name for this deployment:" + read deploymentName +fi + +if [[ -z "$resourceGroupLocation" ]]; then + echo "If creating a *new* resource group, you need to set a location " + echo "You can lookup locations with the CLI using: az account list-locations " + + echo "Enter resource group location:" + read resourceGroupLocation +fi + +#templateFile Path - template file to be used +templateFilePath="template.json" + +if [ ! -f "$templateFilePath" ]; then + echo "$templateFilePath not found" + exit 1 +fi + +#parameter file path +parametersFilePath="parameters.json" + +if [ ! -f "$parametersFilePath" ]; then + echo "$parametersFilePath not found" + exit 1 +fi + +if [ -z "$subscriptionId" ] || [ -z "$resourceGroupName" ] || [ -z "$deploymentName" ]; then + echo "Either one of subscriptionId, resourceGroupName, deploymentName is empty" + usage +fi + +#login to azure using your credentials +az account show 1> /dev/null + +if [ $? != 0 ]; +then + az login +fi + +#set the default subscription id +az account set --subscription $subscriptionId + +set +e + +#Check for existing RG +az group show $resourceGroupName 1> /dev/null + +if [ $? != 0 ]; then + echo "Resource group with name" $resourceGroupName "could not be found. Creating new resource group.." + set -e + ( + set -x + az group create --name $resourceGroupName --location $resourceGroupLocation 1> /dev/null + ) + else + echo "Using existing resource group..." +fi + +#Start deployment +echo "Starting deployment..." +( + set -x + az group deployment create --name "$deploymentName" --resource-group "$resourceGroupName" --template-file "$templateFilePath" --parameters "@${parametersFilePath}" +) + +if [ $? == 0 ]; + then + echo "Template has been successfully deployed" +fi diff --git a/LogicApps/deployer.rb b/LogicApps/deployer.rb new file mode 100644 index 0000000..4943008 --- /dev/null +++ b/LogicApps/deployer.rb @@ -0,0 +1,71 @@ +require 'azure_mgmt_resources' + +class Deployer + + # Initialize the deployer class with subscription, resource group and resource group location. The class will raise an + # ArgumentError if there are empty values for Tenant Id, Client Id or Client Secret environment variables. + # + # @param [String] subscription_id the subscription to deploy the template + # @param [String] resource_group the resource group to create or update and then deploy the template + # @param [String] resource_group_location the location of the resource group + def initialize(subscription_id, resource_group, resource_group_location) + raise ArgumentError.new("Missing template file 'template.json' in current directory.") unless File.exist?('template.json') + raise ArgumentError.new("Missing parameters file 'parameters.json' in current directory.") unless File.exist?('parameters.json') + @resource_group = resource_group + @subscription_id = subscription_id + @resource_group_location = resource_group_location + provider = MsRestAzure::ApplicationTokenProvider.new( + ENV['AZURE_TENANT_ID'], + ENV['AZURE_CLIENT_ID'], + ENV['AZURE_CLIENT_SECRET']) + credentials = MsRest::TokenCredentials.new(provider) + @client = Azure::ARM::Resources::ResourceManagementClient.new(credentials) + @client.subscription_id = @subscription_id + end + + # Deploy the template to a resource group + def deploy + # ensure the resource group is created + params = Azure::ARM::Resources::Models::ResourceGroup.new.tap do |rg| + rg.location = @resource_group_location + end + @client.resource_groups.create_or_update(@resource_group, params).value! + + # build the deployment from a json file template from parameters + template = File.read(File.expand_path(File.join(__dir__, 'template.json'))) + deployment = Azure::ARM::Resources::Models::Deployment.new + deployment.properties = Azure::ARM::Resources::Models::DeploymentProperties.new + deployment.properties.template = JSON.parse(template) + deployment.properties.mode = Azure::ARM::Resources::Models::DeploymentMode::Incremental + + # build the deployment template parameters from Hash to {key: {value: value}} format + deploy_params = File.read(File.expand_path(File.join(__dir__, 'parameters.json'))) + deployment.properties.parameters = JSON.parse(deploy_params)["parameters"] + + # put the deployment to the resource group + @client.deployments.create_or_update(@resource_group, 'azure-sample', deployment) + end +end + +# Get user inputs and execute the script +if(ARGV.empty?) + puts "Please specify subscriptionId resourceGroupName resourceGroupLocation as command line arguments" + exit +end + +subscription_id = ARGV[0] # Azure Subscription Id +resource_group = ARGV[1] # The resource group for deployment +resource_group_location = ARGV[2] # The resource group location + +msg = "\nInitializing the Deployer class with subscription id: #{subscription_id}, resource group: #{resource_group}" +msg += "\nand resource group location: #{resource_group_location}...\n\n" +puts msg + +# Initialize the deployer class +deployer = Deployer.new(subscription_id, resource_group, resource_group_location) + +puts "Beginning the deployment... \n\n" +# Deploy the template +deployment = deployer.deploy + +puts "Done deploying!!" \ No newline at end of file diff --git a/LogicApps/parameters.json b/LogicApps/parameters.json new file mode 100644 index 0000000..9d3582a --- /dev/null +++ b/LogicApps/parameters.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "workflows_FileValidation_BatchReceiver_name": { + "value": null + }, + "workflows_FileValidation_BatchProcessor_name": { + "value": null + } + } +} \ No newline at end of file diff --git a/LogicApps/template.json b/LogicApps/template.json new file mode 100644 index 0000000..28c4df7 --- /dev/null +++ b/LogicApps/template.json @@ -0,0 +1,227 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "workflows_FileValidation_BatchReceiver_name": { + "defaultValue": "FileValidation-BatchReceiver", + "type": "String" + }, + "workflows_FileValidation_BatchProcessor_name": { + "defaultValue": "FileValidation-BatchProcessor", + "type": "String" + } + }, + "variables": {}, + "resources": [ + { + "comments": "Generalized from resource: '/subscriptions/0c249eea-065b-4034-955e-795d56b1e5d1/resourceGroups/serverless-demo-test/providers/Microsoft.Logic/workflows/FileValidation-BatchProcessor'.", + "type": "Microsoft.Logic/workflows", + "name": "[parameters('workflows_FileValidation_BatchProcessor_name')]", + "apiVersion": "2017-07-01", + "location": "westus2", + "tags": {}, + "scale": null, + "properties": { + "state": "Enabled", + "definition": { + "$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#", + "contentVersion": "1.0.0.0", + "parameters": {}, + "triggers": { + "manual": { + "type": "Request", + "kind": "EventGrid", + "inputs": { + "schema": { + "properties": { + "data": {} + }, + "type": "object" + } + } + } + }, + "actions": { + "BatchReceiver_2": { + "runAfter": { + "Partition": [ + "Succeeded" + ] + }, + "type": "SendToBatch", + "inputs": { + "batchName": "BatchReciever", + "content": "@outputs('Compose')", + "host": { + "triggerName": "Batch_messages", + "workflow": { + "id": "[resourceId('Microsoft.Logic/workflows', parameters('workflows_FileValidation_BatchReceiver_name'))]" + } + }, + "partitionName": "@{outputs('Partition')}" + } + }, + "Compose": { + "runAfter": { + "Parse_JSON": [ + "Succeeded" + ] + }, + "type": "Compose", + "inputs": "@body('Parse_JSON')[0]?['data']['url']" + }, + "Parse_JSON": { + "runAfter": {}, + "type": "ParseJson", + "inputs": { + "content": "@triggerBody()", + "schema": { + "items": { + "properties": { + "data": { + "properties": { + "api": { + "type": "string" + }, + "blobType": { + "type": "string" + }, + "clientRequestId": { + "type": "string" + }, + "contentLength": { + "type": "number" + }, + "contentType": { + "type": "string" + }, + "eTag": { + "type": "string" + }, + "requestId": { + "type": "string" + }, + "sequencer": { + "type": "string" + }, + "storageDiagnostics": { + "properties": { + "batchId": { + "type": "string" + } + }, + "type": "object" + }, + "url": { + "type": "string" + } + }, + "type": "object" + }, + "dataVersion": { + "type": "string" + }, + "eventTime": { + "type": "string" + }, + "eventType": { + "type": "string" + }, + "id": { + "type": "string" + }, + "metadataVersion": { + "type": "string" + }, + "subject": { + "type": "string" + }, + "topic": { + "type": "string" + } + }, + "required": [ + "topic", + "subject", + "eventType", + "eventTime", + "id", + "data", + "dataVersion", + "metadataVersion" + ], + "type": "object" + }, + "type": "array" + } + } + }, + "Partition": { + "runAfter": { + "Compose": [ + "Succeeded" + ] + }, + "type": "Compose", + "inputs": "@substring(outputs('Compose'), 50, 14)" + } + }, + "outputs": {} + }, + "parameters": {} + }, + "dependsOn": [ + "[resourceId('Microsoft.Logic/workflows', parameters('workflows_FileValidation_BatchReceiver_name'))]" + ] + }, + { + "comments": "Generalized from resource: '/subscriptions/0c249eea-065b-4034-955e-795d56b1e5d1/resourceGroups/serverless-demo-test/providers/Microsoft.Logic/workflows/FileValidation-BatchReceiver'.", + "type": "Microsoft.Logic/workflows", + "name": "[parameters('workflows_FileValidation_BatchReceiver_name')]", + "apiVersion": "2017-07-01", + "location": "westus2", + "tags": {}, + "scale": null, + "properties": { + "state": "Enabled", + "definition": { + "$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#", + "contentVersion": "1.0.0.0", + "parameters": {}, + "triggers": { + "Batch_messages": { + "type": "Batch", + "inputs": { + "configurations": { + "BatchReciever": { + "releaseCriteria": { + "messageCount": 3 + } + } + }, + "mode": "Inline" + } + } + }, + "actions": { + "For_each": { + "foreach": "@triggerBody()['items']", + "actions": { + "Change_to_run_validation_on_each_item's_content": { + "runAfter": {}, + "type": "Compose", + "inputs": "@items('For_each')['content']" + } + }, + "runAfter": {}, + "type": "Foreach" + } + }, + "outputs": {} + }, + "parameters": {} + }, + "dependsOn": [] + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a112c7 --- /dev/null +++ b/README.md @@ -0,0 +1,135 @@ +--- +page_type: sample +languages: + - csharp +products: + - azure + - azure-blob-storage + - azure-event-grid + - azure-functions + - azure-logic-apps + - azure-storage + - azure-table-storage + - dotnet + - dotnet-core + - dotnet-standard +description: This sample outlines ways to accomplish validation across files received in a batch format using Azure Serverless technologies. +--- + +# File processing and validation using Azure Functions, Logic Apps, and Durable Functions +This sample outlines multiple ways to accomplish the following set of requirements using Azure Serverless technologies. One way uses the "traditional" serverless approach, another Logic Apps, and another Azure Functions' _Durable Functions_ feature. + +## Problem statement +Given a set of customers, assume each customer uploads data to our backend for historical record keeping and analysis. This data arrives in the form of a **set** of `.csv` files with each file containing different data. Think of them almost as SQL Table dumps in CSV format. + +When the customer uploads the files, we have two primary objectives: +1. Ensure that all the files required for the customer are present for a particular "set" (aka "batch") of data +2. Only when we have all the files for a set, continue on to validate the structure of each file ensuring a handful of requirements: + * Each file must be UTF-8 encoded + * Depending on the file (type1, type2, etc), ensure the correct # of columns are present in the CSV file + +## Setup +To accomplish this sample, you'll need to set up a few things: + +1. Azure General Purpose Storage + * For the Functions SDK to store its dashboard info, and the Durable Functions to store their state data +1. Azure Blob Storage + * For the customer files to be uploaded in to +1. Azure Event Grid (with Storage Events) +1. ngrok to enable local Azure Function triggering from Event Grid (see this blog post for more) +1. Visual Studio 2017 v15.5.4+ with the **Azure Workload** installed. +1. The *Azure Functions and Web Jobs Tools* extension to VS, version 15.0.40108+ +1. Azure Storage Explorer (makes testing easier) + +## Execution + +Pull down the code. + +Create a new file in the `AzureFunctions.v2` **project** called `local.settings.json` with the following content: +```js +{ + "IsEncrypted": false, + "Values": { + "AzureWebJobsStorage": "", + "AzureWebJobsDashboard": "", + + "CustomerBlobStorage": "", + "ValidateFunctionUrl": "http://localhost:7071/api/Validate" + } +} +``` + +This file will be used across the functions, durable or otherwise. + +Next, run any of the Function apps in this solution. You can use the v1 (.Net Framework) or the v2 (.Net Core) version, it's only needed for Event Grid validation. +With the function running, add an Event Grid Subscription to the Blob Storage account (from step 2), pointing to the ngrok-piped endpoint you created in step 4. The URL should look something like this: `https://b3252cc3.ngrok.io/api/Orchestrator` + +![](https://brandonhmsdnblog.blob.core.windows.net/images/2018/01/17/s2018-01-17_14-59-32.png) + +Upon saving this subscription, you'll see your locally-running Function get hit with a request and return HTTP OK, then the Subscription will go green in Azure and you're set. + +Now, open Azure Storage Explorer and connect to the *Blob* Storage Account you've created. In here, create a container named `cust1`. Inside the container, create a new folder called `inbound`. + +Take one of the `.csv` files from the `sampledata` folder of this repo, and drop it in to the inbound folder. + +If you're using one of the Durable functions, you should see your local function's `/api/Orchestrator` endpoint get hit. + +### Durable Function Execution +1. Determine the "batch prefix" of the file that was dropped. This consists of the customer name (cust1), and a datetime stamp in the format YYYYMMDD_HHMM, making the batch prefix for the first batch in `sampledata` defined as `cust1_20171010_1112` +1. Check to see if a sub-orchestration for this batch already exists. +2. If not, spin one up and pass along the Event Grid data that triggered this execution +3. If so, use `RaiseEvent` to pass the filename along to the instance. + +In the `EnsureAllFiles` sub-orchestration, we look up what files we need for this customer (cust1) and check to see which files have come through thus far. As long as we do *not* have the files we need, we loop within the orchestration. Each time waiting for an external `newfile` event to be thrown to let us know a new file has come through and should be processed. + +When we find we have all the files that constitute a "batch" for the customer, we call the `ValidateFileSet` activity function to process each file in the set and validate the structure of them according to our rules. + +When Validation completes successfully, all files from the batch are moved to a `valid-set` subfolder in the blob storage container. If validation fails (try removing a column in one of the lines in one of the files), the whole set gets moved to `invalid-set` + +#### Resetting Execution +Because of the persistent behavior of state for Durable Functions, if you need to reset the execution because something goes wrong it's not as simple as just re-running the function. To do this properly, you must: +* **Delete the `DurableFunctionsHubHistory` Table** in the "General Purpose" Storage Account you created in Step 1 above. +* Delete any files you uploaded to the `/inbound` directory of the blob storage container triggering the Functions. + +**Note**: after doing these steps you'll have to wait a minute or so before running either of the Durable Function implementations as the storage table creation will error with 409 CONFLICT while deletion takes place. + +### "Classic" Function execution +1. Determine the "batch prefix" of the file that was dropped. This consists of the customer name (cust1), and a datetime stamp in the format YYYYMMDD_HHMM, making the batch prefix for the first batch in `sampledata` defined as `cust1_20171010_1112` +1. Check to see if we have all necessary files in blob storage with this prefix. +1. If we do, check to see if there's a lock entry in the `FileProcessingLocks` table of the General Purpose Storage Account containing this prefix. If so, bail. If not, create one, then call the `ValidateFunctionUrl` endpoint with the batch prefix as payload. +1. The Validate function gets the request & checks to see if the lock is marked as 'in progress'. If so, bail. If not, mark it as such and continue validating the files in the Blob Storage account which match the prefix passed in. + +When Validation completes successfully, all files from the batch are moved to a `valid-set` subfolder in the blob storage container. If validation fails (try removing a column in one of the lines in one of the files), the whole set gets moved to `invalid-set` + +#### Resetting Execution +* Delete the `FileProcessingLocks` table from the General Purpose Storage Account. +* Delete any files you uploaded to the `/inbound` directory of the blob storage container triggering the Functions. + +**Note**: after doing these steps you'll have to wait a minute or so before running either of the Durable Function implementations as the storage table creation will error with 409 CONFLICT while deletion takes place. + +### Logic Apps +While not identically behaved, this repo also contains deployment scripts for two Logic App instances which perform roughly the same flow. +#### Batch Processor +This LA gets Storage Events from event grid, pulls off the full prefix of the file (also containing the URL), and sends this on to... +#### Batch Receiver +This receives events from the Processor and waits for 3 containing the same prefix to arrive before sending the batch on to the next step (you can change this to be whatever you want after deployment) + +## Known issues +#### Functions (all up) +* The `400 BAD REQUEST` return if errors are found in the set suffers from [this bug](https://github.com/Azure/azure-functions-host/issues/2475) on the Functions v2 runtime as of this writing. +#### Durable Functions +* If you drop all the files in at once, there exists a race condition when the events fired from Event Grid hit the top-level Orchestrator endpoint; it doesn't execute `StartNewAsync` fast enough and instead of one instance per batch, you'll end up with multiple instances for the same prefix (even though we desire one instance per, acting like a singleton). + +## Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide +a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions +provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..e0dfff5 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets Microsoft's [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)) of a security vulnerability, please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). + + diff --git a/sampledata/cust1_20171010_1112_type1.csv b/sampledata/cust1_20171010_1112_type1.csv new file mode 100644 index 0000000..c91cfca --- /dev/null +++ b/sampledata/cust1_20171010_1112_type1.csv @@ -0,0 +1,5 @@ +"lorem","lorem","lorem","lorem" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type10.csv b/sampledata/cust1_20171010_1112_type10.csv new file mode 100644 index 0000000..97a7e9c --- /dev/null +++ b/sampledata/cust1_20171010_1112_type10.csv @@ -0,0 +1,8 @@ +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type2.csv b/sampledata/cust1_20171010_1112_type2.csv new file mode 100644 index 0000000..22785b6 --- /dev/null +++ b/sampledata/cust1_20171010_1112_type2.csv @@ -0,0 +1,6 @@ +"lorem","lorem","lorem","lorem" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type3.csv b/sampledata/cust1_20171010_1112_type3.csv new file mode 100644 index 0000000..ed18a98 --- /dev/null +++ b/sampledata/cust1_20171010_1112_type3.csv @@ -0,0 +1,2 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","","","lorem","lorem","","","","","","","","","" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type4.csv b/sampledata/cust1_20171010_1112_type4.csv new file mode 100644 index 0000000..fec03fe --- /dev/null +++ b/sampledata/cust1_20171010_1112_type4.csv @@ -0,0 +1,3 @@ +"lorem","lorem","lorem" +"lorem","lorem","" +"lorem","lorem","" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type5.csv b/sampledata/cust1_20171010_1112_type5.csv new file mode 100644 index 0000000..5af9a37 --- /dev/null +++ b/sampledata/cust1_20171010_1112_type5.csv @@ -0,0 +1,2 @@ +"lorem","lorem" +"lorem","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type7.csv b/sampledata/cust1_20171010_1112_type7.csv new file mode 100644 index 0000000..328823c --- /dev/null +++ b/sampledata/cust1_20171010_1112_type7.csv @@ -0,0 +1,6 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type8.csv b/sampledata/cust1_20171010_1112_type8.csv new file mode 100644 index 0000000..da9ec39 --- /dev/null +++ b/sampledata/cust1_20171010_1112_type8.csv @@ -0,0 +1,3 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" +"lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171010_1112_type9.csv b/sampledata/cust1_20171010_1112_type9.csv new file mode 100644 index 0000000..ed18a98 --- /dev/null +++ b/sampledata/cust1_20171010_1112_type9.csv @@ -0,0 +1,2 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","","","lorem","lorem","","","","","","","","","" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type1.csv b/sampledata/cust1_20171011_1112_type1.csv new file mode 100644 index 0000000..c91cfca --- /dev/null +++ b/sampledata/cust1_20171011_1112_type1.csv @@ -0,0 +1,5 @@ +"lorem","lorem","lorem","lorem" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type10.csv b/sampledata/cust1_20171011_1112_type10.csv new file mode 100644 index 0000000..97a7e9c --- /dev/null +++ b/sampledata/cust1_20171011_1112_type10.csv @@ -0,0 +1,8 @@ +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" +"lorem","lorem","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type2.csv b/sampledata/cust1_20171011_1112_type2.csv new file mode 100644 index 0000000..22785b6 --- /dev/null +++ b/sampledata/cust1_20171011_1112_type2.csv @@ -0,0 +1,6 @@ +"lorem","lorem","lorem","lorem" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" +"lorem","lorem","","" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type3.csv b/sampledata/cust1_20171011_1112_type3.csv new file mode 100644 index 0000000..ed18a98 --- /dev/null +++ b/sampledata/cust1_20171011_1112_type3.csv @@ -0,0 +1,2 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","","","lorem","lorem","","","","","","","","","" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type4.csv b/sampledata/cust1_20171011_1112_type4.csv new file mode 100644 index 0000000..fec03fe --- /dev/null +++ b/sampledata/cust1_20171011_1112_type4.csv @@ -0,0 +1,3 @@ +"lorem","lorem","lorem" +"lorem","lorem","" +"lorem","lorem","" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type5.csv b/sampledata/cust1_20171011_1112_type5.csv new file mode 100644 index 0000000..5af9a37 --- /dev/null +++ b/sampledata/cust1_20171011_1112_type5.csv @@ -0,0 +1,2 @@ +"lorem","lorem" +"lorem","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type7.csv b/sampledata/cust1_20171011_1112_type7.csv new file mode 100644 index 0000000..328823c --- /dev/null +++ b/sampledata/cust1_20171011_1112_type7.csv @@ -0,0 +1,6 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type8.csv b/sampledata/cust1_20171011_1112_type8.csv new file mode 100644 index 0000000..da9ec39 --- /dev/null +++ b/sampledata/cust1_20171011_1112_type8.csv @@ -0,0 +1,3 @@ +"lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem","lorem" +"lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" +"lorem","lorem","","","","lorem","","","","","","","","","","","","","","","lorem" \ No newline at end of file diff --git a/sampledata/cust1_20171011_1112_type9.csv b/sampledata/cust1_20171011_1112_type9.csv new file mode 100644 index 0000000..b3f4ab5 --- /dev/null +++ b/sampledata/cust1_20171011_1112_type9.csv @@ -0,0 +1,2 @@ +"lorem","lorem","lorem","lorem","lorem" +"lorem","","","lorem","lorem" \ No newline at end of file