From 77ddb8cb342e29cdf6ceaae7b35eed421fce843a Mon Sep 17 00:00:00 2001 From: zhilingc Date: Thu, 12 Dec 2019 18:34:29 +0800 Subject: [PATCH 01/12] Async job management --- .../java/feast/core/config/JobConfig.java | 49 +--- .../feast/core/dao/FeatureSetRepository.java | 6 +- .../feast/core/dao/JobInfoRepository.java | 2 +- .../SourceRepository.java} | 15 +- .../java/feast/core/grpc/CoreServiceImpl.java | 69 +---- .../main/java/feast/core/job/JobManager.java | 9 + .../main/java/feast/core/job/JobMonitor.java | 31 -- .../java/feast/core/job/JobUpdateTask.java | 219 ++++++++++++++ .../feast/core/job/ScheduledJobMonitor.java | 87 ------ .../core/job/dataflow/DataflowJobManager.java | 31 ++ .../core/job/dataflow/DataflowJobMonitor.java | 77 ----- .../job/dataflow/DataflowJobStateMapper.java | 2 +- .../core/job/direct/DirectJobStateMapper.java | 2 +- .../job/direct/DirectRunnerJobManager.java | 17 ++ .../job/direct/DirectRunnerJobMonitor.java | 43 --- .../java/feast/core/model/FeatureSet.java | 25 +- .../core/service/JobCoordinatorService.java | 236 ++++++--------- .../feast/core/service/JobStatusService.java | 80 ----- .../java/feast/core/service/SpecService.java | 16 +- .../feast/core/grpc/CoreServiceImplTest.java | 153 ---------- .../feast/core/job/JobUpdateTaskTest.java | 219 ++++++++++++++ .../core/job/ScheduledJobMonitorTest.java | 98 ------- .../job/dataflow/DataflowJobMonitorTest.java | 123 -------- .../service/JobCoordinatorServiceTest.java | 275 ++++++++++-------- .../feast/core/service/SpecServiceTest.java | 30 +- 25 files changed, 791 insertions(+), 1123 deletions(-) rename core/src/main/java/feast/core/{job/NoopJobMonitor.java => dao/SourceRepository.java} (71%) delete mode 100644 core/src/main/java/feast/core/job/JobMonitor.java create mode 100644 core/src/main/java/feast/core/job/JobUpdateTask.java delete mode 100644 core/src/main/java/feast/core/job/ScheduledJobMonitor.java delete mode 100644 core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java delete mode 100644 core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java delete mode 100644 core/src/main/java/feast/core/service/JobStatusService.java delete mode 100644 core/src/test/java/feast/core/grpc/CoreServiceImplTest.java create mode 100644 core/src/test/java/feast/core/job/JobUpdateTaskTest.java delete mode 100644 core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java delete mode 100644 core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java diff --git a/core/src/main/java/feast/core/config/JobConfig.java b/core/src/main/java/feast/core/config/JobConfig.java index c47bb78400..2402a14fe2 100644 --- a/core/src/main/java/feast/core/config/JobConfig.java +++ b/core/src/main/java/feast/core/config/JobConfig.java @@ -24,14 +24,10 @@ import com.google.common.base.Strings; import feast.core.config.FeastProperties.JobProperties; import feast.core.job.JobManager; -import feast.core.job.JobMonitor; -import feast.core.job.NoopJobMonitor; import feast.core.job.Runner; import feast.core.job.dataflow.DataflowJobManager; -import feast.core.job.dataflow.DataflowJobMonitor; import feast.core.job.direct.DirectJobRegistry; import feast.core.job.direct.DirectRunnerJobManager; -import feast.core.job.direct.DirectRunnerJobMonitor; import java.io.IOException; import java.security.GeneralSecurityException; import java.util.HashMap; @@ -54,7 +50,7 @@ public class JobConfig { @Bean @Autowired public JobManager getJobManager( - FeastProperties feastProperties, DirectJobRegistry directJobRegistry) throws 
Exception { + FeastProperties feastProperties, DirectJobRegistry directJobRegistry) { JobProperties jobProperties = feastProperties.getJobs(); Runner runner = Runner.fromString(jobProperties.getRunner()); @@ -97,49 +93,6 @@ public JobManager getJobManager( } } - /** Get a Job Monitor given the runner type and dataflow configuration. */ - @Bean - public JobMonitor getJobMonitor( - FeastProperties feastProperties, DirectJobRegistry directJobRegistry) throws Exception { - - JobProperties jobProperties = feastProperties.getJobs(); - Runner runner = Runner.fromString(jobProperties.getRunner()); - Map jobOptions = jobProperties.getOptions(); - - switch (runner) { - case DATAFLOW: - if (Strings.isNullOrEmpty(jobOptions.getOrDefault("region", null)) - || Strings.isNullOrEmpty(jobOptions.getOrDefault("project", null))) { - log.warn( - "Project and location of the Dataflow runner is not configured, will not do job monitoring"); - return new NoopJobMonitor(); - } - try { - GoogleCredential credential = - GoogleCredential.getApplicationDefault().createScoped(DataflowScopes.all()); - Dataflow dataflow = - new Dataflow( - GoogleNetHttpTransport.newTrustedTransport(), - JacksonFactory.getDefaultInstance(), - credential); - - return new DataflowJobMonitor( - dataflow, jobOptions.get("project"), jobOptions.get("region")); - } catch (IOException e) { - log.error( - "Unable to find credential required for Dataflow monitoring API: {}", e.getMessage()); - } catch (GeneralSecurityException e) { - log.error("Security exception while "); - } catch (Exception e) { - log.error("Unable to initialize DataflowJobMonitor", e); - } - case DIRECT: - return new DirectRunnerJobMonitor(directJobRegistry); - default: - return new NoopJobMonitor(); - } - } - /** Get a direct job registry */ @Bean public DirectJobRegistry directJobRegistry() { diff --git a/core/src/main/java/feast/core/dao/FeatureSetRepository.java b/core/src/main/java/feast/core/dao/FeatureSetRepository.java index ca4d6b9d1c..fd996b331c 100644 --- a/core/src/main/java/feast/core/dao/FeatureSetRepository.java +++ b/core/src/main/java/feast/core/dao/FeatureSetRepository.java @@ -36,11 +36,11 @@ public interface FeatureSetRepository extends JpaRepository List findByName(String name); // find all versions of featureSets with names matching the regex - @Query(nativeQuery = true, value = "SELECT * FROM feature_sets " - + "WHERE name LIKE ?1 ORDER BY name ASC, version ASC") + @Query( + nativeQuery = true, + value = "SELECT * FROM feature_sets " + "WHERE name LIKE ?1 ORDER BY name ASC, version ASC") List findByNameWithWildcardOrderByNameAscVersionAsc(String name); // find all feature sets and order by name and version List findAllByOrderByNameAscVersionAsc(); - } diff --git a/core/src/main/java/feast/core/dao/JobInfoRepository.java b/core/src/main/java/feast/core/dao/JobInfoRepository.java index 6e5820eae7..97de53ecf2 100644 --- a/core/src/main/java/feast/core/dao/JobInfoRepository.java +++ b/core/src/main/java/feast/core/dao/JobInfoRepository.java @@ -28,5 +28,5 @@ public interface JobInfoRepository extends JpaRepository { List findByStatusNotIn(Collection statuses); - List findBySourceIdAndStoreName(String sourceId, String storeName); + List findBySourceIdAndStoreNameOrderByLastUpdatedDesc(String sourceId, String storeName); } diff --git a/core/src/main/java/feast/core/job/NoopJobMonitor.java b/core/src/main/java/feast/core/dao/SourceRepository.java similarity index 71% rename from core/src/main/java/feast/core/job/NoopJobMonitor.java rename to 
core/src/main/java/feast/core/dao/SourceRepository.java index c71010c242..09214a9b68 100644 --- a/core/src/main/java/feast/core/job/NoopJobMonitor.java +++ b/core/src/main/java/feast/core/dao/SourceRepository.java @@ -14,15 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package feast.core.job; +package feast.core.dao; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; +import feast.core.model.Source; +import org.springframework.data.jpa.repository.JpaRepository; -public class NoopJobMonitor implements JobMonitor { - - @Override - public JobStatus getJobStatus(JobInfo job) { - return JobStatus.UNKNOWN; - } -} +/** JPA repository supplying Source objects keyed by id. */ +public interface SourceRepository extends JpaRepository {} diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index 6387fd806b..1d42cfb355 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -16,7 +16,6 @@ */ package feast.core.grpc; -import com.google.common.collect.Lists; import com.google.protobuf.InvalidProtocolBufferException; import feast.core.CoreServiceGrpc.CoreServiceImplBase; import feast.core.CoreServiceProto.ApplyFeatureSetRequest; @@ -28,43 +27,28 @@ import feast.core.CoreServiceProto.ListFeatureSetsRequest; import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.ListStoresRequest; -import feast.core.CoreServiceProto.ListStoresRequest.Filter; import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; -import feast.core.CoreServiceProto.UpdateStoreResponse.Status; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto; -import feast.core.StoreProto.Store; -import feast.core.StoreProto.Store.Subscription; import feast.core.exception.RetrievalException; import feast.core.grpc.interceptors.MonitoringInterceptor; -import feast.core.service.JobCoordinatorService; import feast.core.service.SpecService; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.lognet.springboot.grpc.GRpcService; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.transaction.annotation.Transactional; -/** - * Implementation of the feast core GRPC service. - */ +/** Implementation of the feast core GRPC service. 
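+ * <p>Typical client interaction (a sketch, not part of this patch; assumes a ready gRPC
+ * channel named {@code channel} and a {@code FeatureSetSpec spec}):
+ * <pre>{@code
+ * CoreServiceGrpc.CoreServiceBlockingStub core = CoreServiceGrpc.newBlockingStub(channel);
+ * ApplyFeatureSetResponse response =
+ *     core.applyFeatureSet(ApplyFeatureSetRequest.newBuilder().setFeatureSet(spec).build());
+ * }</pre>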
*/ @Slf4j @GRpcService(interceptors = {MonitoringInterceptor.class}) public class CoreServiceImpl extends CoreServiceImplBase { private SpecService specService; - private JobCoordinatorService jobCoordinatorService; @Autowired - public CoreServiceImpl(SpecService specService, JobCoordinatorService jobCoordinatorService) { + public CoreServiceImpl(SpecService specService) { this.specService = specService; - this.jobCoordinatorService = jobCoordinatorService; } @Override @@ -118,31 +102,6 @@ public void applyFeatureSet( ApplyFeatureSetRequest request, StreamObserver responseObserver) { try { ApplyFeatureSetResponse response = specService.applyFeatureSet(request.getFeatureSet()); - ListStoresResponse stores = specService.listStores(Filter.newBuilder().build()); - for (Store store : stores.getStoreList()) { - Set featureSetSpecs = new HashSet<>(); - for (Subscription subscription : store.getSubscriptionsList()) { - featureSetSpecs.addAll( - specService - .listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(subscription.getName()) - .setFeatureSetVersion(subscription.getVersion()) - .build()) - .getFeatureSetsList()); - } - if (!featureSetSpecs.isEmpty() && featureSetSpecs.contains(response.getFeatureSet())) { - // We use the response featureSet source because it contains the information - // about whether to default to the default feature stream or not - SourceProto.Source source = response.getFeatureSet().getSource(); - featureSetSpecs = - featureSetSpecs.stream() - .filter(fs -> fs.getSource().equals(source)) - .collect(Collectors.toSet()); - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSetSpecs), source, store); - } - } responseObserver.onNext(response); responseObserver.onCompleted(); } catch (Exception e) { @@ -158,30 +117,6 @@ public void updateStore( UpdateStoreResponse response = specService.updateStore(request); responseObserver.onNext(response); responseObserver.onCompleted(); - - if (!response.getStatus().equals(Status.NO_CHANGE)) { - Set featureSetSpecs = new HashSet<>(); - Store store = response.getStore(); - for (Subscription subscription : store.getSubscriptionsList()) { - featureSetSpecs.addAll( - specService - .listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(subscription.getName()) - .setFeatureSetVersion(subscription.getVersion()) - .build()) - .getFeatureSetsList()); - } - if (featureSetSpecs.size() == 0) { - return; - } - featureSetSpecs.stream() - .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) - .entrySet() - .stream() - .forEach( - kv -> jobCoordinatorService.startOrUpdateJob(kv.getValue(), kv.getKey(), store)); - } } catch (Exception e) { log.error("Exception has occurred in UpdateStore method: ", e); responseObserver.onError(e); diff --git a/core/src/main/java/feast/core/job/JobManager.java b/core/src/main/java/feast/core/job/JobManager.java index 5147671c84..d48ee77c3f 100644 --- a/core/src/main/java/feast/core/job/JobManager.java +++ b/core/src/main/java/feast/core/job/JobManager.java @@ -19,6 +19,7 @@ import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto.Store; import feast.core.model.JobInfo; +import feast.core.model.JobStatus; import java.util.List; public interface JobManager { @@ -54,4 +55,12 @@ public interface JobManager { * @param extId runner specific job id. */ void abortJob(String extId); + + /** + * Get status of a job given runner-specific job ID. + * + * @param job job. + * @return job status. 
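+   *     <p>Illustrative caller-side use (a sketch; mirrors how JobUpdateTask polls for status
+   *     changes):
+   *     <pre>{@code
+   *     JobStatus latest = jobManager.getJobStatus(jobInfo);
+   *     if (latest != jobInfo.getStatus()) {
+   *       jobInfo.setStatus(latest); // the caller persists the transition
+   *     }
+   *     }</pre>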
+ */ + JobStatus getJobStatus(JobInfo job); } diff --git a/core/src/main/java/feast/core/job/JobMonitor.java b/core/src/main/java/feast/core/job/JobMonitor.java deleted file mode 100644 index 740f4bdb87..0000000000 --- a/core/src/main/java/feast/core/job/JobMonitor.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job; - -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; - -public interface JobMonitor { - - /** - * Get status of a job given runner-specific job ID. - * - * @param job job. - * @return job status. - */ - JobStatus getJobStatus(JobInfo job); -} diff --git a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java new file mode 100644 index 0000000000..5becbc6ee7 --- /dev/null +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -0,0 +1,219 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.job; + +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.StoreProto; +import feast.core.log.Action; +import feast.core.log.AuditLogger; +import feast.core.log.Resource; +import feast.core.model.FeatureSet; +import feast.core.model.JobInfo; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; + +/** + * JobUpdateTask is a callable that starts or updates a job given a set of featureSetSpecs, as well + * as their source and sink. + * + *
+   * <p>
When complete, the JobUpdateTask returns the updated JobInfo object to be pushed to the db. + */ +@Slf4j +public class JobUpdateTask implements Callable { + + private final long JOB_UPDATE_TIMEOUT_SECONDS = 240; // 4 minutes + + private final List featureSetSpecs; + private final SourceProto.Source sourceSpec; + private final StoreProto.Store store; + private final Optional originalJob; + private JobManager jobManager; + + public JobUpdateTask( + List featureSetSpecs, + SourceProto.Source sourceSpec, + StoreProto.Store store, + Optional originalJob, + JobManager jobManager) { + + this.featureSetSpecs = featureSetSpecs; + this.sourceSpec = sourceSpec; + this.store = store; + this.originalJob = originalJob; + this.jobManager = jobManager; + } + + @Override + public JobInfo call() { + ExecutorService executorService = Executors.newSingleThreadExecutor(); + Source source = Source.fromProto(sourceSpec); + Future submittedJob; + if (originalJob.isPresent()) { + Set existingFeatureSetsPopulatedByJob = + originalJob.get().getFeatureSets().stream() + .map(FeatureSet::getId) + .collect(Collectors.toSet()); + Set newFeatureSetsPopulatedByJob = + featureSetSpecs.stream() + .map(fs -> fs.getName() + ":" + fs.getVersion()) + .collect(Collectors.toSet()); + if (existingFeatureSetsPopulatedByJob.size() == newFeatureSetsPopulatedByJob.size() + && existingFeatureSetsPopulatedByJob.containsAll(newFeatureSetsPopulatedByJob)) { + JobInfo job = originalJob.get(); + JobStatus newJobStatus = jobManager.getJobStatus(job); + if (newJobStatus != job.getStatus()) { + AuditLogger.log( + Resource.JOB, + job.getId(), + Action.STATUS_CHANGE, + "Job status updated: changed from %s to %s", + job.getStatus(), + newJobStatus); + } + job.setStatus(newJobStatus); + return job; + } else { + submittedJob = + executorService.submit(() -> updateJob(originalJob.get(), featureSetSpecs, store)); + } + } else { + String jobId = createJobId(source.getId(), store.getName()); + submittedJob = + executorService.submit(() -> startJob(jobId, featureSetSpecs, sourceSpec, store)); + } + + JobInfo job = null; + try { + job = submittedJob.get(getJobUpdateTimeoutSeconds(), TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + log.warn("Unable to start job for source {} and sink {}: {}", source, store, e.getMessage()); + executorService.shutdownNow(); + } + return job; + } + + /** Start or update the job to ingest data to the sink. 
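+   * On success the returned JobInfo carries the runner-assigned external id and RUNNING status;
+   * if submission fails the JobInfo comes back in ERROR state rather than throwing, e.g.
+   * (sketch):
+   * <pre>{@code
+   * JobInfo info = startJob(jobId, featureSetSpecs, source, sinkSpec);
+   * // info.getStatus() is either JobStatus.RUNNING or JobStatus.ERROR
+   * }</pre>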
*/ + private JobInfo startJob( + String jobId, + List featureSetSpecs, + SourceProto.Source source, + StoreProto.Store sinkSpec) { + + List featureSets = + featureSetSpecs.stream() + .map( + spec -> { + FeatureSet featureSet = new FeatureSet(); + featureSet.setId(spec.getName() + ":" + spec.getVersion()); + return featureSet; + }) + .collect(Collectors.toList()); + String extId = ""; + try { + AuditLogger.log( + Resource.JOB, + jobId, + Action.SUBMIT, + "Building graph and submitting to %s", + jobManager.getRunnerType().getName()); + + extId = jobManager.startJob(jobId, featureSetSpecs, sinkSpec); + if (extId.isEmpty()) { + throw new RuntimeException( + String.format("Could not submit job: \n%s", "unable to retrieve job external id")); + } + + AuditLogger.log( + Resource.JOB, + jobId, + Action.STATUS_CHANGE, + "Job submitted to runner %s with ext id %s.", + jobManager.getRunnerType().getName(), + extId); + + return new JobInfo( + jobId, + extId, + jobManager.getRunnerType().getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(sinkSpec), + featureSets, + JobStatus.RUNNING); + } catch (Exception e) { + AuditLogger.log( + Resource.JOB, + jobId, + Action.STATUS_CHANGE, + "Job failed to be submitted to runner %s. Job status changed to ERROR.", + jobManager.getRunnerType().getName()); + + return new JobInfo( + jobId, + extId, + jobManager.getRunnerType().getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(sinkSpec), + featureSets, + JobStatus.ERROR); + } + } + + /** Update the given job */ + private JobInfo updateJob( + JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { + jobInfo.setFeatureSets( + featureSetSpecs.stream() + .map(spec -> FeatureSet.fromProto(spec)) + .collect(Collectors.toList())); + jobInfo.setStore(feast.core.model.Store.fromProto(store)); + AuditLogger.log( + Resource.JOB, + jobInfo.getId(), + Action.UPDATE, + "Updating job %s for runner %s", + jobInfo.getId(), + jobManager.getRunnerType().getName()); + String extId = jobManager.updateJob(jobInfo); + jobInfo.setExtId(extId); + return jobInfo; + } + + String createJobId(String sourceId, String storeName) { + String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); + String sourceIdTrunc = sourceId.split("/")[0].toLowerCase(); + String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; + return jobId.replaceAll("_", "-"); + } + + long getJobUpdateTimeoutSeconds() { + return JOB_UPDATE_TIMEOUT_SECONDS; + } +} diff --git a/core/src/main/java/feast/core/job/ScheduledJobMonitor.java b/core/src/main/java/feast/core/job/ScheduledJobMonitor.java deleted file mode 100644 index cc87d5fcf6..0000000000 --- a/core/src/main/java/feast/core/job/ScheduledJobMonitor.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.core.job; - -import com.google.common.base.Strings; -import feast.core.dao.JobInfoRepository; -import feast.core.log.Action; -import feast.core.log.AuditLogger; -import feast.core.log.Resource; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import java.util.Collection; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.scheduling.annotation.Scheduled; -import org.springframework.stereotype.Component; -import org.springframework.transaction.annotation.Transactional; - -@Slf4j -@Component -public class ScheduledJobMonitor { - - private final JobMonitor jobMonitor; - private final JobInfoRepository jobInfoRepository; - - @Autowired - public ScheduledJobMonitor(JobMonitor jobMonitor, JobInfoRepository jobInfoRepository) { - this.jobMonitor = jobMonitor; - this.jobInfoRepository = jobInfoRepository; - } - - // TODO: Keep receiving the following exception with these arguments below - // Caused by: java.lang.IllegalStateException: Encountered invalid @Scheduled method - // 'pollStatusAndMetrics': Circular placeholder reference .. in property definitions - // @Scheduled( - // fixedDelayString = "${feast.jobs.monitor.fixedDelay}", - // initialDelayString = "${feast.jobs.monitor.initialDelay}") - // - @Transactional - @Scheduled(cron = "* * * * * *") - public void pollStatusAndMetrics() { - updateJobStatus(); - } - - /** Periodically pull status of job which is not in terminal state and update the status in DB. */ - /* package */ void updateJobStatus() { - if (jobMonitor instanceof NoopJobMonitor) { - return; - } - - Collection nonTerminalJobs = - jobInfoRepository.findByStatusNotIn(JobStatus.getTerminalState()); - - for (JobInfo job : nonTerminalJobs) { - String jobId = job.getExtId(); - if (Strings.isNullOrEmpty(jobId)) { - continue; - } - JobStatus jobStatus = jobMonitor.getJobStatus(job); - if (job.getStatus() != jobStatus) { - AuditLogger.log( - Resource.JOB, - jobId, - Action.STATUS_CHANGE, - "Job status updated from %s to %s", - job.getStatus(), - jobStatus); - } - job.setStatus(jobStatus); - jobInfoRepository.save(job); - } - } -} diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 4e4533c4c9..08b1acb1b0 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -32,6 +32,7 @@ import feast.core.job.Runner; import feast.core.model.FeatureSet; import feast.core.model.JobInfo; +import feast.core.model.JobStatus; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import feast.ingestion.options.ImportOptions; @@ -125,6 +126,36 @@ public void abortJob(String dataflowJobId) { } } + /** + * Get status of a dataflow job with given id and try to map it into Feast's JobStatus. + * + * @param jobInfo JobInfo containing dataflow job id + * @return status of the job, or return {@link JobStatus#UNKNOWN} if error happens. 
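+   *     <p>States are translated by {@link DataflowJobStateMapper}, roughly as below (a sketch;
+   *     the authoritative mapping lives in that class):
+   *     <pre>{@code
+   *     DataflowJobStateMapper.map("JOB_STATE_RUNNING"); // -> JobStatus.RUNNING
+   *     DataflowJobStateMapper.map("JOB_STATE_FAILED");  // -> JobStatus.ERROR
+   *     }</pre>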
+ */ + @Override + public JobStatus getJobStatus(JobInfo jobInfo) { + if (!Runner.DATAFLOW.getName().equals(jobInfo.getRunner())) { + return jobInfo.getStatus(); + } + + try { + Job job = + dataflow + .projects() + .locations() + .jobs() + .get(projectId, location, jobInfo.getExtId()) + .execute(); + return DataflowJobStateMapper.map(job.getCurrentState()); + } catch (Exception e) { + log.error( + "Unable to retrieve status of a dataflow job with id : {}\ncause: {}", + jobInfo.getExtId(), + e.getMessage()); + } + return JobStatus.UNKNOWN; + } + private String submitDataflowJob( String jobName, List featureSets, Store sink, boolean update) { try { diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java deleted file mode 100644 index 9394878548..0000000000 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job.dataflow; - -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.api.services.dataflow.Dataflow; -import com.google.api.services.dataflow.model.Job; -import feast.core.job.JobMonitor; -import feast.core.job.Runner; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -public class DataflowJobMonitor implements JobMonitor { - - private final String projectId; - private final String location; - private final Dataflow dataflow; - private final DataflowJobStateMapper jobStateMaper; - - private static final String METRICS_NAMESPACE_KEY = "namespace"; - private static final String FEAST_METRICS_NAMESPACE = "feast"; - - public DataflowJobMonitor(Dataflow dataflow, String projectId, String location) { - checkNotNull(projectId); - checkNotNull(location); - this.projectId = projectId; - this.location = location; - this.dataflow = dataflow; - this.jobStateMaper = new DataflowJobStateMapper(); - } - - /** - * Get status of a dataflow job with given id and try to map it into Feast's JobStatus. - * - * @param jobInfo dataflow job id. - * @return status of the job, or return {@link JobStatus#UNKNOWN} if error happens. 
- */ - public JobStatus getJobStatus(JobInfo jobInfo) { - if (!Runner.DATAFLOW.getName().equals(jobInfo.getRunner())) { - return jobInfo.getStatus(); - } - - try { - Job job = - dataflow - .projects() - .locations() - .jobs() - .get(projectId, location, jobInfo.getExtId()) - .execute(); - return jobStateMaper.map(job.getCurrentState()); - } catch (Exception e) { - log.error( - "Unable to retrieve status of a dataflow job with id : {}\ncause: {}", - jobInfo.getExtId(), - e.getMessage()); - } - return JobStatus.UNKNOWN; - } -} diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java index c94c84ce8e..ec5738be69 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java @@ -58,7 +58,7 @@ public class DataflowJobStateMapper { * @return JobStatus. * @throws IllegalArgumentException if jobState is invalid. */ - public JobStatus map(String jobState) { + public static JobStatus map(String jobState) { DataflowJobState dfJobState = DataflowJobState.valueOf(jobState); if (DATAFLOW_TO_FEAST_JOB_STATUS.containsKey(dfJobState)) { return DATAFLOW_TO_FEAST_JOB_STATUS.get(dfJobState); diff --git a/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java b/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java index e0e521e6a4..dd1c81d83e 100644 --- a/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java +++ b/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java @@ -42,7 +42,7 @@ public class DirectJobStateMapper { * @param jobState beam PipelineResult State * @return JobStatus */ - public JobStatus map(State jobState) { + public static JobStatus map(State jobState) { return BEAM_TO_FEAT_JOB_STATUS.get(jobState); } } diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index a09fd39495..5027cabdef 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -28,6 +28,7 @@ import feast.core.job.Runner; import feast.core.model.FeatureSet; import feast.core.model.JobInfo; +import feast.core.model.JobStatus; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import feast.ingestion.options.ImportOptions; @@ -157,4 +158,20 @@ public void abortJob(String extId) { public PipelineResult runPipeline(ImportOptions pipelineOptions) throws IOException { return ImportJob.runPipeline(pipelineOptions); } + + /** + * Gets the state of the direct runner job. Direct runner jobs only have 2 states: RUNNING and + * ABORTED. + * + * @param job JobInfo of the desired job. + * @return JobStatus of the job. 
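+   *     <p>Concretely (mirrors the implementation below):
+   *     <pre>{@code
+   *     DirectJob directJob = jobs.get(job.getId());
+   *     return directJob == null
+   *         ? JobStatus.ABORTED
+   *         : DirectJobStateMapper.map(directJob.getPipelineResult().getState());
+   *     }</pre>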
+ */ + @Override + public JobStatus getJobStatus(JobInfo job) { + DirectJob directJob = jobs.get(job.getId()); + if (directJob == null) { + return JobStatus.ABORTED; + } + return DirectJobStateMapper.map(directJob.getPipelineResult().getState()); + } } diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java deleted file mode 100644 index 50d02b2728..0000000000 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job.direct; - -import feast.core.job.JobMonitor; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -public class DirectRunnerJobMonitor implements JobMonitor { - - private final DirectJobRegistry jobs; - private final DirectJobStateMapper jobStateMapper; - - public DirectRunnerJobMonitor(DirectJobRegistry jobs) { - this.jobs = jobs; - jobStateMapper = new DirectJobStateMapper(); - } - - @Override - public JobStatus getJobStatus(JobInfo job) { - DirectJob directJob = jobs.get(job.getId()); - if (directJob == null) { - return JobStatus.ABORTED; - } - return jobStateMapper.map(directJob.getPipelineResult().getState()); - } -} diff --git a/core/src/main/java/feast/core/model/FeatureSet.java b/core/src/main/java/feast/core/model/FeatureSet.java index 8ba7162d2f..755ef687e3 100644 --- a/core/src/main/java/feast/core/model/FeatureSet.java +++ b/core/src/main/java/feast/core/model/FeatureSet.java @@ -155,50 +155,49 @@ public FeatureSetSpec toProto() throws InvalidProtocolBufferException { * @return boolean denoting if the source or schema have changed. 
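   *     <p>Illustrative (hypothetical feature sets; follows from the field-map comparison
   *     below):
   *     <pre>{@code
   *     fs.equalTo(sameFieldsDifferentOrder); // true
   *     fs.equalTo(withOneExtraField);        // false
   *     }</pre>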
*/ public boolean equalTo(FeatureSet other) { - if(!name.equals(other.getName())){ + if (!name.equals(other.getName())) { return false; } - if (!source.equalTo(other.getSource())){ + if (!source.equalTo(other.getSource())) { return false; } - if (maxAgeSeconds != other.maxAgeSeconds){ + if (maxAgeSeconds != other.maxAgeSeconds) { return false; } // Create a map of all fields in this feature set Map fields = new HashMap<>(); - for (Field e : entities){ + for (Field e : entities) { fields.putIfAbsent(e.getName(), e); } - for (Field f : features){ + for (Field f : features) { fields.putIfAbsent(f.getName(), f); } // Ensure map size is consistent with existing fields - if (fields.size() != other.features.size() + other.entities.size()) - { + if (fields.size() != other.features.size() + other.entities.size()) { return false; } // Ensure the other entities and fields exist in the field map - for (Field e : other.entities){ - if(!fields.containsKey(e.getName())){ + for (Field e : other.entities) { + if (!fields.containsKey(e.getName())) { return false; } - if (!e.equals(fields.get(e.getName()))){ + if (!e.equals(fields.get(e.getName()))) { return false; } } - for (Field f : features){ - if(!fields.containsKey(f.getName())){ + for (Field f : features) { + if (!fields.containsKey(f.getName())) { return false; } - if (!f.equals(fields.get(f.getName()))){ + if (!f.equals(fields.get(f.getName()))) { return false; } } diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index c56531a9da..521991d58f 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -16,30 +16,33 @@ */ package feast.core.service; -import com.google.common.base.Strings; +import com.google.protobuf.InvalidProtocolBufferException; +import feast.core.CoreServiceProto.ListFeatureSetsRequest; +import feast.core.CoreServiceProto.ListStoresRequest.Filter; +import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto; import feast.core.StoreProto; +import feast.core.StoreProto.Store.Subscription; import feast.core.dao.JobInfoRepository; -import feast.core.exception.JobExecutionException; -import feast.core.exception.RetrievalException; import feast.core.job.JobManager; -import feast.core.log.Action; -import feast.core.log.AuditLogger; -import feast.core.log.Resource; -import feast.core.model.FeatureSet; +import feast.core.job.JobUpdateTask; import feast.core.model.JobInfo; import feast.core.model.JobStatus; import feast.core.model.Source; import feast.core.model.Store; -import java.time.Instant; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -47,165 +50,102 @@ @Service public class JobCoordinatorService { + private final long POLLING_INTERVAL_MILLISECONDS = 60000; // 1 min private JobInfoRepository 
jobInfoRepository; + private SpecService specService; private JobManager jobManager; @Autowired - public JobCoordinatorService(JobInfoRepository jobInfoRepository, JobManager jobManager) { + public JobCoordinatorService( + JobInfoRepository jobInfoRepository, SpecService specService, JobManager jobManager) { this.jobInfoRepository = jobInfoRepository; + this.specService = specService; this.jobManager = jobManager; } /** - * Start or update a job given the list of FeatureSets to populate and the store to sink to. If - * there has been no change in the featureSet, and there is a running job for the featureSet, this - * method will do nothing. + * Poll does the following: + * + *
+   * <p>1) Checks DB and extracts jobs that have to run based on the specs available
+   *
+   * <p>2) Does a diff with the current set of jobs, starts/updates job(s) if necessary
+   *
+   * <p>
3) Updates job object in DB with status, feature sets */ @Transactional - public JobInfo startOrUpdateJob( - List featureSetSpecs, SourceProto.Source sourceSpec, StoreProto.Store store) { - Source source = Source.fromProto(sourceSpec); - Optional job = getJob(source.getId(), store.getName()); - if (job.isPresent()) { - Set existingFeatureSetsPopulatedByJob = - job.get().getFeatureSets().stream().map(FeatureSet::getId).collect(Collectors.toSet()); - Set newFeatureSetsPopulatedByJob = + @Scheduled(fixedDelay = POLLING_INTERVAL_MILLISECONDS) + public void Poll() { + log.info("Polling for new jobs..."); + List jobUpdateTasks = new ArrayList<>(); + ListStoresResponse listStoresResponse = specService.listStores(Filter.newBuilder().build()); + for (StoreProto.Store store : listStoresResponse.getStoreList()) { + Set featureSetSpecs = new HashSet<>(); + try { + for (Subscription subscription : store.getSubscriptionsList()) { + featureSetSpecs.addAll( + specService + .listFeatureSets( + ListFeatureSetsRequest.Filter.newBuilder() + .setFeatureSetName(subscription.getName()) + .setFeatureSetVersion(subscription.getVersion()) + .build()) + .getFeatureSetsList()); + } + if (!featureSetSpecs.isEmpty()) { featureSetSpecs.stream() - .map(fs -> fs.getName() + ":" + fs.getVersion()) - .collect(Collectors.toSet()); - if (existingFeatureSetsPopulatedByJob.size() == newFeatureSetsPopulatedByJob.size() - && existingFeatureSetsPopulatedByJob.containsAll(newFeatureSetsPopulatedByJob)) { - return job.get(); - } else { - return updateJob(job.get(), featureSetSpecs, store); + .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) + .entrySet() + .stream() + .forEach( + kv -> { + Optional originalJob = + getJob(Source.fromProto(kv.getKey()), Store.fromProto(store)); + jobUpdateTasks.add( + new JobUpdateTask( + kv.getValue(), kv.getKey(), store, originalJob, jobManager)); + }); + } + } catch (InvalidProtocolBufferException e) { + log.warn("Unable to retrieve feature sets for store {}: {}", store, e.getMessage()); } - } else { - return startJob( - createJobId(source.getId(), store.getName()), featureSetSpecs, sourceSpec, store); } - } - - /** Get the non-terminal job associated with the given featureSet name and store name, if any. */ - private Optional getJob(String sourceId, String storeName) { - List jobs = jobInfoRepository.findBySourceIdAndStoreName(sourceId, storeName); - if (jobs.isEmpty()) { - return Optional.empty(); + if (jobUpdateTasks.size() == 0) { + log.info("No jobs found."); + return; } - return jobs.stream() - .filter(job -> !(JobStatus.getTerminalState().contains(job.getStatus()))) - .findFirst(); - } - - /** Start or update the job to ingest data to the sink. 
*/ - private JobInfo startJob( - String jobId, - List featureSetSpecs, - SourceProto.Source source, - StoreProto.Store sinkSpec) { - try { - AuditLogger.log( - Resource.JOB, - jobId, - Action.SUBMIT, - "Building graph and submitting to %s", - jobManager.getRunnerType().getName()); - String extId = jobManager.startJob(jobId, featureSetSpecs, sinkSpec); - if (extId.isEmpty()) { - throw new RuntimeException( - String.format("Could not submit job: \n%s", "unable to retrieve job external id")); + log.info("Creating/Updating {} jobs...", jobUpdateTasks.size()); + ExecutorService executorService = Executors.newFixedThreadPool(jobUpdateTasks.size()); + ExecutorCompletionService ecs = new ExecutorCompletionService<>(executorService); + jobUpdateTasks.forEach(ecs::submit); + + int completedTasks = 0; + while (completedTasks < jobUpdateTasks.size()) { + try { + JobInfo jobInfo = ecs.take().get(); + if (jobInfo != null) { + jobInfoRepository.saveAndFlush(jobInfo); + } + } catch (ExecutionException | InterruptedException e) { + log.warn("Unable to start or update job: {}", e.getMessage()); } - - AuditLogger.log( - Resource.JOB, - jobId, - Action.STATUS_CHANGE, - "Job submitted to runner %s with ext id %s.", - jobManager.getRunnerType().getName(), - extId); - - List featureSets = new ArrayList<>(); - - for (FeatureSetSpec featureSetSpec : featureSetSpecs) { - FeatureSet featureSet = new FeatureSet(); - featureSet.setId(featureSetSpec.getName() + ":" + featureSetSpec.getVersion()); - featureSets.add(featureSet); - } - - JobInfo jobInfo = - new JobInfo( - jobId, - extId, - jobManager.getRunnerType().getName(), - Source.fromProto(source), - Store.fromProto(sinkSpec), - featureSets, - JobStatus.RUNNING); - - return jobInfoRepository.save(jobInfo); - } catch (Exception e) { - updateJobStatus(jobId, JobStatus.ERROR); - AuditLogger.log( - Resource.JOB, - jobId, - Action.STATUS_CHANGE, - "Job failed to be submitted to runner %s. Job status changed to ERROR.", - jobManager.getRunnerType().getName()); - throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); + completedTasks++; } } - /** Update the given job */ - private JobInfo updateJob( - JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { - jobInfo.setFeatureSets( - featureSetSpecs.stream() - .map(spec -> FeatureSet.fromProto(spec)) - .collect(Collectors.toList())); - jobInfo.setStore(Store.fromProto(store)); - String extId = jobManager.updateJob(jobInfo); - jobInfo.setExtId(extId); - return jobInfoRepository.save(jobInfo); - } - - /** - * Drain the given job. If this is successful, the job will start the draining process. When the - * draining process is complete, the job will be cleaned up and removed. - * - *
-   * <p>
Batch jobs will be cancelled, as draining these jobs is not supported by beam. - * - * @param id feast-internal id of a job - */ - public void abortJob(String id) { - Optional jobOptional = jobInfoRepository.findById(id); - if (!jobOptional.isPresent()) { - throw new RetrievalException(Strings.lenientFormat("Unable to retrieve job with id %s", id)); - } - JobInfo job = jobOptional.get(); - if (JobStatus.getTerminalState().contains(job.getStatus())) { - throw new IllegalStateException("Unable to stop job already in terminal state"); - } - jobManager.abortJob(job.getExtId()); - job.setStatus(JobStatus.ABORTING); - - AuditLogger.log(Resource.JOB, id, Action.ABORT, "Triggering draining of job"); - jobInfoRepository.saveAndFlush(job); - } - - /** Update a given job's status */ - public void updateJobStatus(String jobId, JobStatus status) { - Optional jobRecordOptional = jobInfoRepository.findById(jobId); - if (jobRecordOptional.isPresent()) { - JobInfo jobRecord = jobRecordOptional.get(); - jobRecord.setStatus(status); - jobInfoRepository.save(jobRecord); + @Transactional + public Optional getJob(Source source, Store store) { + List jobs = + jobInfoRepository.findBySourceIdAndStoreNameOrderByLastUpdatedDesc( + source.getId(), store.getName()); + jobs = + jobs.stream() + .filter(job -> !JobStatus.getTerminalState().contains(job.getStatus())) + .collect(Collectors.toList()); + if (jobs.size() == 0) { + return Optional.empty(); } - } - - public String createJobId(String sourceId, String storeName) { - String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); - String sourceIdTrunc = sourceId.split("/")[0].toLowerCase(); - String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; - return jobId.replaceAll("_", "-"); + // return the latest + return Optional.of(jobs.get(0)); } } diff --git a/core/src/main/java/feast/core/service/JobStatusService.java b/core/src/main/java/feast/core/service/JobStatusService.java deleted file mode 100644 index db6cd41ee8..0000000000 --- a/core/src/main/java/feast/core/service/JobStatusService.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.core.service; - -import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Service; - -@Slf4j -@Service -public class JobStatusService { - // - // private JobInfoRepository jobInfoRepository; - // private MetricsRepository metricsRepository; - // - // @Autowired - // public JobStatusService( - // JobInfoRepository jobInfoRepository, - // MetricsRepository metricsRepository) { - // this.jobInfoRepository = jobInfoRepository; - // this.metricsRepository = metricsRepository; - // } - // - // /** - // * Lists all jobs registered to the db, sorted by provided orderBy - // * - // * @param orderBy list order - // * @return list of JobDetails - // */ - // @Transactional - // public List listJobs(Sort orderBy) { - // List jobs = jobInfoRepository.findAll(orderBy); - // return jobs.stream().map(JobInfo::getJobDetail).collect(Collectors.toList()); - // } - // - // /** - // * Lists all jobs registered to the db, sorted chronologically by creation time - // * - // * @return list of JobDetails - // */ - // @Transactional - // public List listJobs() { - // return listJobs(Sort.by(Sort.Direction.ASC, "created")); - // } - // - // /** - // * Gets information regarding a single job. - // * - // * @param id feast-internal job id - // * @return JobDetail for that job - // */ - // @Transactional - // public JobDetail getJob(String id) { - // Optional job = jobInfoRepository.findById(id); - // if (!job.isPresent()) { - // throw new RetrievalException(Strings.lenientFormat("Unable to retrieve job with id %s", - // id)); - // } - // JobDetail.Builder jobDetailBuilder = job.get().getJobDetail().toBuilder(); - // List metrics = metricsRepository.findByJobInfo_Id(id); - // for (Metrics metric : metrics) { - // jobDetailBuilder.putMetrics(metric.getName(), metric.getValue()); - // } - // return jobDetailBuilder.build(); - // } - -} diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index 4ea2d288f2..64c747641f 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -110,8 +110,9 @@ public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) if (featureSet == null) { throw io.grpc.Status.NOT_FOUND - .withDescription(String.format("Feature set with name \"%s\" could not be found.", - request.getName())) + .withDescription( + String.format( + "Feature set with name \"%s\" could not be found.", request.getName())) .asRuntimeException(); } } else { @@ -121,13 +122,14 @@ public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) if (featureSet == null) { throw io.grpc.Status.NOT_FOUND - .withDescription(String.format("Feature set with name \"%s\" and version \"%s\" could " - + "not be found.", request.getName(), request.getVersion())) + .withDescription( + String.format( + "Feature set with name \"%s\" and version \"%s\" could " + "not be found.", + request.getName(), request.getVersion())) .asRuntimeException(); } } - // Only a single item in list, return successfully return GetFeatureSetResponse.newBuilder().setFeatureSet(featureSet.toProto()).build(); } @@ -154,7 +156,9 @@ public ListFeatureSetsResponse listFeatureSets(ListFeatureSetsRequest.Filter fil if (name.equals("")) { featureSets = featureSetRepository.findAllByOrderByNameAscVersionAsc(); } else { - featureSets = featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc(name.replace('*', '%')); + featureSets = + 
featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc( + name.replace('*', '%')); featureSets = featureSets.stream() .filter(getVersionFilter(filter.getFeatureSetVersion())) diff --git a/core/src/test/java/feast/core/grpc/CoreServiceImplTest.java b/core/src/test/java/feast/core/grpc/CoreServiceImplTest.java deleted file mode 100644 index 5a3794cc65..0000000000 --- a/core/src/test/java/feast/core/grpc/CoreServiceImplTest.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.grpc; - -import static org.hamcrest.Matchers.containsInAnyOrder; -import static org.junit.Assert.*; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; -import static org.mockito.MockitoAnnotations.initMocks; - -import com.google.protobuf.InvalidProtocolBufferException; -import feast.core.CoreServiceProto.ApplyFeatureSetRequest; -import feast.core.CoreServiceProto.ApplyFeatureSetResponse; -import feast.core.CoreServiceProto.ApplyFeatureSetResponse.Status; -import feast.core.CoreServiceProto.ListFeatureSetsRequest; -import feast.core.CoreServiceProto.ListFeatureSetsResponse; -import feast.core.CoreServiceProto.ListStoresResponse; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.Source; -import feast.core.SourceProto.SourceType; -import feast.core.StoreProto.Store; -import feast.core.StoreProto.Store.RedisConfig; -import feast.core.StoreProto.Store.StoreType; -import feast.core.StoreProto.Store.Subscription; -import feast.core.service.JobCoordinatorService; -import feast.core.service.SpecService; -import io.grpc.stub.StreamObserver; -import java.util.ArrayList; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.ArgumentMatchers; -import org.mockito.Captor; -import org.mockito.Mock; - -public class CoreServiceImplTest { - - @Mock private JobCoordinatorService jobCoordinatorService; - - @Mock private SpecService specService; - - @Captor private ArgumentCaptor> fsListArgCaptor; - - @Before - public void setUp() { - initMocks(this); - } - - @Test - public void shouldPassCorrectListOfFeatureSetsToJobService() - throws InvalidProtocolBufferException { - CoreServiceImpl coreService = new CoreServiceImpl(specService, jobCoordinatorService); - Store store = - Store.newBuilder() - .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0")) - .build(); - FeatureSetSpec fs1Sc1 = - FeatureSetSpec.newBuilder() - .setName("feature_set") - .setVersion(1) - .setSource( - Source.newBuilder() - .setType(SourceType.KAFKA) - .setKafkaSourceConfig( - KafkaSourceConfig.newBuilder() - 
.setBootstrapServers("kafka:9092") - .setTopic("topic1") - .build())) - .build(); - FeatureSetSpec fs2Sc1 = - FeatureSetSpec.newBuilder() - .setName("feature_set_other") - .setVersion(1) - .setSource( - Source.newBuilder() - .setType(SourceType.KAFKA) - .setKafkaSourceConfig( - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("topic1") - .build())) - .build(); - FeatureSetSpec fs3Sc2 = - FeatureSetSpec.newBuilder() - .setName("feature_set") - .setVersion(2) - .setSource( - Source.newBuilder() - .setType(SourceType.KAFKA) - .setKafkaSourceConfig( - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("topic2") - .build())) - .build(); - when(specService.applyFeatureSet(fs1Sc1)) - .thenReturn( - ApplyFeatureSetResponse.newBuilder() - .setStatus(Status.CREATED) - .setFeatureSet(fs1Sc1) - .build()); - when(specService.listStores(ArgumentMatchers.any())) - .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(specService.listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName("*") - .setFeatureSetVersion(">0") - .build())) - .thenReturn( - ListFeatureSetsResponse.newBuilder() - .addFeatureSets(fs1Sc1) - .addFeatureSets(fs3Sc2) - .addFeatureSets(fs2Sc1) - .build()); - - coreService.applyFeatureSet( - ApplyFeatureSetRequest.newBuilder().setFeatureSet(fs1Sc1).build(), - new StreamObserver() { - @Override - public void onNext(ApplyFeatureSetResponse applyFeatureSetResponse) {} - - @Override - public void onError(Throwable throwable) {} - - @Override - public void onCompleted() {} - }); - - verify(jobCoordinatorService, times(1)) - .startOrUpdateJob(fsListArgCaptor.capture(), eq(fs1Sc1.getSource()), eq(store)); - - assertThat(fsListArgCaptor.getValue(), containsInAnyOrder(fs1Sc1, fs2Sc1)); - } -} diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java new file mode 100644 index 0000000000..fbbf67e5a2 --- /dev/null +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -0,0 +1,219 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.job; + +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertThat; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.initMocks; + +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; +import feast.core.StoreProto; +import feast.core.StoreProto.Store.RedisConfig; +import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; +import feast.core.model.FeatureSet; +import feast.core.model.JobInfo; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; +import java.util.Arrays; +import java.util.Optional; +import org.hamcrest.core.IsNull; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; + +public class JobUpdateTaskTest { + + @Mock private JobManager jobManager; + + private StoreProto.Store store; + private SourceProto.Source source; + + @Before + public void setUp() { + initMocks(this); + store = + StoreProto.Store.newBuilder() + .setName("test") + .setType(StoreType.REDIS) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .build(); + + source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) + .build(); + } + + @Test + public void shouldUpdateJobIfPresent() { + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + FeatureSetSpec featureSet2 = + FeatureSetSpec.newBuilder().setName("featureSet2").setVersion(1).setSource(source).build(); + JobInfo originalJob = + new JobInfo( + "job", + "old_ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + JobUpdateTask jobUpdateTask = + new JobUpdateTask( + Arrays.asList(featureSet1, featureSet2), + source, + store, + Optional.of(originalJob), + jobManager); + JobInfo submittedJob = + new JobInfo( + "job", + "old_ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + JobStatus.RUNNING); + + when(jobManager.updateJob(submittedJob)).thenReturn("new_ext"); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + JobInfo expected = + new JobInfo( + "job", + "new_ext", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + JobStatus.RUNNING); + JobInfo actual = jobUpdateTask.call(); + + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldCreateJobIfNotPresent() { + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + JobUpdateTask jobUpdateTask = + spy( + new JobUpdateTask( + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager)); + 
doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + when(jobManager.startJob("job", Arrays.asList(featureSet1), store)).thenReturn("new_ext"); + + JobInfo expected = + new JobInfo( + "job", + "ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + JobInfo actual = jobUpdateTask.call(); + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldUpdateJobStatusIfNotCreateOrUpdate() { + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + JobInfo originalJob = + new JobInfo( + "job", + "ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + JobUpdateTask jobUpdateTask = + new JobUpdateTask( + Arrays.asList(featureSet1), source, store, Optional.of(originalJob), jobManager); + + when(jobManager.getJobStatus(originalJob)).thenReturn(JobStatus.ABORTING); + JobInfo expected = + new JobInfo( + "job", + "ext", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.ABORTING); + JobInfo actual = jobUpdateTask.call(); + + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + JobUpdateTask jobUpdateTask = + spy( + new JobUpdateTask( + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager)); + doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + when(jobManager.startJob("job", Arrays.asList(featureSet1), store)) + .thenThrow(new RuntimeException("Something went wrong")); + + JobInfo expected = + new JobInfo( + "job", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.ERROR); + JobInfo actual = jobUpdateTask.call(); + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldTimeout() { + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + JobUpdateTask jobUpdateTask = + spy( + new JobUpdateTask( + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager)); + doReturn(0L).when(jobUpdateTask).getJobUpdateTimeoutSeconds(); + JobInfo actual = jobUpdateTask.call(); + assertThat(actual, is(IsNull.nullValue())); + } +} diff --git a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java deleted file mode 100644 index 24d1747ce4..0000000000 --- a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job; - -import static org.hamcrest.core.IsEqual.equalTo; -import static org.junit.Assert.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.SourceType; -import feast.core.dao.JobInfoRepository; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; -import java.util.Collection; -import java.util.Collections; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; - -public class ScheduledJobMonitorTest { - - ScheduledJobMonitor scheduledJobMonitor; - - @Mock JobMonitor jobMonitor; - - @Mock JobInfoRepository jobInfoRepository; - - @Before - public void setUp() { - MockitoAnnotations.initMocks(this); - scheduledJobMonitor = new ScheduledJobMonitor(jobMonitor, jobInfoRepository); - } - - @Test - public void getJobStatus_shouldUpdateJobInfoForRunningJob() { - Source source = - new Source( - SourceType.KAFKA, - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("feast-topic") - .build(), - true); - JobInfo job = - new JobInfo( - "jobId", - "extId1", - "DataflowRunner", - source, - new Store(), - Collections.emptyList(), - Collections.emptyList(), - JobStatus.RUNNING); - - when(jobInfoRepository.findByStatusNotIn((Collection) any(Collection.class))) - .thenReturn(Collections.singletonList(job)); - when(jobMonitor.getJobStatus(job)).thenReturn(JobStatus.COMPLETED); - - scheduledJobMonitor.updateJobStatus(); - - ArgumentCaptor argCaptor = ArgumentCaptor.forClass(JobInfo.class); - verify(jobInfoRepository).save(argCaptor.capture()); - - JobInfo jobInfos = argCaptor.getValue(); - assertThat(jobInfos.getStatus(), equalTo(JobStatus.COMPLETED)); - } - - @Test - public void getJobStatus_shouldNotUpdateJobInfoForTerminalJob() { - when(jobInfoRepository.findByStatusNotIn((Collection) any(Collection.class))) - .thenReturn(Collections.emptyList()); - - scheduledJobMonitor.updateJobStatus(); - - verify(jobInfoRepository, never()).save(any(JobInfo.class)); - } -} diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java deleted file mode 100644 index 1311fcbdfc..0000000000 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job.dataflow; - -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.google.api.services.dataflow.Dataflow; -import com.google.api.services.dataflow.Dataflow.Projects; -import com.google.api.services.dataflow.Dataflow.Projects.Locations; -import com.google.api.services.dataflow.Dataflow.Projects.Locations.Jobs; -import com.google.api.services.dataflow.Dataflow.Projects.Locations.Jobs.Get; -import com.google.api.services.dataflow.model.Job; -import com.google.common.collect.Lists; -import feast.core.job.Runner; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import feast.types.FieldProto.Field; -import feast.types.ValueProto.BoolList; -import feast.types.ValueProto.Value; -import java.io.IOException; -import org.junit.Before; -import org.junit.Test; - -public class DataflowJobMonitorTest { - - private DataflowJobMonitor monitor; - private String location; - private String projectId; - private Jobs jobService; - - @Before - public void setUp() throws Exception { - projectId = "myProject"; - location = "asia-east1"; - Dataflow dataflow = mock(Dataflow.class); - Dataflow.Projects projects = mock(Projects.class); - Dataflow.Projects.Locations locations = mock(Locations.class); - jobService = mock(Jobs.class); - when(dataflow.projects()).thenReturn(projects); - when(projects.locations()).thenReturn(locations); - when(locations.jobs()).thenReturn(jobService); - - monitor = new DataflowJobMonitor(dataflow, projectId, location); - } - - @Test - public void getJobStatus_shouldReturnCorrectJobStatusForValidDataflowJobState() - throws IOException { - String jobId = "myJobId"; - - Get getOp = mock(Get.class); - Job job = mock(Job.class); - when(getOp.execute()).thenReturn(job); - when(job.getCurrentState()).thenReturn(DataflowJobState.JOB_STATE_RUNNING.toString()); - when(jobService.get(projectId, location, jobId)).thenReturn(getOp); - - JobInfo jobInfo = mock(JobInfo.class); - when(jobInfo.getExtId()).thenReturn(jobId); - when(jobInfo.getRunner()).thenReturn(Runner.DATAFLOW.getName()); - assertThat(monitor.getJobStatus(jobInfo), equalTo(JobStatus.RUNNING)); - } - - @Test - public void getJobStatus_shouldReturnUnknownStateForInvalidDataflowJobState() throws IOException { - String jobId = "myJobId"; - - Get getOp = mock(Get.class); - Job job = mock(Job.class); - when(getOp.execute()).thenReturn(job); - when(job.getCurrentState()).thenReturn("Random String"); - when(jobService.get(projectId, location, jobId)).thenReturn(getOp); - - JobInfo jobInfo = mock(JobInfo.class); - when(jobInfo.getExtId()).thenReturn(jobId); - when(jobInfo.getRunner()).thenReturn(Runner.DATAFLOW.getName()); - assertThat(monitor.getJobStatus(jobInfo), equalTo(JobStatus.UNKNOWN)); - } - - @Test - public void getJobStatus_shouldReturnUnknownStateWhenExceptionHappen() throws IOException { - String jobId = "myJobId"; - - when(jobService.get(projectId, location, jobId)) - .thenThrow(new RuntimeException("some thing wrong")); - - JobInfo 
jobInfo = mock(JobInfo.class); - when(jobInfo.getExtId()).thenReturn(jobId); - when(jobInfo.getRunner()).thenReturn(Runner.DATAFLOW.getName()); - assertThat(monitor.getJobStatus(jobInfo), equalTo(JobStatus.UNKNOWN)); - } - - @Test - public void test() { - Field field = - Field.newBuilder() - .setName("Hello") - .setValue( - Value.newBuilder() - .setBoolListVal( - BoolList.newBuilder() - .addAllVal(Lists.newArrayList(true, false, true, true)) - .build())) - .build(); - field.getName(); - } -} diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 9bc641f92b..0d7e48f690 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -16,31 +16,40 @@ */ package feast.core.service; +import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.mockito.Mockito.spy; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; -import com.google.common.collect.Lists; +import com.google.protobuf.InvalidProtocolBufferException; +import feast.core.CoreServiceProto.ListFeatureSetsRequest.Filter; +import feast.core.CoreServiceProto.ListFeatureSetsResponse; +import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.Source; import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; import feast.core.dao.JobInfoRepository; import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; +import java.util.Arrays; +import java.util.List; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.mockito.ArgumentCaptor; import org.mockito.Mock; public class JobCoordinatorServiceTest { @@ -48,147 +57,179 @@ public class JobCoordinatorServiceTest { @Rule public final ExpectedException exception = ExpectedException.none(); @Mock JobInfoRepository jobInfoRepository; @Mock JobManager jobManager; - - private JobCoordinatorService jobCoordinatorService; - private JobInfo existingJob; - private Source defaultSource; + @Mock SpecService specService; @Before public void setUp() { initMocks(this); + } - Store store = - Store.fromProto( - StoreProto.Store.newBuilder() - .setName("SERVING") - .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) - .build()); - defaultSource = - new Source( - SourceType.KAFKA, - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("feast-topic") - .build(), - true); - FeatureSet featureSet1 = new FeatureSet(); - featureSet1.setId("featureSet1:1"); - featureSet1.setSource(defaultSource); - FeatureSet featureSet2 = new FeatureSet(); - featureSet2.setId("featureSet2:1"); - 
featureSet2.setSource(defaultSource); - existingJob = - new JobInfo( - "extid", - "name", - "DirectRunner", - defaultSource, - store, - Lists.newArrayList(featureSet1, featureSet2), - Lists.newArrayList(), - JobStatus.RUNNING); - when(jobInfoRepository.findBySourceIdAndStoreName(defaultSource.getId(), "SERVING")) - .thenReturn(Lists.newArrayList(existingJob)); - - jobCoordinatorService = new JobCoordinatorService(jobInfoRepository, jobManager); - jobCoordinatorService = spy(jobCoordinatorService); + @Test + public void shouldDoNothingIfNoStoresFound() { + when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build()); + JobCoordinatorService jcs = + new JobCoordinatorService(jobInfoRepository, specService, jobManager); + jcs.Poll(); + verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @Test - public void shouldNotStartOrUpdateJobIfNoChanges() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder() - .setName("featureSet1") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); - FeatureSetSpec featureSet2 = - FeatureSetSpec.newBuilder() - .setName("featureSet2") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); + public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocolBufferException { StoreProto.Store store = StoreProto.Store.newBuilder() - .setName("SERVING") + .setName("test") .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) .build(); - JobInfo jobInfo = - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSet1, featureSet2), defaultSource.toProto(), store); - assertThat(jobInfo, equalTo(existingJob)); + when(specService.listStores(any())) + .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); + when(specService.listFeatureSets( + Filter.newBuilder().setFeatureSetName("*").setFeatureSetVersion(">0").build())) + .thenReturn(ListFeatureSetsResponse.newBuilder().build()); + JobCoordinatorService jcs = + new JobCoordinatorService(jobInfoRepository, specService, jobManager); + jcs.Poll(); + verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @Test - public void shouldStartJobIfNotExists() { - FeatureSetSpec featureSet = - FeatureSetSpec.newBuilder() - .setName("featureSet") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); + public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferException { StoreProto.Store store = StoreProto.Store.newBuilder() - .setName("SERVING") + .setName("test") .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions( + Subscription.newBuilder().setName("features").setVersion(">0").build()) + .build(); + Source source = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) .build(); - String jobId = "featureSet-to-SERVING"; - String extJobId = "extId123"; - when(jobCoordinatorService.createJobId("featureSet", "SERVING")).thenReturn(jobId); - when(jobManager.startJob(jobId, Lists.newArrayList(featureSet), store)).thenReturn(extJobId); - when(jobManager.getRunnerType()).thenReturn(Runner.DIRECT); - FeatureSet expectedFeatureSet = new FeatureSet(); - 
expectedFeatureSet.setId("featureSet:1"); - JobInfo expectedJobInfo = + + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source).build(); + FeatureSetSpec featureSet2 = + FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source).build(); + String extId = "ext"; + ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(JobInfo.class); + + when(specService.listFeatureSets( + Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + .thenReturn( + ListFeatureSetsResponse.newBuilder() + .addFeatureSets(featureSet1) + .addFeatureSets(featureSet2) + .build()); + when(specService.listStores(any())) + .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); + + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1, featureSet2)), eq(store))) + .thenReturn(extId); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + + JobCoordinatorService jcs = + new JobCoordinatorService(jobInfoRepository, specService, jobManager); + jcs.Poll(); + verify(jobInfoRepository, times(1)).saveAndFlush(jobInfoArgCaptor.capture()); + JobInfo actual = jobInfoArgCaptor.getValue(); + JobInfo expected = new JobInfo( - jobId, - extJobId, - "DirectRunner", - defaultSource, - Store.fromProto(store), - Lists.newArrayList(expectedFeatureSet), + actual.getId(), + extId, + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); - when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); - JobInfo jobInfo = - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSet), defaultSource.toProto(), store); - assertThat(jobInfo, equalTo(expectedJobInfo)); + assertThat(actual, equalTo(expected)); } @Test - public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { - FeatureSetSpec featureSet = - FeatureSetSpec.newBuilder() - .setName("featureSet1") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); + public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { StoreProto.Store store = StoreProto.Store.newBuilder() - .setName("SERVING") + .setName("test") .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions( + Subscription.newBuilder().setName("features").setVersion(">0").build()) + .build(); + Source source1 = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) .build(); - String extId = "extId123"; - JobInfo modifiedJob = + Source source2 = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("other.servers:9092") + .build()) + .build(); + + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source1).build(); + FeatureSetSpec featureSet2 = + FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source2).build(); + String extId1 = "ext1"; + String extId2 = "ext2"; + ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(JobInfo.class); + + when(specService.listFeatureSets( + 
Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + .thenReturn( + ListFeatureSetsResponse.newBuilder() + .addFeatureSets(featureSet1) + .addFeatureSets(featureSet2) + .build()); + when(specService.listStores(any())) + .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); + + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1)), eq(store))) + .thenReturn(extId1); + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet2)), eq(store))) + .thenReturn(extId2); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + + JobCoordinatorService jcs = + new JobCoordinatorService(jobInfoRepository, specService, jobManager); + jcs.Poll(); + + verify(jobInfoRepository, times(2)).saveAndFlush(jobInfoArgCaptor.capture()); + List actual = jobInfoArgCaptor.getAllValues(); + JobInfo expected1 = + new JobInfo( + actual.get(0).getId(), + extId1, + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source1), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + assertThat(actual.get(0), equalTo(expected1)); + + JobInfo expected2 = new JobInfo( - existingJob.getId(), - existingJob.getExtId(), - existingJob.getRunner(), - defaultSource, - Store.fromProto(store), - Lists.newArrayList(FeatureSet.fromProto(featureSet)), + actual.get(1).getId(), + extId2, + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source2), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); - when(jobManager.updateJob(modifiedJob)).thenReturn(extId); - JobInfo expectedJobInfo = modifiedJob; - expectedJobInfo.setExtId(extId); - when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); - JobInfo jobInfo = - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSet), defaultSource.toProto(), store); - assertThat(jobInfo, equalTo(expectedJobInfo)); + assertThat(actual.get(1), equalTo(expected2)); } } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index a11adf022b..6d11267635 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -68,14 +68,11 @@ public class SpecServiceTest { - @Mock - private FeatureSetRepository featureSetRepository; + @Mock private FeatureSetRepository featureSetRepository; - @Mock - private StoreRepository storeRepository; + @Mock private StoreRepository storeRepository; - @Rule - public final ExpectedException expectedException = ExpectedException.none(); + @Rule public final ExpectedException expectedException = ExpectedException.none(); private SpecService specService; private List featureSets; @@ -102,11 +99,12 @@ public void setUp() { Field f3f1 = new Field("f3", "f3f1", Enum.INT64); Field f3f2 = new Field("f3", "f3f2", Enum.INT64); Field f3e1 = new Field("f3", "f3e1", Enum.STRING); - FeatureSet featureSet3v1 = new FeatureSet( - "f3", 1, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource); + FeatureSet featureSet3v1 = + new FeatureSet( + "f3", 1, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource); - featureSets = Arrays - .asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1, featureSet3v1); + featureSets = + Arrays.asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1, featureSet3v1); 
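// A minimal sketch of the per-source grouping that shouldGroupJobsBySource (above) verifies:
// feature set specs are partitioned by their source, and one JobUpdateTask is submitted per
// distinct source. Names here are illustrative (assumes java.util.stream.Collectors), not the
// actual Poll() body:
//   Map<Source, List<FeatureSetSpec>> specsBySource =
//       featureSetSpecs.stream().collect(Collectors.groupingBy(FeatureSetSpec::getSource));
//   specsBySource.forEach(
//       (source, specs) ->
//           tasks.add(new JobUpdateTask(specs, source, store, getJob(source, store), jobManager)));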
when(featureSetRepository.findAll()).thenReturn(featureSets); when(featureSetRepository.findAllByOrderByNameAscVersionAsc()).thenReturn(featureSets); when(featureSetRepository.findByName("f1")).thenReturn(featureSets.subList(0, 3)); @@ -347,7 +345,6 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); } - @Test public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() throws InvalidProtocolBufferException { @@ -355,20 +352,21 @@ public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() Field f3f1 = new Field("f3", "f3f1", Enum.INT64); Field f3f2 = new Field("f3", "f3f2", Enum.INT64); Field f3e1 = new Field("f3", "f3e1", Enum.STRING); - FeatureSetProto.FeatureSetSpec incomingFeatureSet = (new FeatureSet( - "f3", 5, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource)).toProto(); + FeatureSetProto.FeatureSetSpec incomingFeatureSet = + (new FeatureSet( + "f3", 5, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource)) + .toProto(); FeatureSetSpec expected = incomingFeatureSet; ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); assertThat(applyFeatureSetResponse.getFeatureSet().getMaxAge(), equalTo(expected.getMaxAge())); - assertThat(applyFeatureSetResponse.getFeatureSet().getEntities(0), - equalTo(expected.getEntities(0))); + assertThat( + applyFeatureSetResponse.getFeatureSet().getEntities(0), equalTo(expected.getEntities(0))); assertThat(applyFeatureSetResponse.getFeatureSet().getName(), equalTo(expected.getName())); } - @Test public void shouldUpdateStoreIfConfigChanges() throws InvalidProtocolBufferException { when(storeRepository.findById("SERVING")).thenReturn(Optional.of(stores.get(0))); From 70a91b20eeb231459a0ff0257ac0cc5fae8d4900 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Mon, 16 Dec 2019 16:44:27 +0800 Subject: [PATCH 02/12] Add feature set status for safe ingestion --- .../java/feast/core/job/JobUpdateTask.java | 4 +- .../core/job/dataflow/DataflowJobManager.java | 2 +- .../job/direct/DirectRunnerJobManager.java | 2 +- .../core/model/AbstractTimestampEntity.java | 7 + .../java/feast/core/model/FeatureSet.java | 38 +++-- .../core/service/JobCoordinatorService.java | 53 ++++++- .../java/feast/core/service/SpecService.java | 4 +- .../feast/core/job/JobUpdateTaskTest.java | 14 +- .../service/JobCoordinatorServiceTest.java | 33 ++-- .../feast/core/service/SpecServiceTest.java | 51 +++--- .../ingestion/transform/WriteToStore.java | 18 ++- .../java/feast/ingestion/ImportJobTest.java | 87 ++++++----- .../src/test/java/feast/test/TestUtil.java | 11 +- protos/feast/core/CoreService.proto | 6 +- protos/feast/core/FeatureSet.proto | 29 ++++ sdk/python/feast/client.py | 23 ++- sdk/python/feast/core/CoreService_pb2.py | 70 ++++----- sdk/python/feast/core/CoreService_pb2.pyi | 13 +- sdk/python/feast/core/FeatureSet_pb2.py | 145 +++++++++++++++++- sdk/python/feast/core/FeatureSet_pb2.pyi | 75 +++++++++ sdk/python/feast/feature_set.py | 75 +++++++-- sdk/python/tests/feast_core_server.py | 18 ++- sdk/python/tests/test_client.py | 138 +++++++++++------ sdk/python/tests/test_stores.py | 4 +- .../serving/service/CachedSpecService.java | 6 +- .../bigquery/BatchRetrievalQueryRunnable.java | 4 +- .../service/CachedSpecServiceTest.java | 11 +- 27 files changed, 703 insertions(+), 238 deletions(-) diff --git 
a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java index 5becbc6ee7..94ec85c2eb 100644 --- a/core/src/main/java/feast/core/job/JobUpdateTask.java +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -38,6 +38,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; /** @@ -47,6 +48,7 @@ *
<p>
When complete, the JobUpdateTask returns the updated JobInfo object to be pushed to the db. */ @Slf4j +@Getter public class JobUpdateTask implements Callable { private final long JOB_UPDATE_TIMEOUT_SECONDS = 240; // 4 minutes @@ -191,7 +193,7 @@ private JobInfo updateJob( JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { jobInfo.setFeatureSets( featureSetSpecs.stream() - .map(spec -> FeatureSet.fromProto(spec)) + .map(spec -> FeatureSet.fromSpec(spec)) .collect(Collectors.toList())); jobInfo.setStore(feast.core.model.Store.fromProto(store)); AuditLogger.log( diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 08b1acb1b0..c66a0e03aa 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -88,7 +88,7 @@ public String updateJob(JobInfo jobInfo) { try { List featureSetSpecs = new ArrayList<>(); for (FeatureSet featureSet : jobInfo.getFeatureSets()) { - featureSetSpecs.add(featureSet.toProto()); + featureSetSpecs.add(featureSet.toProto().getSpec()); } return submitDataflowJob( jobInfo.getId(), featureSetSpecs, jobInfo.getStore().toProto(), true); diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index 5027cabdef..77ef3b5935 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -129,7 +129,7 @@ public String updateJob(JobInfo jobInfo) { try { List featureSetSpecs = new ArrayList<>(); for (FeatureSet featureSet : jobInfo.getFeatureSets()) { - featureSetSpecs.add(featureSet.toProto()); + featureSetSpecs.add(featureSet.toProto().getSpec()); } startJob(jobId, featureSetSpecs, jobInfo.getStore().toProto()); } catch (JobExecutionException | InvalidProtocolBufferException e) { diff --git a/core/src/main/java/feast/core/model/AbstractTimestampEntity.java b/core/src/main/java/feast/core/model/AbstractTimestampEntity.java index cacaa51adb..7a544a9cbe 100644 --- a/core/src/main/java/feast/core/model/AbstractTimestampEntity.java +++ b/core/src/main/java/feast/core/model/AbstractTimestampEntity.java @@ -16,6 +16,7 @@ */ package feast.core.model; +import java.time.Instant; import java.util.Date; import javax.persistence.*; import lombok.Data; @@ -44,4 +45,10 @@ protected void onCreate() { protected void onUpdate() { lastUpdated = new Date(); } + + // This constructor is used for testing. 
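// Pinning both timestamps to the Unix epoch keeps entity equality deterministic, so tests can
// assert against fixed expectations instead of wall-clock values. A usage sketch, mirroring
// SpecServiceTest.newDummyFeatureSet later in this patch:
//   FeatureSet fs = newDummyFeatureSet("f2", 1);               // created defaults to epoch 0
//   fs.setCreated(Date.from(Instant.ofEpochSecond(10L)));      // or pin an explicit instant
//   fs.toProto().getMeta().getCreatedTimestamp().getSeconds(); // -> 10L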
+ public AbstractTimestampEntity() { + this.created = Date.from(Instant.ofEpochMilli(0L)); + this.lastUpdated = Date.from(Instant.ofEpochMilli(0L)); + } } diff --git a/core/src/main/java/feast/core/model/FeatureSet.java b/core/src/main/java/feast/core/model/FeatureSet.java index 755ef687e3..d6729469a7 100644 --- a/core/src/main/java/feast/core/model/FeatureSet.java +++ b/core/src/main/java/feast/core/model/FeatureSet.java @@ -17,9 +17,12 @@ package feast.core.model; import com.google.protobuf.Duration; -import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Timestamp; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSetMeta; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.FeatureSetProto.FeatureSpec; import feast.types.ValueProto.ValueType; import java.util.ArrayList; @@ -79,6 +82,10 @@ public class FeatureSet extends AbstractTimestampEntity implements Comparable features = new ArrayList<>(); @@ -120,7 +128,7 @@ public static FeatureSet fromProto(FeatureSetSpec featureSetSpec) { source); } - public FeatureSetSpec toProto() throws InvalidProtocolBufferException { + public FeatureSetProto.FeatureSet toProto() { List entitySpecs = new ArrayList<>(); for (Field entity : entities) { entitySpecs.add( @@ -138,14 +146,22 @@ public FeatureSetSpec toProto() throws InvalidProtocolBufferException { .setValueType(ValueType.Enum.valueOf(feature.getType())) .build()); } - return FeatureSetSpec.newBuilder() - .setName(name) - .setVersion(version) - .setMaxAge(Duration.newBuilder().setSeconds(maxAgeSeconds)) - .addAllEntities(entitySpecs) - .addAllFeatures(featureSpecs) - .setSource(source.toProto()) - .build(); + FeatureSetMeta.Builder meta = + FeatureSetMeta.newBuilder() + .setCreatedTimestamp( + Timestamp.newBuilder().setSeconds(super.getCreated().getTime() / 1000L)) + .setStatus(FeatureSetStatus.valueOf(status)); + + FeatureSetSpec.Builder spec = + FeatureSetSpec.newBuilder() + .setName(name) + .setVersion(version) + .setMaxAge(Duration.newBuilder().setSeconds(maxAgeSeconds)) + .addAllEntities(entitySpecs) + .addAllFeatures(featureSpecs) + .setSource(source.toProto()); + + return FeatureSetProto.FeatureSet.newBuilder().setMeta(meta).setSpec(spec).build(); } /** diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 521991d58f..414f589f57 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -20,12 +20,16 @@ import feast.core.CoreServiceProto.ListFeatureSetsRequest; import feast.core.CoreServiceProto.ListStoresRequest.Filter; import feast.core.CoreServiceProto.ListStoresResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.StoreProto; import feast.core.StoreProto.Store.Subscription; +import feast.core.dao.FeatureSetRepository; import feast.core.dao.JobInfoRepository; import feast.core.job.JobManager; import feast.core.job.JobUpdateTask; +import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.model.JobStatus; import feast.core.model.Source; @@ -52,13 +56,18 @@ public class JobCoordinatorService { private final long POLLING_INTERVAL_MILLISECONDS = 60000; // 1 min private JobInfoRepository 
jobInfoRepository; + private FeatureSetRepository featureSetRepository; private SpecService specService; private JobManager jobManager; @Autowired public JobCoordinatorService( - JobInfoRepository jobInfoRepository, SpecService specService, JobManager jobManager) { + JobInfoRepository jobInfoRepository, + FeatureSetRepository featureSetRepository, + SpecService specService, + JobManager jobManager) { this.jobInfoRepository = jobInfoRepository; + this.featureSetRepository = featureSetRepository; this.specService = specService; this.jobManager = jobManager; } @@ -70,7 +79,9 @@ public JobCoordinatorService( * *
<p>
2) Does a diff with the current set of jobs, starts/updates job(s) if necessary * - *
<p>
3) Updates job object in DB with status, feature sets + *
<p>
3) Updates job object in DB with status, feature sets + * + *
<p>
4) Updates Feature set statuses */ @Transactional @Scheduled(fixedDelay = POLLING_INTERVAL_MILLISECONDS) @@ -89,7 +100,9 @@ public void Poll() { .setFeatureSetName(subscription.getName()) .setFeatureSetVersion(subscription.getVersion()) .build()) - .getFeatureSetsList()); + .getFeatureSetsList().stream() + .map(FeatureSetProto.FeatureSet::getSpec) + .collect(Collectors.toList())); } if (!featureSetSpecs.isEmpty()) { featureSetSpecs.stream() @@ -131,6 +144,40 @@ public void Poll() { } completedTasks++; } + + log.info("Updating feature set status"); + updateFeatureSetStatuses(jobUpdateTasks); + } + + // TODO: make this more efficient + private void updateFeatureSetStatuses(List jobUpdateTasks) { + Set ready = new HashSet<>(); + Set pending = new HashSet<>(); + for (JobUpdateTask jobUpdateTask : jobUpdateTasks) { + Optional job = + getJob( + Source.fromProto(jobUpdateTask.getSourceSpec()), + Store.fromProto(jobUpdateTask.getStore())); + if (job.isPresent()) { + if (job.get().getStatus() == JobStatus.RUNNING) { + ready.addAll(job.get().getFeatureSets()); + } else { + pending.addAll(job.get().getFeatureSets()); + } + } + } + ready.removeAll(pending); + ready.forEach( + fs -> { + fs.setStatus(FeatureSetStatus.STATUS_READY.toString()); + featureSetRepository.save(fs); + }); + pending.forEach( + fs -> { + fs.setStatus(FeatureSetStatus.STATUS_PENDING.toString()); + featureSetRepository.save(fs); + }); + featureSetRepository.flush(); } @Transactional diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index 64c747641f..79cbbdc99c 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -227,7 +227,7 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) } else { existingFeatureSets = Ordering.natural().reverse().sortedCopy(existingFeatureSets); FeatureSet latest = existingFeatureSets.get(0); - FeatureSet featureSet = FeatureSet.fromProto(newFeatureSetSpec); + FeatureSet featureSet = FeatureSet.fromSpec(newFeatureSetSpec); // If the featureSet remains unchanged, we do nothing. 
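// (If it has changed, the version is bumped to latest.getVersion() + 1 below.) In client
// terms, as exercised by SpecServiceTest, applyFeatureSet is therefore idempotent:
//   applyFeatureSet(unchangedSpec) -> Status.NO_CHANGE, version untouched
//   applyFeatureSet(modifiedSpec)  -> Status.CREATED,   version = latest.getVersion() + 1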
if (featureSet.equalTo(latest)) { @@ -238,7 +238,7 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) } newFeatureSetSpec = newFeatureSetSpec.toBuilder().setVersion(latest.getVersion() + 1).build(); } - FeatureSet featureSet = FeatureSet.fromProto(newFeatureSetSpec); + FeatureSet featureSet = FeatureSet.fromSpec(newFeatureSetSpec); if (newFeatureSetSpec.getSource() == SourceProto.Source.getDefaultInstance()) { featureSet.setSource(defaultSource); } diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java index fbbf67e5a2..42647fa6fd 100644 --- a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -86,7 +86,7 @@ public void shouldUpdateJobIfPresent() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1)), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( @@ -102,7 +102,7 @@ public void shouldUpdateJobIfPresent() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), JobStatus.RUNNING); when(jobManager.updateJob(submittedJob)).thenReturn("new_ext"); @@ -114,7 +114,7 @@ public void shouldUpdateJobIfPresent() { Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), JobStatus.RUNNING); JobInfo actual = jobUpdateTask.call(); @@ -140,7 +140,7 @@ public void shouldCreateJobIfNotPresent() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1)), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.RUNNING); JobInfo actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); @@ -157,7 +157,7 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1)), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( @@ -171,7 +171,7 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1)), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.ABORTING); JobInfo actual = jobUpdateTask.call(); @@ -198,7 +198,7 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1)), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.ERROR); JobInfo actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 0d7e48f690..c929677af6 100644 --- 
a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -29,6 +29,7 @@ import feast.core.CoreServiceProto.ListFeatureSetsRequest.Filter; import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.ListStoresResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto.KafkaSourceConfig; import feast.core.SourceProto.Source; @@ -37,6 +38,7 @@ import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; +import feast.core.dao.FeatureSetRepository; import feast.core.dao.JobInfoRepository; import feast.core.job.JobManager; import feast.core.job.Runner; @@ -58,6 +60,7 @@ public class JobCoordinatorServiceTest { @Mock JobInfoRepository jobInfoRepository; @Mock JobManager jobManager; @Mock SpecService specService; + @Mock FeatureSetRepository featureSetRepository; @Before public void setUp() { @@ -68,7 +71,7 @@ public void setUp() { public void shouldDoNothingIfNoStoresFound() { when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build()); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); jcs.Poll(); verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @@ -88,7 +91,7 @@ public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocol Filter.newBuilder().setFeatureSetName("*").setFeatureSetVersion(">0").build())) .thenReturn(ListFeatureSetsResponse.newBuilder().build()); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); jcs.Poll(); verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @@ -124,8 +127,8 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) .thenReturn( ListFeatureSetsResponse.newBuilder() - .addFeatureSets(featureSet1) - .addFeatureSets(featureSet2) + .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet1)) + .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet2)) .build()); when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); @@ -135,7 +138,7 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); jcs.Poll(); verify(jobInfoRepository, times(1)).saveAndFlush(jobInfoArgCaptor.capture()); JobInfo actual = jobInfoArgCaptor.getValue(); @@ -146,7 +149,7 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), JobStatus.RUNNING); assertThat(actual, equalTo(expected)); } @@ -189,23 
+192,21 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(JobInfo.class); when(specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) .thenReturn( ListFeatureSetsResponse.newBuilder() - .addFeatureSets(featureSet1) - .addFeatureSets(featureSet2) + .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet1)) + .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet2)) .build()); when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1)), eq(store))) - .thenReturn(extId1); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet2)), eq(store))) - .thenReturn(extId2); + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1)), eq(store))).thenReturn(extId1); + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet2)), eq(store))).thenReturn(extId2); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); jcs.Poll(); verify(jobInfoRepository, times(2)).saveAndFlush(jobInfoArgCaptor.capture()); @@ -217,7 +218,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source1), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet1)), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.RUNNING); assertThat(actual.get(0), equalTo(expected1)); @@ -228,7 +229,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source2), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromProto(featureSet2)), + Arrays.asList(FeatureSet.fromSpec(featureSet2)), JobStatus.RUNNING); assertThat(actual.get(1), equalTo(expected2)); } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index 6d11267635..d08880afc1 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -53,6 +53,8 @@ import feast.core.model.Store; import feast.types.ValueProto.ValueType.Enum; import io.grpc.StatusRuntimeException; +import java.sql.Date; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -131,9 +133,9 @@ public void setUp() { public void shouldGetAllFeatureSetsIfNoFilterProvided() throws InvalidProtocolBufferException { ListFeatureSetsResponse actual = specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("").build()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet featureSet : featureSets) { - FeatureSetSpec toProto = featureSet.toProto(); + FeatureSetProto.FeatureSet toProto = featureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -148,9 +150,9 @@ public void shouldGetAllFeatureSetsMatchingNameIfNoVersionProvided() specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("f1").build()); List expectedFeatureSets 
= featureSets.stream().filter(fs -> fs.getName().equals("f1")).collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -167,9 +169,9 @@ public void shouldGetAllFeatureSetsMatchingNameWithWildcardSearch() featureSets.stream() .filter(fs -> fs.getName().startsWith("f")) .collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -188,9 +190,9 @@ public void shouldGetAllFeatureSetsMatchingVersionIfNoComparator() .filter(fs -> fs.getName().equals("f1")) .filter(fs -> fs.getVersion() == 1) .collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -209,9 +211,9 @@ public void shouldGetAllFeatureSetsGivenVersionWithComparator() .filter(fs -> fs.getName().equals("f1")) .filter(fs -> fs.getVersion() > 1) .collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -301,7 +303,7 @@ public void shouldThrowRetrievalExceptionIfNoStoresFoundWithName() { public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHasNotChanged() throws InvalidProtocolBufferException { FeatureSetSpec incomingFeatureSet = - featureSets.get(2).toProto().toBuilder().clearVersion().build(); + featureSets.get(2).toProto().getSpec().toBuilder().clearVersion().build(); ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); @@ -315,14 +317,14 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() throws InvalidProtocolBufferException { when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); FeatureSetSpec incomingFeatureSet = - newDummyFeatureSet("f2", 1).toProto().toBuilder().clearVersion().build(); + newDummyFeatureSet("f2", 1).toProto().getSpec().toBuilder().clearVersion().build(); ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); FeatureSetSpec expected = incomingFeatureSet.toBuilder().setVersion(1).setSource(defaultSource.toProto()).build(); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); - assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); + assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); } @Test @@ -332,6 +334,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() featureSets .get(2) .toProto() + .getSpec() .toBuilder() .clearVersion() 
.addFeatures(FeatureSpec.newBuilder().setName("feature2").setValueType(Enum.STRING)) @@ -342,7 +345,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() specService.applyFeatureSet(incomingFeatureSet); verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); - assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); + assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); } @Test @@ -355,16 +358,21 @@ public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() FeatureSetProto.FeatureSetSpec incomingFeatureSet = (new FeatureSet( "f3", 5, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource)) - .toProto(); + .toProto() + .getSpec(); FeatureSetSpec expected = incomingFeatureSet; ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); - assertThat(applyFeatureSetResponse.getFeatureSet().getMaxAge(), equalTo(expected.getMaxAge())); assertThat( - applyFeatureSetResponse.getFeatureSet().getEntities(0), equalTo(expected.getEntities(0))); - assertThat(applyFeatureSetResponse.getFeatureSet().getName(), equalTo(expected.getName())); + applyFeatureSetResponse.getFeatureSet().getSpec().getMaxAge(), + equalTo(expected.getMaxAge())); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getEntities(0), + equalTo(expected.getEntities(0))); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getName(), equalTo(expected.getName())); } @Test @@ -408,8 +416,11 @@ public void shouldDoNothingIfNoChange() throws InvalidProtocolBufferException { private FeatureSet newDummyFeatureSet(String name, int version) { Field feature = new Field(name, "feature", Enum.INT64); Field entity = new Field(name, "entity", Enum.STRING); - return new FeatureSet( - name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), defaultSource); + FeatureSet fs = + new FeatureSet( + name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), defaultSource); + fs.setCreated(Date.from(Instant.ofEpochSecond(10L))); + return fs; } private Store newDummyStore(String name) { diff --git a/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java b/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java index 6f697f1c6f..2e3a0a5dde 100644 --- a/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java +++ b/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java @@ -50,7 +50,6 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PDone; import org.apache.beam.sdk.values.TypeDescriptors; -import org.apache.beam.sdk.values.ValueInSingleWindow; import org.slf4j.Logger; @AutoValue @@ -61,8 +60,8 @@ public abstract class WriteToStore extends PTransform, P public static final String METRIC_NAMESPACE = "WriteToStore"; public static final String ELEMENTS_WRITTEN_METRIC = "elements_written"; - private static final Counter elementsWritten = Metrics - .counter(METRIC_NAMESPACE, ELEMENTS_WRITTEN_METRIC); + private static final Counter elementsWritten = + Metrics.counter(METRIC_NAMESPACE, ELEMENTS_WRITTEN_METRIC); public abstract Store getStore(); @@ -151,11 +150,14 @@ public void processElement(ProcessContext context) { break; } - input.apply("IncrementWriteToStoreElementsWrittenCounter", - 
MapElements.into(TypeDescriptors.booleans()).via((FeatureRow row) -> { - elementsWritten.inc(); - return true; - })); + input.apply( + "IncrementWriteToStoreElementsWrittenCounter", + MapElements.into(TypeDescriptors.booleans()) + .via( + (FeatureRow row) -> { + elementsWritten.inc(); + return true; + })); return PDone.in(input.getPipeline()); } diff --git a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java index bd034341ec..4a09bee82f 100644 --- a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java +++ b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java @@ -170,12 +170,14 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() Map expected = new HashMap<>(); LOGGER.info("Generating test data ..."); - IntStream.range(0, IMPORT_JOB_SAMPLE_FEATURE_ROW_SIZE).forEach(i -> { - FeatureRow randomRow = TestUtil.createRandomFeatureRow(spec); - RedisKey redisKey = TestUtil.createRedisKey(spec, randomRow); - input.add(randomRow); - expected.put(redisKey, randomRow); - }); + IntStream.range(0, IMPORT_JOB_SAMPLE_FEATURE_ROW_SIZE) + .forEach( + i -> { + FeatureRow randomRow = TestUtil.createRandomFeatureRow(spec); + RedisKey redisKey = TestUtil.createRedisKey(spec, randomRow); + input.add(randomRow); + expected.put(redisKey, randomRow); + }); LOGGER.info("Starting Import Job with the following options: {}", options.toString()); PipelineResult pipelineResult = ImportJob.runPipeline(options); @@ -183,43 +185,50 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() Assert.assertEquals(pipelineResult.getState(), State.RUNNING); LOGGER.info("Publishing {} Feature Row messages to Kafka ...", input.size()); - TestUtil.publishFeatureRowsToKafka(KAFKA_BOOTSTRAP_SERVERS, KAFKA_TOPIC, input, - ByteArraySerializer.class, KAFKA_PUBLISH_TIMEOUT_SEC); - TestUtil.waitUntilAllElementsAreWrittenToStore(pipelineResult, + TestUtil.publishFeatureRowsToKafka( + KAFKA_BOOTSTRAP_SERVERS, + KAFKA_TOPIC, + input, + ByteArraySerializer.class, + KAFKA_PUBLISH_TIMEOUT_SEC); + TestUtil.waitUntilAllElementsAreWrittenToStore( + pipelineResult, Duration.standardSeconds(IMPORT_JOB_MAX_RUN_DURATION_SEC), Duration.standardSeconds(IMPORT_JOB_CHECK_INTERVAL_DURATION_SEC)); LOGGER.info("Validating the actual values written to Redis ..."); Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT); - expected.forEach((key, expectedValue) -> { - - // Ensure ingested key exists. - byte[] actualByteValue = jedis.get(key.toByteArray()); - if (actualByteValue == null) { - LOGGER.error("Key not found in Redis: " + key); - LOGGER.info("Redis INFO:"); - LOGGER.info(jedis.info()); - String randomKey = jedis.randomKey(); - if (randomKey != null) { - LOGGER.info("Sample random key, value (for debugging purpose):"); - LOGGER.info("Key: " + randomKey); - LOGGER.info("Value: " + jedis.get(randomKey)); - } - Assert.fail("Missing key in Redis."); - } - - // Ensure value is a valid serialized FeatureRow object. - FeatureRow actualValue = null; - try { - actualValue = FeatureRow.parseFrom(actualByteValue); - } catch (InvalidProtocolBufferException e) { - Assert.fail(String - .format("Actual Redis value cannot be parsed as FeatureRow, key: %s, value :%s", - key, new String(actualByteValue, StandardCharsets.UTF_8))); - } - - // Ensure the retrieved FeatureRow is equal to the ingested FeatureRow. 
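// The round trip asserted below, in brief (helpers as used earlier in this test):
//   RedisKey key = TestUtil.createRedisKey(spec, row);        // key derived from entity fields
//   FeatureRow out = FeatureRow.parseFrom(jedis.get(key.toByteArray()));
//   assertEquals(row, out);                                   // the row survives Kafka -> Redis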
- Assert.assertEquals(expectedValue, actualValue); - }); + expected.forEach( + (key, expectedValue) -> { + + // Ensure ingested key exists. + byte[] actualByteValue = jedis.get(key.toByteArray()); + if (actualByteValue == null) { + LOGGER.error("Key not found in Redis: " + key); + LOGGER.info("Redis INFO:"); + LOGGER.info(jedis.info()); + String randomKey = jedis.randomKey(); + if (randomKey != null) { + LOGGER.info("Sample random key, value (for debugging purpose):"); + LOGGER.info("Key: " + randomKey); + LOGGER.info("Value: " + jedis.get(randomKey)); + } + Assert.fail("Missing key in Redis."); + } + + // Ensure value is a valid serialized FeatureRow object. + FeatureRow actualValue = null; + try { + actualValue = FeatureRow.parseFrom(actualByteValue); + } catch (InvalidProtocolBufferException e) { + Assert.fail( + String.format( + "Actual Redis value cannot be parsed as FeatureRow, key: %s, value :%s", + key, new String(actualByteValue, StandardCharsets.UTF_8))); + } + + // Ensure the retrieved FeatureRow is equal to the ingested FeatureRow. + Assert.assertEquals(expectedValue, actualValue); + }); } } diff --git a/ingestion/src/test/java/feast/test/TestUtil.java b/ingestion/src/test/java/feast/test/TestUtil.java index ef41f3950a..d66ef4a97d 100644 --- a/ingestion/src/test/java/feast/test/TestUtil.java +++ b/ingestion/src/test/java/feast/test/TestUtil.java @@ -352,15 +352,16 @@ public static Field field(String name, Object value, ValueType.Enum valueType) { /** * This blocking method waits until an ImportJob pipeline has written all elements to the store. - *

- * The pipeline must be in the RUNNING state before calling this method. * - * @param pipelineResult result of running the Pipeline + *

The pipeline must be in the RUNNING state before calling this method. + * + * @param pipelineResult result of running the Pipeline * @param maxWaitDuration wait until this max amount of duration * @throws InterruptedException if the thread is interruped while waiting */ - public static void waitUntilAllElementsAreWrittenToStore(PipelineResult pipelineResult, - Duration maxWaitDuration, Duration checkInterval) throws InterruptedException { + public static void waitUntilAllElementsAreWrittenToStore( + PipelineResult pipelineResult, Duration maxWaitDuration, Duration checkInterval) + throws InterruptedException { if (pipelineResult.getState().isTerminal()) { return; } diff --git a/protos/feast/core/CoreService.proto b/protos/feast/core/CoreService.proto index 2e0646e973..1704623db9 100644 --- a/protos/feast/core/CoreService.proto +++ b/protos/feast/core/CoreService.proto @@ -72,7 +72,7 @@ message GetFeatureSetRequest { // Response containing a single feature set message GetFeatureSetResponse { - feast.core.FeatureSetSpec feature_set = 1; + feast.core.FeatureSet feature_set = 1; } // Retrieves details for all versions of a specific feature set @@ -95,7 +95,7 @@ message ListFeatureSetsRequest { } message ListFeatureSetsResponse { - repeated feast.core.FeatureSetSpec feature_sets = 1; + repeated feast.core.FeatureSet feature_sets = 1; } message ListStoresRequest { @@ -129,7 +129,7 @@ message ApplyFeatureSetResponse { } // Feature set response has been enriched with version and source information - feast.core.FeatureSetSpec feature_set = 1; + feast.core.FeatureSet feature_set = 1; Status status = 2; } diff --git a/protos/feast/core/FeatureSet.proto b/protos/feast/core/FeatureSet.proto index a80ae36f08..a5adf139bf 100644 --- a/protos/feast/core/FeatureSet.proto +++ b/protos/feast/core/FeatureSet.proto @@ -25,6 +25,15 @@ option go_package = "github.com/gojek/feast/sdk/go/protos/feast/core"; import "feast/types/Value.proto"; import "feast/core/Source.proto"; import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; + +message FeatureSet { + // User-specified specifications of this feature set. + FeatureSetSpec spec = 1; + + // System-populated metadata for this feature set. + FeatureSetMeta meta = 2; +} message FeatureSetSpec { // Name of the featureSet. Must be unique. @@ -67,3 +76,23 @@ message FeatureSpec { // Value type of the feature. feast.types.ValueType.Enum value_type = 2; } + + +message FeatureSetMeta { + // Created timestamp of this specific feature set. + google.protobuf.Timestamp created_timestamp = 1; + + // Status of the feature set. + // Used to indicate whether the feature set is ready for consumption or ingestion. + // Currently supports 2 states: + // 1) STATUS_PENDING - A feature set is in pending state if Feast has not spun up the jobs + // necessary to push rows for this feature set to stores subscribing to this feature set. 
+ // 2) STATUS_READY - Feature set is ready for consumption or ingestion + FeatureSetStatus status = 2; +} + +enum FeatureSetStatus { + STATUS_INVALID = 0; + STATUS_PENDING = 1; + STATUS_READY = 2; +} \ No newline at end of file diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 3af2e12a91..3a78c51535 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -20,6 +20,7 @@ from typing import Dict, Union from typing import List import grpc +import time import pandas as pd import pyarrow as pa import pyarrow.parquet as pq @@ -33,6 +34,7 @@ GetFeatureSetResponse, ) from feast.core.CoreService_pb2_grpc import CoreServiceStub +from feast.core.FeatureSet_pb2 import FeatureSetStatus from feast.exceptions import format_grpc_exception from feast.feature_set import FeatureSet, Entity from feast.job import Job @@ -239,7 +241,7 @@ def _apply_feature_set(self, feature_set: FeatureSet): # Convert the feature set to a request and send to Feast Core apply_fs_response = self._core_service_stub.ApplyFeatureSet( - ApplyFeatureSetRequest(feature_set=feature_set.to_proto()), + ApplyFeatureSetRequest(feature_set=feature_set.to_proto().spec), timeout=GRPC_CONNECTION_TIMEOUT_APPLY, ) # type: ApplyFeatureSetResponse @@ -519,10 +521,25 @@ def ingest( ref_df, discard_unused_fields=True, replace_existing_features=True ) self.apply(feature_set) - - feature_set = self.get_feature_set(name, version) + current_time = time.time() + + print("Waiting for feature set to be ready for ingestion...") + while True: + if timeout is not None and time.time() - current_time >= timeout: + raise TimeoutError("Timed out waiting for feature set to be ready") + feature_set = self.get_feature_set(name, version) + if ( + feature_set is not None + and feature_set.status == FeatureSetStatus.STATUS_READY + ): + break + time.sleep(3) + + if timeout is not None: + timeout = timeout - int(time.time() - current_time) if feature_set.source.source_type == "Kafka": + print("Ingesting to kafka...") ingest_table_to_kafka( feature_set=feature_set, table=table, diff --git a/sdk/python/feast/core/CoreService_pb2.py b/sdk/python/feast/core/CoreService_pb2.py index 69a5498d87..b54dc89221 100644 --- a/sdk/python/feast/core/CoreService_pb2.py +++ b/sdk/python/feast/core/CoreService_pb2.py @@ -22,7 +22,7 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\020CoreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"H\n\x15GetFeatureSetResponse\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"K\n\x17ListFeatureSetsResponse\x12\x30\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x1a.feast.core.FeatureSetSpec\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"I\n\x16\x41pplyFeatureSetRequest\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 
\x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\"\xb7\x01\n\x17\x41pplyFeatureSetResponse\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 \x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"D\n\x15GetFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"G\n\x17ListFeatureSetsResponse\x12,\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x16.feast.core.FeatureSet\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"I\n\x16\x41pplyFeatureSetRequest\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\"\xb3\x01\n\x17\x41pplyFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 
\x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , dependencies=[feast_dot_core_dot_FeatureSet__pb2.DESCRIPTOR,feast_dot_core_dot_Store__pb2.DESCRIPTOR,]) @@ -49,8 +49,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=821, - serialized_end=868, + serialized_start=809, + serialized_end=856, ) _sym_db.RegisterEnumDescriptor(_APPLYFEATURESETRESPONSE_STATUS) @@ -71,8 +71,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1118, - serialized_end=1154, + serialized_start=1106, + serialized_end=1142, ) _sym_db.RegisterEnumDescriptor(_UPDATESTORERESPONSE_STATUS) @@ -142,7 +142,7 @@ oneofs=[ ], serialized_start=152, - serialized_end=224, + serialized_end=220, ) @@ -179,8 +179,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=312, - serialized_end=375, + serialized_start=308, + serialized_end=371, ) _LISTFEATURESETSREQUEST = _descriptor.Descriptor( @@ -209,8 +209,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=227, - serialized_end=375, + serialized_start=223, + serialized_end=371, ) @@ -240,8 +240,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=377, - serialized_end=452, + serialized_start=373, + serialized_end=444, ) @@ -271,8 +271,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=529, - serialized_end=551, + serialized_start=521, + serialized_end=543, ) _LISTSTORESREQUEST = _descriptor.Descriptor( @@ -301,8 +301,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=454, - serialized_end=551, + serialized_start=446, + serialized_end=543, ) @@ -332,8 +332,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=553, - serialized_end=607, + serialized_start=545, + serialized_end=599, ) @@ -363,8 +363,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=609, - serialized_end=682, + serialized_start=601, + serialized_end=674, ) @@ -402,8 +402,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=685, - serialized_end=868, + serialized_start=677, + serialized_end=856, ) @@ -426,8 +426,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=870, - serialized_end=898, + serialized_start=858, + serialized_end=886, ) @@ -457,8 +457,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=900, - serialized_end=946, + serialized_start=888, + serialized_end=934, ) @@ -488,8 +488,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=948, - serialized_end=1002, + serialized_start=936, + serialized_end=990, ) @@ -527,19 +527,19 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1005, - serialized_end=1154, + serialized_start=993, + serialized_end=1142, ) -_GETFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC 
+_GETFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _LISTFEATURESETSREQUEST_FILTER.containing_type = _LISTFEATURESETSREQUEST _LISTFEATURESETSREQUEST.fields_by_name['filter'].message_type = _LISTFEATURESETSREQUEST_FILTER -_LISTFEATURESETSRESPONSE.fields_by_name['feature_sets'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC +_LISTFEATURESETSRESPONSE.fields_by_name['feature_sets'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _LISTSTORESREQUEST_FILTER.containing_type = _LISTSTORESREQUEST _LISTSTORESREQUEST.fields_by_name['filter'].message_type = _LISTSTORESREQUEST_FILTER _LISTSTORESRESPONSE.fields_by_name['store'].message_type = feast_dot_core_dot_Store__pb2._STORE _APPLYFEATURESETREQUEST.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC -_APPLYFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC +_APPLYFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _APPLYFEATURESETRESPONSE.fields_by_name['status'].enum_type = _APPLYFEATURESETRESPONSE_STATUS _APPLYFEATURESETRESPONSE_STATUS.containing_type = _APPLYFEATURESETRESPONSE _UPDATESTOREREQUEST.fields_by_name['store'].message_type = feast_dot_core_dot_Store__pb2._STORE @@ -669,8 +669,8 @@ file=DESCRIPTOR, index=0, serialized_options=None, - serialized_start=1157, - serialized_end=1701, + serialized_start=1145, + serialized_end=1689, methods=[ _descriptor.MethodDescriptor( name='GetFeastCoreVersion', diff --git a/sdk/python/feast/core/CoreService_pb2.pyi b/sdk/python/feast/core/CoreService_pb2.pyi index 0bf897000b..6b83807aba 100644 --- a/sdk/python/feast/core/CoreService_pb2.pyi +++ b/sdk/python/feast/core/CoreService_pb2.pyi @@ -1,6 +1,7 @@ # @generated by generate_proto_mypy_stubs.py. Do not edit! import sys from feast.core.FeatureSet_pb2 import ( + FeatureSet as feast___core___FeatureSet_pb2___FeatureSet, FeatureSetSpec as feast___core___FeatureSet_pb2___FeatureSetSpec, ) @@ -58,11 +59,11 @@ class GetFeatureSetResponse(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSetSpec: ... + def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSet: ... def __init__(self, *, - feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSetSpec] = None, + feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSet] = None, ) -> None: ... @classmethod def FromString(cls, s: bytes) -> GetFeatureSetResponse: ... @@ -119,11 +120,11 @@ class ListFeatureSetsResponse(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_sets(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[feast___core___FeatureSet_pb2___FeatureSetSpec]: ... + def feature_sets(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[feast___core___FeatureSet_pb2___FeatureSet]: ... def __init__(self, *, - feature_sets : typing___Optional[typing___Iterable[feast___core___FeatureSet_pb2___FeatureSetSpec]] = None, + feature_sets : typing___Optional[typing___Iterable[feast___core___FeatureSet_pb2___FeatureSet]] = None, ) -> None: ... @classmethod def FromString(cls, s: bytes) -> ListFeatureSetsResponse: ... 
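The stub changes above capture the client-visible effect of this patch: Core responses now carry the FeatureSet wrapper instead of a bare FeatureSetSpec. A minimal sketch of what that means for calling code, using only messages defined in this patch (the field values are illustrative):

    from feast.core.CoreService_pb2 import GetFeatureSetResponse
    from feast.core.FeatureSet_pb2 import (
        FeatureSet,
        FeatureSetMeta,
        FeatureSetSpec,
        FeatureSetStatus,
    )

    # Core now returns the user-defined spec and the system-populated meta
    # together, wrapped in a single FeatureSet message.
    response = GetFeatureSetResponse(
        feature_set=FeatureSet(
            spec=FeatureSetSpec(name="driver", version=1),
            meta=FeatureSetMeta(status=FeatureSetStatus.STATUS_READY),
        )
    )

    # Callers that previously read fields straight off the response must now
    # unwrap .spec, and can separately inspect .meta.
    assert response.feature_set.spec.name == "driver"
    assert response.feature_set.meta.status == FeatureSetStatus.STATUS_READY
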
@@ -236,11 +237,11 @@ class ApplyFeatureSetResponse(google___protobuf___message___Message): status = ... # type: ApplyFeatureSetResponse.Status @property - def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSetSpec: ... + def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSet: ... def __init__(self, *, - feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSetSpec] = None, + feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSet] = None, status : typing___Optional[ApplyFeatureSetResponse.Status] = None, ) -> None: ... @classmethod diff --git a/sdk/python/feast/core/FeatureSet_pb2.py b/sdk/python/feast/core/FeatureSet_pb2.py index 8c331db16b..0cb77f1a70 100644 --- a/sdk/python/feast/core/FeatureSet_pb2.py +++ b/sdk/python/feast/core/FeatureSet_pb2.py @@ -4,6 +4,7 @@ import sys _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection @@ -16,6 +17,7 @@ from feast.types import Value_pb2 as feast_dot_types_dot_Value__pb2 from feast.core import Source_pb2 as feast_dot_core_dot_Source__pb2 from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 DESCRIPTOR = _descriptor.FileDescriptor( @@ -23,11 +25,79 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\017FeatureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1b\x66\x65\x61st/core/FeatureSet.proto\x12\nfeast.core\x1a\x17\x66\x65\x61st/types/Value.proto\x1a\x17\x66\x65\x61st/core/Source.proto\x1a\x1egoogle/protobuf/duration.proto\"\xd4\x01\n\x0e\x46\x65\x61tureSetSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12(\n\x08\x65ntities\x18\x03 \x03(\x0b\x32\x16.feast.core.EntitySpec\x12)\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x17.feast.core.FeatureSpec\x12*\n\x07max_age\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\"\n\x06source\x18\x06 \x01(\x0b\x32\x12.feast.core.Source\"K\n\nEntitySpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"L\n\x0b\x46\x65\x61tureSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.EnumBN\n\nfeast.coreB\x0f\x46\x65\x61tureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1b\x66\x65\x61st/core/FeatureSet.proto\x12\nfeast.core\x1a\x17\x66\x65\x61st/types/Value.proto\x1a\x17\x66\x65\x61st/core/Source.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"`\n\nFeatureSet\x12(\n\x04spec\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\x12(\n\x04meta\x18\x02 \x01(\x0b\x32\x1a.feast.core.FeatureSetMeta\"\xd4\x01\n\x0e\x46\x65\x61tureSetSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12(\n\x08\x65ntities\x18\x03 \x03(\x0b\x32\x16.feast.core.EntitySpec\x12)\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x17.feast.core.FeatureSpec\x12*\n\x07max_age\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\"\n\x06source\x18\x06 \x01(\x0b\x32\x12.feast.core.Source\"K\n\nEntitySpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 
\x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"L\n\x0b\x46\x65\x61tureSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"u\n\x0e\x46\x65\x61tureSetMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x06status\x18\x02 \x01(\x0e\x32\x1c.feast.core.FeatureSetStatus*L\n\x10\x46\x65\x61tureSetStatus\x12\x12\n\x0eSTATUS_INVALID\x10\x00\x12\x12\n\x0eSTATUS_PENDING\x10\x01\x12\x10\n\x0cSTATUS_READY\x10\x02\x42N\n\nfeast.coreB\x0f\x46\x65\x61tureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , - dependencies=[feast_dot_types_dot_Value__pb2.DESCRIPTOR,feast_dot_core_dot_Source__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,]) + dependencies=[feast_dot_types_dot_Value__pb2.DESCRIPTOR,feast_dot_core_dot_Source__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) + +_FEATURESETSTATUS = _descriptor.EnumDescriptor( + name='FeatureSetStatus', + full_name='feast.core.FeatureSetStatus', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='STATUS_INVALID', index=0, number=0, + serialized_options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='STATUS_PENDING', index=1, number=1, + serialized_options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='STATUS_READY', index=2, number=2, + serialized_options=None, + type=None), + ], + containing_type=None, + serialized_options=None, + serialized_start=745, + serialized_end=821, +) +_sym_db.RegisterEnumDescriptor(_FEATURESETSTATUS) + +FeatureSetStatus = enum_type_wrapper.EnumTypeWrapper(_FEATURESETSTATUS) +STATUS_INVALID = 0 +STATUS_PENDING = 1 +STATUS_READY = 2 + +_FEATURESET = _descriptor.Descriptor( + name='FeatureSet', + full_name='feast.core.FeatureSet', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='spec', full_name='feast.core.FeatureSet.spec', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='meta', full_name='feast.core.FeatureSet.meta', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=158, + serialized_end=254, +) _FEATURESETSPEC = _descriptor.Descriptor( @@ -91,8 +161,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=126, - serialized_end=338, + serialized_start=257, + serialized_end=469, ) @@ -129,8 +199,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=340, - serialized_end=415, + serialized_start=471, + serialized_end=546, ) @@ -167,21 +237,73 @@ extension_ranges=[], oneofs=[ ], - serialized_start=417, - serialized_end=493, + serialized_start=548, + serialized_end=624, ) + +_FEATURESETMETA = _descriptor.Descriptor( + name='FeatureSetMeta', + full_name='feast.core.FeatureSetMeta', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + 
name='created_timestamp', full_name='feast.core.FeatureSetMeta.created_timestamp', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='status', full_name='feast.core.FeatureSetMeta.status', index=1, + number=2, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=626, + serialized_end=743, +) + +_FEATURESET.fields_by_name['spec'].message_type = _FEATURESETSPEC +_FEATURESET.fields_by_name['meta'].message_type = _FEATURESETMETA _FEATURESETSPEC.fields_by_name['entities'].message_type = _ENTITYSPEC _FEATURESETSPEC.fields_by_name['features'].message_type = _FEATURESPEC _FEATURESETSPEC.fields_by_name['max_age'].message_type = google_dot_protobuf_dot_duration__pb2._DURATION _FEATURESETSPEC.fields_by_name['source'].message_type = feast_dot_core_dot_Source__pb2._SOURCE _ENTITYSPEC.fields_by_name['value_type'].enum_type = feast_dot_types_dot_Value__pb2._VALUETYPE_ENUM _FEATURESPEC.fields_by_name['value_type'].enum_type = feast_dot_types_dot_Value__pb2._VALUETYPE_ENUM +_FEATURESETMETA.fields_by_name['created_timestamp'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_FEATURESETMETA.fields_by_name['status'].enum_type = _FEATURESETSTATUS +DESCRIPTOR.message_types_by_name['FeatureSet'] = _FEATURESET DESCRIPTOR.message_types_by_name['FeatureSetSpec'] = _FEATURESETSPEC DESCRIPTOR.message_types_by_name['EntitySpec'] = _ENTITYSPEC DESCRIPTOR.message_types_by_name['FeatureSpec'] = _FEATURESPEC +DESCRIPTOR.message_types_by_name['FeatureSetMeta'] = _FEATURESETMETA +DESCRIPTOR.enum_types_by_name['FeatureSetStatus'] = _FEATURESETSTATUS _sym_db.RegisterFileDescriptor(DESCRIPTOR) +FeatureSet = _reflection.GeneratedProtocolMessageType('FeatureSet', (_message.Message,), { + 'DESCRIPTOR' : _FEATURESET, + '__module__' : 'feast.core.FeatureSet_pb2' + # @@protoc_insertion_point(class_scope:feast.core.FeatureSet) + }) +_sym_db.RegisterMessage(FeatureSet) + FeatureSetSpec = _reflection.GeneratedProtocolMessageType('FeatureSetSpec', (_message.Message,), { 'DESCRIPTOR' : _FEATURESETSPEC, '__module__' : 'feast.core.FeatureSet_pb2' @@ -203,6 +325,13 @@ }) _sym_db.RegisterMessage(FeatureSpec) +FeatureSetMeta = _reflection.GeneratedProtocolMessageType('FeatureSetMeta', (_message.Message,), { + 'DESCRIPTOR' : _FEATURESETMETA, + '__module__' : 'feast.core.FeatureSet_pb2' + # @@protoc_insertion_point(class_scope:feast.core.FeatureSetMeta) + }) +_sym_db.RegisterMessage(FeatureSetMeta) + DESCRIPTOR._options = None # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/core/FeatureSet_pb2.pyi b/sdk/python/feast/core/FeatureSet_pb2.pyi index 5d93721fe1..6fa03ed359 100644 --- a/sdk/python/feast/core/FeatureSet_pb2.pyi +++ b/sdk/python/feast/core/FeatureSet_pb2.pyi @@ -10,6 +10,7 @@ from feast.types.Value_pb2 import ( from google.protobuf.descriptor import ( Descriptor as google___protobuf___descriptor___Descriptor, + EnumDescriptor as google___protobuf___descriptor___EnumDescriptor, ) from google.protobuf.duration_pb2 
import ( @@ -24,10 +25,17 @@ from google.protobuf.message import ( Message as google___protobuf___message___Message, ) +from google.protobuf.timestamp_pb2 import ( + Timestamp as google___protobuf___timestamp_pb2___Timestamp, +) + from typing import ( Iterable as typing___Iterable, + List as typing___List, Optional as typing___Optional, Text as typing___Text, + Tuple as typing___Tuple, + cast as typing___cast, ) from typing_extensions import ( @@ -35,6 +43,50 @@ from typing_extensions import ( ) +class FeatureSetStatus(int): + DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ... + @classmethod + def Name(cls, number: int) -> str: ... + @classmethod + def Value(cls, name: str) -> FeatureSetStatus: ... + @classmethod + def keys(cls) -> typing___List[str]: ... + @classmethod + def values(cls) -> typing___List[FeatureSetStatus]: ... + @classmethod + def items(cls) -> typing___List[typing___Tuple[str, FeatureSetStatus]]: ... + STATUS_INVALID = typing___cast(FeatureSetStatus, 0) + STATUS_PENDING = typing___cast(FeatureSetStatus, 1) + STATUS_READY = typing___cast(FeatureSetStatus, 2) +STATUS_INVALID = typing___cast(FeatureSetStatus, 0) +STATUS_PENDING = typing___cast(FeatureSetStatus, 1) +STATUS_READY = typing___cast(FeatureSetStatus, 2) + +class FeatureSet(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + + @property + def spec(self) -> FeatureSetSpec: ... + + @property + def meta(self) -> FeatureSetMeta: ... + + def __init__(self, + *, + spec : typing___Optional[FeatureSetSpec] = None, + meta : typing___Optional[FeatureSetMeta] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> FeatureSet: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + if sys.version_info >= (3,): + def HasField(self, field_name: typing_extensions___Literal[u"meta",u"spec"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"meta",u"spec"]) -> None: ... + else: + def HasField(self, field_name: typing_extensions___Literal[u"meta",b"meta",u"spec",b"spec"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"meta",b"meta",u"spec",b"spec"]) -> None: ... + class FeatureSetSpec(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... name = ... # type: typing___Text @@ -109,3 +161,26 @@ class FeatureSpec(google___protobuf___message___Message): def ClearField(self, field_name: typing_extensions___Literal[u"name",u"value_type"]) -> None: ... else: def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name",u"value_type",b"value_type"]) -> None: ... + +class FeatureSetMeta(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + status = ... # type: FeatureSetStatus + + @property + def created_timestamp(self) -> google___protobuf___timestamp_pb2___Timestamp: ... + + def __init__(self, + *, + created_timestamp : typing___Optional[google___protobuf___timestamp_pb2___Timestamp] = None, + status : typing___Optional[FeatureSetStatus] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> FeatureSetMeta: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... 
+ if sys.version_info >= (3,): + def HasField(self, field_name: typing_extensions___Literal[u"created_timestamp"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"created_timestamp",u"status"]) -> None: ... + else: + def HasField(self, field_name: typing_extensions___Literal[u"created_timestamp",b"created_timestamp"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"created_timestamp",b"created_timestamp",u"status",b"status"]) -> None: ... diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index 893378e8fa..cea36c3274 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -22,7 +22,11 @@ from feast.entity import Entity from feast.feature import Feature, Field from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto +from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto +from feast.core.FeatureSet_pb2 import FeatureSetStatus from google.protobuf.duration_pb2 import Duration +from google.protobuf.timestamp_pb2 import Timestamp from feast.type_map import python_type_to_feast_value_type from google.protobuf.json_format import MessageToJson from google.protobuf import json_format @@ -42,6 +46,8 @@ def __init__( entities: List[Entity] = None, source: Source = None, max_age: Optional[Duration] = None, + status: FeatureSetStatus = None, + created_timestamp: Optional[Timestamp] = None, ): self._name = name self._fields = OrderedDict() # type: Dict[str, Field] @@ -56,6 +62,8 @@ def __init__( self._max_age = max_age self._version = None self._client = None + self._status = status + self._created_timestamp = created_timestamp def __eq__(self, other): if not isinstance(other, FeatureSet): @@ -195,6 +203,34 @@ def max_age(self, max_age): """ self._max_age = max_age + @property + def status(self): + """ + Returns the status of this feature set + """ + return self._status + + @status.setter + def status(self, status): + """ + Sets the status of this feature set + """ + self._status = status + + @property + def created_timestamp(self): + """ + Returns the created_timestamp of this feature set + """ + return self._created_timestamp + + @created_timestamp.setter + def created_timestamp(self, created_timestamp): + """ + Sets the status of this feature set + """ + self._created_timestamp = created_timestamp + def add(self, resource): """ Adds a resource (Feature, Entity) to this Feature Set. 
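The new status and created_timestamp properties mirror FeatureSetMeta on the SDK object, alongside the user-defined fields that map to FeatureSetSpec. A short sketch of the round trip, assuming the patched SDK and the to_proto() changes shown below (values are illustrative):

    from feast.core.FeatureSet_pb2 import FeatureSetStatus
    from feast.feature_set import FeatureSet
    from google.protobuf.timestamp_pb2 import Timestamp

    fs = FeatureSet(name="driver")
    fs.status = FeatureSetStatus.STATUS_PENDING
    fs.created_timestamp = Timestamp(seconds=10)

    # to_proto() (updated below) packs these into FeatureSet.meta, while the
    # name, entities, and features go into FeatureSet.spec.
    proto = fs.to_proto()
    assert proto.meta.status == FeatureSetStatus.STATUS_PENDING
    assert proto.spec.name == "driver"
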
@@ -388,6 +424,8 @@ def update_from_feature_set(self, feature_set): self.features = feature_set.features self.entities = feature_set.entities self.source = feature_set.source + self.status = feature_set.status + self._created_timestamp = feature_set.created_timestamp def get_kafka_source_brokers(self) -> str: """ @@ -448,44 +486,55 @@ def from_dict(cls, fs_dict): return cls.from_proto(feature_set_proto) @classmethod - def from_proto(cls, feature_set_proto: FeatureSetSpecProto): + def from_proto(cls, feature_set_proto: FeatureSetProto): """ Creates a feature set from a protobuf representation of a feature set Args: - from_proto: A protobuf representation of a feature set + feature_set_proto: A protobuf representation of a feature set Returns: Returns a FeatureSet object based on the feature set protobuf """ feature_set = cls( - name=feature_set_proto.name, + name=feature_set_proto.spec.name, features=[ - Feature.from_proto(feature) for feature in feature_set_proto.features + Feature.from_proto(feature) + for feature in feature_set_proto.spec.features ], entities=[ - Entity.from_proto(entity) for entity in feature_set_proto.entities + Entity.from_proto(entity) for entity in feature_set_proto.spec.entities ], - max_age=feature_set_proto.max_age, + max_age=feature_set_proto.spec.max_age, source=( None - if feature_set_proto.source.type == 0 - else Source.from_proto(feature_set_proto.source) + if feature_set_proto.spec.source.type == 0 + else Source.from_proto(feature_set_proto.spec.source) ), + status=( + None + if feature_set_proto.meta.status == 0 + else feature_set_proto.meta.status + ), + created_timestamp=feature_set_proto.meta.created_timestamp, ) - feature_set._version = feature_set_proto.version + feature_set._version = feature_set_proto.spec.version return feature_set - def to_proto(self) -> FeatureSetSpecProto: + def to_proto(self) -> FeatureSetProto: """ Converts a feature set object to its protobuf representation Returns: - FeatureSetSpec protobuf + FeatureSetProto protobuf """ - return FeatureSetSpecProto( + meta = FeatureSetMetaProto( + created_timestamp=self.created_timestamp, status=self.status + ) + + spec = FeatureSetSpecProto( name=self.name, version=self.version, max_age=self.max_age, @@ -502,6 +551,8 @@ def to_proto(self) -> FeatureSetSpecProto: ], ) + return FeatureSetProto(spec=spec, meta=meta) + def _infer_pd_column_type(column, series, rows_to_sample): dtype = None diff --git a/sdk/python/tests/feast_core_server.py b/sdk/python/tests/feast_core_server.py index f547ba273b..fe1ef36967 100644 --- a/sdk/python/tests/feast_core_server.py +++ b/sdk/python/tests/feast_core_server.py @@ -10,11 +10,17 @@ ListFeatureSetsResponse, ListFeatureSetsRequest, ) -from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpec +from google.protobuf.timestamp_pb2 import Timestamp +from feast.core.FeatureSet_pb2 import ( + FeatureSetSpec as FeatureSetSpec, + FeatureSetMeta, + FeatureSetStatus, +) from feast.core.Source_pb2 import ( SourceType as SourceTypeProto, KafkaSourceConfig as KafkaSourceConfigProto, ) +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from typing import List _logger = logging.getLogger(__name__) @@ -60,7 +66,12 @@ def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): ) feature_set.source.type = SourceTypeProto.KAFKA - self._feature_sets[feature_set.name] = feature_set + feature_set_meta = FeatureSetMeta( + status=FeatureSetStatus.STATUS_READY, + created_timestamp=Timestamp(seconds=10), + ) + applied_feature_set = 
FeatureSetProto(spec=feature_set, meta=feature_set_meta) + self._feature_sets[feature_set.name] = applied_feature_set _logger.info( "registered feature set " @@ -73,7 +84,8 @@ def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): ) return ApplyFeatureSetResponse( - feature_set=feature_set, status=ApplyFeatureSetResponse.Status.CREATED + feature_set=applied_feature_set, + status=ApplyFeatureSetResponse.Status.CREATED, ) diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index 2243ebfd1b..8996b7543c 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -26,7 +26,14 @@ from feast.entity import Entity from feast.feature_set import Feature from feast.source import KafkaSource -from feast.core.FeatureSet_pb2 import FeatureSetSpec, FeatureSpec, EntitySpec +from feast.core.FeatureSet_pb2 import ( + FeatureSetSpec, + FeatureSpec, + EntitySpec, + FeatureSetMeta, + FeatureSetStatus, +) +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from feast.core.Source_pb2 import SourceType, KafkaSourceConfig, Source from feast.core.CoreService_pb2 import ( GetFeastCoreVersionResponse, @@ -178,29 +185,34 @@ def test_get_feature_set(self, mock_client, mocker): mock_client._core_service_stub, "GetFeatureSet", return_value=GetFeatureSetResponse( - feature_set=FeatureSetSpec( - name="my_feature_set", - version=2, - max_age=Duration(seconds=3600), - features=[ - FeatureSpec( - name="my_feature_1", value_type=ValueProto.ValueType.FLOAT - ), - FeatureSpec( - name="my_feature_2", value_type=ValueProto.ValueType.FLOAT - ), - ], - entities=[ - EntitySpec( - name="my_entity_1", value_type=ValueProto.ValueType.INT64 - ) - ], - source=Source( - type=SourceType.KAFKA, - kafka_source_config=KafkaSourceConfig( - bootstrap_servers="localhost:9092", topic="topic" + feature_set=FeatureSetProto( + spec=FeatureSetSpec( + name="my_feature_set", + version=2, + max_age=Duration(seconds=3600), + features=[ + FeatureSpec( + name="my_feature_1", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpec( + name="my_feature_2", + value_type=ValueProto.ValueType.FLOAT, + ), + ], + entities=[ + EntitySpec( + name="my_entity_1", + value_type=ValueProto.ValueType.INT64, + ) + ], + source=Source( + type=SourceType.KAFKA, + kafka_source_config=KafkaSourceConfig( + bootstrap_servers="localhost:9092", topic="topic" + ), ), - ), + ) ) ), ) @@ -229,27 +241,31 @@ def test_get_batch_features(self, mock_client, mocker): mock_client._core_service_stub, "GetFeatureSet", return_value=GetFeatureSetResponse( - feature_set=FeatureSetSpec( - name="customer_fs", - version=1, - entities=[ - EntitySpec( - name="customer", value_type=ValueProto.ValueType.INT64 - ), - EntitySpec( - name="transaction", value_type=ValueProto.ValueType.INT64 - ), - ], - features=[ - FeatureSpec( - name="customer_feature_1", - value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpec( - name="customer_feature_2", - value_type=ValueProto.ValueType.STRING, - ), - ], + feature_set=FeatureSetProto( + spec=FeatureSetSpec( + name="customer_fs", + version=1, + entities=[ + EntitySpec( + name="customer", value_type=ValueProto.ValueType.INT64 + ), + EntitySpec( + name="transaction", + value_type=ValueProto.ValueType.INT64, + ), + ], + features=[ + FeatureSpec( + name="customer_feature_1", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpec( + name="customer_feature_2", + value_type=ValueProto.ValueType.STRING, + ), + ], + ), + meta=FeatureSetMeta(status=FeatureSetStatus.STATUS_READY), ) ), ) 
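The tests that follow exercise the readiness gate this patch adds to Client.ingest(): ingestion now blocks until Core reports the feature set as STATUS_READY, and raises TimeoutError otherwise. A condensed sketch of that polling loop, assuming the patched client (wait_until_ready is an illustrative name, not part of the change):

    import time

    from feast.core.FeatureSet_pb2 import FeatureSetStatus

    def wait_until_ready(client, name, version, timeout=None, poll_interval=3):
        # Same logic as the loop added to Client.ingest(): poll Core until
        # the feature set's meta reports STATUS_READY, or give up after
        # `timeout` seconds.
        start = time.time()
        while True:
            if timeout is not None and time.time() - start >= timeout:
                raise TimeoutError("Timed out waiting for feature set to be ready")
            feature_set = client.get_feature_set(name, version)
            if (
                feature_set is not None
                and feature_set.status == FeatureSetStatus.STATUS_READY
            ):
                return feature_set
            time.sleep(poll_interval)

This is why test_feature_set_ingest_fail_if_pending below can force a TimeoutError simply by mocking GetFeatureSet to return a feature set stuck in STATUS_PENDING.
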
@@ -373,11 +389,13 @@ def test_feature_set_ingest_success(self, dataframe, client, mocker): # Register with Feast core client.apply(driver_fs) + driver_fs = driver_fs.to_proto() + driver_fs.meta.status = FeatureSetStatus.STATUS_READY mocker.patch.object( client._core_service_stub, "GetFeatureSet", - return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()), + return_value=GetFeatureSetResponse(feature_set=driver_fs), ) # Need to create a mock producer @@ -385,6 +403,36 @@ def test_feature_set_ingest_success(self, dataframe, client, mocker): # Ingest data into Feast client.ingest("driver-feature-set", dataframe) + @pytest.mark.parametrize("dataframe,exception", [(dataframes.GOOD, TimeoutError)]) + def test_feature_set_ingest_fail_if_pending( + self, dataframe, exception, client, mocker + ): + with pytest.raises(exception): + driver_fs = FeatureSet( + "driver-feature-set", + source=KafkaSource(brokers="kafka:9092", topic="test"), + ) + driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT)) + driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING)) + driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64)) + driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64)) + + # Register with Feast core + client.apply(driver_fs) + driver_fs = driver_fs.to_proto() + driver_fs.meta.status = FeatureSetStatus.STATUS_PENDING + + mocker.patch.object( + client._core_service_stub, + "GetFeatureSet", + return_value=GetFeatureSetResponse(feature_set=driver_fs), + ) + + # Need to create a mock producer + with patch("feast.loaders.ingest.KafkaProducer") as mocked_queue: + # Ingest data into Feast + client.ingest("driver-feature-set", dataframe, timeout=1) + @pytest.mark.parametrize( "dataframe,exception", [ diff --git a/sdk/python/tests/test_stores.py b/sdk/python/tests/test_stores.py index 92445ecb7a..330f272dac 100644 --- a/sdk/python/tests/test_stores.py +++ b/sdk/python/tests/test_stores.py @@ -38,9 +38,9 @@ def test_register_feature_set(self, sqlite_store): fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64)) fs.add(Entity(name="my-entity-1", dtype=ValueType.INT64)) fs._version = 1 - feature_set_proto = fs.to_proto() + feature_set_spec_proto = fs.to_proto().spec - sqlite_store.register_feature_set(feature_set_proto) + sqlite_store.register_feature_set(feature_set_spec_proto) feature_row = FeatureRowProto.FeatureRow( feature_set="feature_set_1", event_timestamp=Timestamp(), diff --git a/serving/src/main/java/feast/serving/service/CachedSpecService.java b/serving/src/main/java/feast/serving/service/CachedSpecService.java index 64a2c6e084..edf2da37a0 100644 --- a/serving/src/main/java/feast/serving/service/CachedSpecService.java +++ b/serving/src/main/java/feast/serving/service/CachedSpecService.java @@ -27,6 +27,7 @@ import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.Subscription; @@ -115,7 +116,7 @@ public FeatureSetSpec getFeatureSet(String name, int version) { "Unable to retrieve featureSet with id %s from core, featureSet does not exist", id)); } - return featureSets.getFeatureSets(0); + return featureSets.getFeatureSets(0).getSpec(); } catch (ExecutionException e) { throw new SpecRetrievalException( String.format("Unable to retrieve featureSet with id %s", id), e); @@ -157,7 
+158,8 @@ private Map getFeatureSetSpecMap() { .setFeatureSetVersion(subscription.getVersion())) .build()); - for (FeatureSetSpec featureSetSpec : featureSetsResponse.getFeatureSetsList()) { + for (FeatureSet featureSet : featureSetsResponse.getFeatureSetsList()) { + FeatureSetSpec featureSetSpec = featureSet.getSpec(); featureSetSpecs.put( String.format("%s:%s", featureSetSpec.getName(), featureSetSpec.getVersion()), featureSetSpec); diff --git a/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java b/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java index 2d51547d0e..e16f5060c2 100644 --- a/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java +++ b/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java @@ -175,7 +175,6 @@ Job runBatchQuery(List featureSetQueries) ExecutorCompletionService executorCompletionService = new ExecutorCompletionService<>(executorService); - List featureSetInfos = new ArrayList<>(); for (int i = 0; i < featureSetQueries.size(); i++) { @@ -191,7 +190,8 @@ Job runBatchQuery(List featureSetQueries) for (int i = 0; i < featureSetQueries.size(); i++) { try { - FeatureSetInfo featureSetInfo = executorCompletionService.take().get(SUBQUERY_TIMEOUT_SECS, TimeUnit.SECONDS); + FeatureSetInfo featureSetInfo = + executorCompletionService.take().get(SUBQUERY_TIMEOUT_SECS, TimeUnit.SECONDS); featureSetInfos.add(featureSetInfo); } catch (InterruptedException | ExecutionException | TimeoutException e) { jobService() diff --git a/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java b/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java index 5b295e9ee7..5bd2038f2b 100644 --- a/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java +++ b/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java @@ -26,6 +26,7 @@ import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.RedisConfig; @@ -94,9 +95,13 @@ public void setUp() throws IOException { featureSetSpecs.put("fs1:2", FeatureSetSpec.newBuilder().setName("fs1").setVersion(2).build()); featureSetSpecs.put("fs2:1", FeatureSetSpec.newBuilder().setName("fs2").setVersion(1).build()); - List fs1FeatureSets = - Lists.newArrayList(featureSetSpecs.get("fs1:1"), featureSetSpecs.get("fs1:2")); - List fs2FeatureSets = Lists.newArrayList(featureSetSpecs.get("fs2:1")); + List fs1FeatureSets = + Lists.newArrayList( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpecs.get("fs1:1")).build(), + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpecs.get("fs1:2")).build()); + List fs2FeatureSets = + Lists.newArrayList( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpecs.get("fs2:1")).build()); when(coreService.listFeatureSets( ListFeatureSetsRequest.newBuilder() .setFilter( From df8f86379bf8718f815e571dc36afed2fb84519a Mon Sep 17 00:00:00 2001 From: zhilingc Date: Mon, 16 Dec 2019 18:50:07 +0800 Subject: [PATCH 03/12] Make job update timeout configurable --- .../java/feast/core/config/FeastProperties.java | 8 ++++++++ core/src/main/java/feast/core/config/JobConfig.java | 9 +++++++++ .../src/main/java/feast/core/job/JobUpdateTask.java | 11 ++++------- 
.../feast/core/service/JobCoordinatorService.java | 8 ++++++-- core/src/main/resources/application.yml | 3 +++ .../test/java/feast/core/job/JobUpdateTaskTest.java | 11 +++++------ .../core/service/JobCoordinatorServiceTest.java | 13 +++++++++---- 7 files changed, 44 insertions(+), 19 deletions(-) diff --git a/core/src/main/java/feast/core/config/FeastProperties.java b/core/src/main/java/feast/core/config/FeastProperties.java index e57a594305..1887caf5e6 100644 --- a/core/src/main/java/feast/core/config/FeastProperties.java +++ b/core/src/main/java/feast/core/config/FeastProperties.java @@ -37,6 +37,14 @@ public static class JobProperties { private String runner; private Map options; private MetricsProperties metrics; + private JobUpdatesProperties updates; + } + + @Getter + @Setter + public static class JobUpdatesProperties { + + private long timeoutSeconds; } @Getter diff --git a/core/src/main/java/feast/core/config/JobConfig.java b/core/src/main/java/feast/core/config/JobConfig.java index 2402a14fe2..238ae184d6 100644 --- a/core/src/main/java/feast/core/config/JobConfig.java +++ b/core/src/main/java/feast/core/config/JobConfig.java @@ -23,6 +23,7 @@ import com.google.api.services.dataflow.DataflowScopes; import com.google.common.base.Strings; import feast.core.config.FeastProperties.JobProperties; +import feast.core.config.FeastProperties.JobUpdatesProperties; import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.job.dataflow.DataflowJobManager; @@ -98,4 +99,12 @@ public JobManager getJobManager( public DirectJobRegistry directJobRegistry() { return new DirectJobRegistry(); } + + /** + * Extracts job update options from feast core options. + */ + @Bean + public JobUpdatesProperties jobUpdatesProperties(FeastProperties feastProperties) { + return feastProperties.getJobs().getUpdates(); + } } diff --git a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java index 94ec85c2eb..6adf9ac68b 100644 --- a/core/src/main/java/feast/core/job/JobUpdateTask.java +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -51,26 +51,27 @@ @Getter public class JobUpdateTask implements Callable { - private final long JOB_UPDATE_TIMEOUT_SECONDS = 240; // 4 minutes - private final List featureSetSpecs; private final SourceProto.Source sourceSpec; private final StoreProto.Store store; private final Optional originalJob; private JobManager jobManager; + private long jobUpdateTimeoutSeconds; public JobUpdateTask( List featureSetSpecs, SourceProto.Source sourceSpec, StoreProto.Store store, Optional originalJob, - JobManager jobManager) { + JobManager jobManager, + long jobUpdateTimeoutSeconds) { this.featureSetSpecs = featureSetSpecs; this.sourceSpec = sourceSpec; this.store = store; this.originalJob = originalJob; this.jobManager = jobManager; + this.jobUpdateTimeoutSeconds = jobUpdateTimeoutSeconds; } @Override @@ -214,8 +215,4 @@ String createJobId(String sourceId, String storeName) { String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; return jobId.replaceAll("_", "-"); } - - long getJobUpdateTimeoutSeconds() { - return JOB_UPDATE_TIMEOUT_SECONDS; - } } diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 414f589f57..c386f86a2e 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -25,6 +25,7 @@ import 
feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.StoreProto; import feast.core.StoreProto.Store.Subscription; +import feast.core.config.FeastProperties.JobUpdatesProperties; import feast.core.dao.FeatureSetRepository; import feast.core.dao.JobInfoRepository; import feast.core.job.JobManager; @@ -59,17 +60,20 @@ public class JobCoordinatorService { private FeatureSetRepository featureSetRepository; private SpecService specService; private JobManager jobManager; + private JobUpdatesProperties jobUpdatesProperties; @Autowired public JobCoordinatorService( JobInfoRepository jobInfoRepository, FeatureSetRepository featureSetRepository, SpecService specService, - JobManager jobManager) { + JobManager jobManager, + JobUpdatesProperties jobUpdatesProperties) { this.jobInfoRepository = jobInfoRepository; this.featureSetRepository = featureSetRepository; this.specService = specService; this.jobManager = jobManager; + this.jobUpdatesProperties = jobUpdatesProperties; } /** @@ -115,7 +119,7 @@ public void Poll() { getJob(Source.fromProto(kv.getKey()), Store.fromProto(store)); jobUpdateTasks.add( new JobUpdateTask( - kv.getValue(), kv.getKey(), store, originalJob, jobManager)); + kv.getValue(), kv.getKey(), store, originalJob, jobManager, jobUpdatesProperties.getTimeoutSeconds())); }); } } catch (InvalidProtocolBufferException e) { diff --git a/core/src/main/resources/application.yml b/core/src/main/resources/application.yml index ed11d6c9eb..7c0d90e5f4 100644 --- a/core/src/main/resources/application.yml +++ b/core/src/main/resources/application.yml @@ -30,6 +30,9 @@ feast: runner: DirectRunner # Key-value dict of job options to be passed to the population jobs. options: {} + updates: + # Timeout in seconds for each attempt to update or submit a new job to the runner. + timeoutSeconds: 240 metrics: # Enable metrics pushing for all ingestion jobs. 
enabled: false diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java index 42647fa6fd..4d0a821068 100644 --- a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -94,7 +94,7 @@ public void shouldUpdateJobIfPresent() { source, store, Optional.of(originalJob), - jobManager); + jobManager, 100L); JobInfo submittedJob = new JobInfo( "job", @@ -128,7 +128,7 @@ public void shouldCreateJobIfNotPresent() { JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager)); + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); when(jobManager.startJob("job", Arrays.asList(featureSet1), store)).thenReturn("new_ext"); @@ -161,7 +161,7 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( - Arrays.asList(featureSet1), source, store, Optional.of(originalJob), jobManager); + Arrays.asList(featureSet1), source, store, Optional.of(originalJob), jobManager, 100L); when(jobManager.getJobStatus(originalJob)).thenReturn(JobStatus.ABORTING); JobInfo expected = @@ -185,7 +185,7 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager)); + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); when(jobManager.startJob("job", Arrays.asList(featureSet1), store)) @@ -211,8 +211,7 @@ public void shouldTimeout() { JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager)); - doReturn(0L).when(jobUpdateTask).getJobUpdateTimeoutSeconds(); + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 0L)); JobInfo actual = jobUpdateTask.call(); assertThat(actual, is(IsNull.nullValue())); } diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index c929677af6..ec53db8660 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -38,6 +38,7 @@ import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; +import feast.core.config.FeastProperties.JobUpdatesProperties; import feast.core.dao.FeatureSetRepository; import feast.core.dao.JobInfoRepository; import feast.core.job.JobManager; @@ -62,16 +63,20 @@ public class JobCoordinatorServiceTest { @Mock SpecService specService; @Mock FeatureSetRepository featureSetRepository; + private JobUpdatesProperties jobUpdatesProperties; + @Before public void setUp() { initMocks(this); + jobUpdatesProperties = new JobUpdatesProperties(); + jobUpdatesProperties.setTimeoutSeconds(5); } @Test public void shouldDoNothingIfNoStoresFound() { when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build()); JobCoordinatorService jcs = - new 
JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @@ -91,7 +96,7 @@ public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocol Filter.newBuilder().setFeatureSetName("*").setFeatureSetVersion(">0").build())) .thenReturn(ListFeatureSetsResponse.newBuilder().build()); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @@ -138,7 +143,7 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(1)).saveAndFlush(jobInfoArgCaptor.capture()); JobInfo actual = jobInfoArgCaptor.getValue(); @@ -206,7 +211,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager); + new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(2)).saveAndFlush(jobInfoArgCaptor.capture()); From f4f97d8b129d47c05d0a40aece681a5b7fa4a077 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Mon, 16 Dec 2019 19:01:38 +0800 Subject: [PATCH 04/12] Set job update timeout --- .prow/scripts/test-end-to-end-batch.sh | 2 ++ .prow/scripts/test-end-to-end.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index b370c5b045..235a8da199 100755 --- a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -108,6 +108,8 @@ feast: jobs: runner: DirectRunner options: {} + updates: + timeoutSeconds: 240 metrics: enabled: false diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index 2c6f4a098f..c012ef1ae7 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -91,6 +91,8 @@ feast: jobs: runner: DirectRunner options: {} + updates: + timeoutSeconds: 240 metrics: enabled: false From ae41473ac36ca9f5f632931421b494b13fb2c77f Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 12:02:31 +0800 Subject: [PATCH 05/12] Change JobInfo to Job, move Job object instantiation to JobManagers --- .../java/feast/core/config/JobConfig.java | 4 +- .../feast/core/dao/JobInfoRepository.java | 10 +- .../main/java/feast/core/job/JobManager.java | 16 ++-- .../java/feast/core/job/JobUpdateTask.java | 77 +++++++-------- .../core/job/dataflow/DataflowJobManager.java | 69 ++++++++++---- .../job/direct/DirectRunnerJobManager.java | 55 +++++++---- .../core/model/{JobInfo.java => Job.java} | 8 +- .../main/java/feast/core/model/Metrics.java | 6 +- .../core/service/JobCoordinatorService.java | 25 +++-- 
.../feast/core/job/JobUpdateTaskTest.java | 64 +++++++------ .../job/dataflow/DataflowJobManagerTest.java | 30 +++++- .../direct/DirectRunnerJobManagerTest.java | 20 +++- .../service/JobCoordinatorServiceTest.java | 93 ++++++++++--------- 13 files changed, 289 insertions(+), 188 deletions(-) rename core/src/main/java/feast/core/model/{JobInfo.java => Job.java} (94%) diff --git a/core/src/main/java/feast/core/config/JobConfig.java b/core/src/main/java/feast/core/config/JobConfig.java index 238ae184d6..728fc0545b 100644 --- a/core/src/main/java/feast/core/config/JobConfig.java +++ b/core/src/main/java/feast/core/config/JobConfig.java @@ -100,9 +100,7 @@ public DirectJobRegistry directJobRegistry() { return new DirectJobRegistry(); } - /** - * Extracts job update options from feast core options. - */ + /** Extracts job update options from feast core options. */ @Bean public JobUpdatesProperties jobUpdatesProperties(FeastProperties feastProperties) { return feastProperties.getJobs().getUpdates(); diff --git a/core/src/main/java/feast/core/dao/JobInfoRepository.java b/core/src/main/java/feast/core/dao/JobInfoRepository.java index 97de53ecf2..b670243d0e 100644 --- a/core/src/main/java/feast/core/dao/JobInfoRepository.java +++ b/core/src/main/java/feast/core/dao/JobInfoRepository.java @@ -16,17 +16,17 @@ */ package feast.core.dao; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import java.util.Collection; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; -/** JPA repository supplying JobInfo objects keyed by ID. */ +/** JPA repository supplying Job objects keyed by ID. */ @Repository -public interface JobInfoRepository extends JpaRepository { - List findByStatusNotIn(Collection statuses); +public interface JobInfoRepository extends JpaRepository { + List findByStatusNotIn(Collection statuses); - List findBySourceIdAndStoreNameOrderByLastUpdatedDesc(String sourceId, String storeName); + List findBySourceIdAndStoreNameOrderByLastUpdatedDesc(String sourceId, String storeName); } diff --git a/core/src/main/java/feast/core/job/JobManager.java b/core/src/main/java/feast/core/job/JobManager.java index d48ee77c3f..21298d20fc 100644 --- a/core/src/main/java/feast/core/job/JobManager.java +++ b/core/src/main/java/feast/core/job/JobManager.java @@ -17,8 +17,9 @@ package feast.core.job; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto.Source; import feast.core.StoreProto.Store; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import java.util.List; @@ -36,18 +37,19 @@ public interface JobManager { * * @param name of job to run * @param featureSets list of featureSets to be populated by the job + * @param source Source to retrieve features from * @param sink Store to sink features to - * @return runner specific job id + * @return Job */ - String startJob(String name, List featureSets, Store sink); + Job startJob(String name, List featureSets, Source source, Store sink); /** * Update already running job with new set of features to ingest. * - * @param jobInfo jobInfo of target job to change - * @return job runner specific job id + * @param job job of target job to change + * @return Job */ - String updateJob(JobInfo jobInfo); + Job updateJob(Job job); /** * Abort a job given runner-specific job ID. @@ -62,5 +64,5 @@ public interface JobManager { * @param job job. * @return job status. 
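* <p>Implementations are expected to map runner-specific states onto Feast's JobStatus; the Dataflow manager, for instance, reports JobStatus.UNKNOWN when the state cannot be retrieved.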
*/ - JobStatus getJobStatus(JobInfo job); + JobStatus getJobStatus(Job job); } diff --git a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java index 6adf9ac68b..a5b45e621c 100644 --- a/core/src/main/java/feast/core/job/JobUpdateTask.java +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -23,7 +23,7 @@ import feast.core.log.AuditLogger; import feast.core.log.Resource; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; import java.time.Instant; @@ -45,16 +45,16 @@ * JobUpdateTask is a callable that starts or updates a job given a set of featureSetSpecs, as well * as their source and sink. * - *
<p>When complete, the JobUpdateTask returns the updated JobInfo object to be pushed to the db. + * <p>
When complete, the JobUpdateTask returns the updated Job object to be pushed to the db. */ @Slf4j @Getter -public class JobUpdateTask implements Callable { +public class JobUpdateTask implements Callable { private final List featureSetSpecs; private final SourceProto.Source sourceSpec; private final StoreProto.Store store; - private final Optional originalJob; + private final Optional currentJob; private JobManager jobManager; private long jobUpdateTimeoutSeconds; @@ -62,26 +62,26 @@ public JobUpdateTask( List featureSetSpecs, SourceProto.Source sourceSpec, StoreProto.Store store, - Optional originalJob, + Optional currentJob, JobManager jobManager, long jobUpdateTimeoutSeconds) { this.featureSetSpecs = featureSetSpecs; this.sourceSpec = sourceSpec; this.store = store; - this.originalJob = originalJob; + this.currentJob = currentJob; this.jobManager = jobManager; this.jobUpdateTimeoutSeconds = jobUpdateTimeoutSeconds; } @Override - public JobInfo call() { + public Job call() { ExecutorService executorService = Executors.newSingleThreadExecutor(); Source source = Source.fromProto(sourceSpec); - Future submittedJob; - if (originalJob.isPresent()) { + Future submittedJob; + if (currentJob.isPresent()) { Set existingFeatureSetsPopulatedByJob = - originalJob.get().getFeatureSets().stream() + currentJob.get().getFeatureSets().stream() .map(FeatureSet::getId) .collect(Collectors.toSet()); Set newFeatureSetsPopulatedByJob = @@ -90,7 +90,7 @@ public JobInfo call() { .collect(Collectors.toSet()); if (existingFeatureSetsPopulatedByJob.size() == newFeatureSetsPopulatedByJob.size() && existingFeatureSetsPopulatedByJob.containsAll(newFeatureSetsPopulatedByJob)) { - JobInfo job = originalJob.get(); + Job job = currentJob.get(); JobStatus newJobStatus = jobManager.getJobStatus(job); if (newJobStatus != job.getStatus()) { AuditLogger.log( @@ -105,7 +105,7 @@ public JobInfo call() { return job; } else { submittedJob = - executorService.submit(() -> updateJob(originalJob.get(), featureSetSpecs, store)); + executorService.submit(() -> updateJob(currentJob.get(), featureSetSpecs, store)); } } else { String jobId = createJobId(source.getId(), store.getName()); @@ -113,7 +113,7 @@ public JobInfo call() { executorService.submit(() -> startJob(jobId, featureSetSpecs, sourceSpec, store)); } - JobInfo job = null; + Job job = null; try { job = submittedJob.get(getJobUpdateTimeoutSeconds(), TimeUnit.SECONDS); } catch (InterruptedException | ExecutionException | TimeoutException e) { @@ -124,21 +124,12 @@ public JobInfo call() { } /** Start or update the job to ingest data to the sink. 
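* <p>Submitted from call() on a single-thread executor, so each attempt is bounded by jobUpdateTimeoutSeconds.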
*/ - private JobInfo startJob( + private Job startJob( String jobId, List featureSetSpecs, SourceProto.Source source, StoreProto.Store sinkSpec) { - List featureSets = - featureSetSpecs.stream() - .map( - spec -> { - FeatureSet featureSet = new FeatureSet(); - featureSet.setId(spec.getName() + ":" + spec.getVersion()); - return featureSet; - }) - .collect(Collectors.toList()); String extId = ""; try { AuditLogger.log( @@ -148,8 +139,8 @@ private JobInfo startJob( "Building graph and submitting to %s", jobManager.getRunnerType().getName()); - extId = jobManager.startJob(jobId, featureSetSpecs, sinkSpec); - if (extId.isEmpty()) { + Job job = jobManager.startJob(jobId, featureSetSpecs, sourceSpec, sinkSpec); + if (job.getExtId().isEmpty()) { throw new RuntimeException( String.format("Could not submit job: \n%s", "unable to retrieve job external id")); } @@ -162,14 +153,7 @@ private JobInfo startJob( jobManager.getRunnerType().getName(), extId); - return new JobInfo( - jobId, - extId, - jobManager.getRunnerType().getName(), - feast.core.model.Source.fromProto(source), - feast.core.model.Store.fromProto(sinkSpec), - featureSets, - JobStatus.RUNNING); + return job; } catch (Exception e) { AuditLogger.log( Resource.JOB, @@ -178,7 +162,17 @@ private JobInfo startJob( "Job failed to be submitted to runner %s. Job status changed to ERROR.", jobManager.getRunnerType().getName()); - return new JobInfo( + List featureSets = + featureSetSpecs.stream() + .map( + spec -> { + FeatureSet featureSet = new FeatureSet(); + featureSet.setId(spec.getName() + ":" + spec.getVersion()); + return featureSet; + }) + .collect(Collectors.toList()); + + return new Job( jobId, extId, jobManager.getRunnerType().getName(), @@ -190,23 +184,20 @@ private JobInfo startJob( } /** Update the given job */ - private JobInfo updateJob( - JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { - jobInfo.setFeatureSets( + private Job updateJob(Job job, List featureSetSpecs, StoreProto.Store store) { + job.setFeatureSets( featureSetSpecs.stream() .map(spec -> FeatureSet.fromSpec(spec)) .collect(Collectors.toList())); - jobInfo.setStore(feast.core.model.Store.fromProto(store)); + job.setStore(feast.core.model.Store.fromProto(store)); AuditLogger.log( Resource.JOB, - jobInfo.getId(), + job.getId(), Action.UPDATE, "Updating job %s for runner %s", - jobInfo.getId(), + job.getId(), jobManager.getRunnerType().getName()); - String extId = jobManager.updateJob(jobInfo); - jobInfo.setExtId(extId); - return jobInfo; + return jobManager.updateJob(job); } String createJobId(String sourceId, String storeName) { diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index c66a0e03aa..7bc8f0c5e6 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -19,20 +19,22 @@ import static feast.core.util.PipelineUtil.detectClassPathResourcesToStage; import com.google.api.services.dataflow.Dataflow; -import com.google.api.services.dataflow.model.Job; import com.google.common.base.Strings; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.StoreProto.Store; +import feast.core.SourceProto; +import feast.core.StoreProto; import feast.core.config.FeastProperties.MetricsProperties; import 
feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import feast.ingestion.options.ImportOptions; @@ -41,6 +43,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.beam.runners.dataflow.DataflowPipelineJob; import org.apache.beam.runners.dataflow.DataflowRunner; @@ -73,27 +76,32 @@ public Runner getRunnerType() { } @Override - public String startJob(String name, List featureSets, Store sink) { - return submitDataflowJob(name, featureSets, sink, false); + public Job startJob( + String name, + List featureSets, + SourceProto.Source source, + StoreProto.Store sink) { + return submitDataflowJob(name, featureSets, source, sink, false); } /** * Update an existing Dataflow job. * - * @param jobInfo jobInfo of target job to change + * @param job job of target job to change * @return Dataflow-specific job id */ @Override - public String updateJob(JobInfo jobInfo) { + public Job updateJob(Job job) { try { List featureSetSpecs = new ArrayList<>(); - for (FeatureSet featureSet : jobInfo.getFeatureSets()) { + for (FeatureSet featureSet : job.getFeatureSets()) { featureSetSpecs.add(featureSet.toProto().getSpec()); } return submitDataflowJob( - jobInfo.getId(), featureSetSpecs, jobInfo.getStore().toProto(), true); + job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), true); + } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(String.format("Unable to update job %s", jobInfo.getId()), e); + throw new RuntimeException(String.format("Unable to update job %s", job.getId()), e); } } @@ -105,9 +113,10 @@ public String updateJob(JobInfo jobInfo) { @Override public void abortJob(String dataflowJobId) { try { - Job job = + com.google.api.services.dataflow.model.Job job = dataflow.projects().locations().jobs().get(projectId, location, dataflowJobId).execute(); - Job content = new Job(); + com.google.api.services.dataflow.model.Job content = + new com.google.api.services.dataflow.model.Job(); if (job.getType().equals(DataflowJobType.JOB_TYPE_BATCH.toString())) { content.setRequestedState(DataflowJobState.JOB_STATE_CANCELLED.toString()); } else if (job.getType().equals(DataflowJobType.JOB_TYPE_STREAMING.toString())) { @@ -129,17 +138,17 @@ public void abortJob(String dataflowJobId) { /** * Get status of a dataflow job with given id and try to map it into Feast's JobStatus. * - * @param jobInfo JobInfo containing dataflow job id + * @param jobInfo Job containing dataflow job id * @return status of the job, or return {@link JobStatus#UNKNOWN} if error happens. 
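* <p>Jobs that were not started by the Dataflow runner are returned with their stored status unchanged.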
*/ @Override - public JobStatus getJobStatus(JobInfo jobInfo) { + public JobStatus getJobStatus(Job jobInfo) { if (!Runner.DATAFLOW.getName().equals(jobInfo.getRunner())) { return jobInfo.getStatus(); } try { - Job job = + com.google.api.services.dataflow.model.Job job = dataflow .projects() .locations() @@ -156,13 +165,33 @@ public JobStatus getJobStatus(JobInfo jobInfo) { return JobStatus.UNKNOWN; } - private String submitDataflowJob( - String jobName, List featureSets, Store sink, boolean update) { + private Job submitDataflowJob( + String jobName, + List featureSetSpecs, + SourceProto.Source source, + StoreProto.Store sink, + boolean update) { try { - ImportOptions pipelineOptions = getPipelineOptions(jobName, featureSets, sink, update); + ImportOptions pipelineOptions = getPipelineOptions(jobName, featureSetSpecs, sink, update); DataflowPipelineJob pipelineResult = runPipeline(pipelineOptions); + List featureSets = + featureSetSpecs.stream() + .map( + spec -> { + FeatureSet featureSet = new FeatureSet(); + featureSet.setId(spec.getName() + ":" + spec.getVersion()); + return featureSet; + }) + .collect(Collectors.toList()); String jobId = waitForJobToRun(pipelineResult); - return jobId; + return new Job( + jobName, + jobId, + getRunnerType().getName(), + Source.fromProto(source), + Store.fromProto(sink), + featureSets, + JobStatus.PENDING); } catch (Exception e) { log.error("Error submitting job", e); throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); @@ -170,7 +199,7 @@ private String submitDataflowJob( } private ImportOptions getPipelineOptions( - String jobName, List featureSets, Store sink, boolean update) + String jobName, List featureSets, StoreProto.Store sink, boolean update) throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index 77ef3b5935..57d832c98e 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -21,14 +21,17 @@ import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import feast.ingestion.options.ImportOptions; @@ -37,6 +40,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.sdk.PipelineResult; @@ -68,19 +72,39 @@ public Runner getRunnerType() { /** * Start a direct runner job. 
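* <p>The resulting PipelineResult is wrapped in a DirectJob and registered with the DirectJobRegistry so the job can later be aborted or polled for status.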
* - * @param name of job to run + * @param jobName of job to run * @param featureSetSpecs list of specs for featureSets to be populated by the job - * @param sinkSpec Store to sink features to + * @param source Source to retrieve features from + * @param sink Store to sink features to */ @Override - public String startJob( - String name, List featureSetSpecs, StoreProto.Store sinkSpec) { + public Job startJob( + String jobName, + List featureSetSpecs, + SourceProto.Source source, + StoreProto.Store sink) { + List featureSets = + featureSetSpecs.stream() + .map( + spec -> { + FeatureSet featureSet = new FeatureSet(); + featureSet.setId(spec.getName() + ":" + spec.getVersion()); + return featureSet; + }) + .collect(Collectors.toList()); try { - ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, sinkSpec); + ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, sink); PipelineResult pipelineResult = runPipeline(pipelineOptions); - DirectJob directJob = new DirectJob(name, pipelineResult); + DirectJob directJob = new DirectJob(jobName, pipelineResult); jobs.add(directJob); - return name; + return new Job( + jobName, + jobName, + getRunnerType().getName(), + Source.fromProto(source), + Store.fromProto(sink), + featureSets, + JobStatus.RUNNING); } catch (Exception e) { log.error("Error submitting job", e); throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); @@ -119,23 +143,22 @@ private ImportOptions getPipelineOptions( * *
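* <p>Updating a direct job aborts the currently running pipeline and resubmits it with the new feature sets.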
<p>
As a rule of thumb, direct jobs in feast should only be used for testing. * - * @param jobInfo jobInfo of target job to change + * @param job job of target job to change * @return jobId of the job */ @Override - public String updateJob(JobInfo jobInfo) { - String jobId = jobInfo.getExtId(); + public Job updateJob(Job job) { + String jobId = job.getExtId(); abortJob(jobId); try { List featureSetSpecs = new ArrayList<>(); - for (FeatureSet featureSet : jobInfo.getFeatureSets()) { + for (FeatureSet featureSet : job.getFeatureSets()) { featureSetSpecs.add(featureSet.toProto().getSpec()); } - startJob(jobId, featureSetSpecs, jobInfo.getStore().toProto()); + return startJob(jobId, featureSetSpecs, job.getSource().toProto(), job.getStore().toProto()); } catch (JobExecutionException | InvalidProtocolBufferException e) { throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); } - return jobId; } /** @@ -163,11 +186,11 @@ public PipelineResult runPipeline(ImportOptions pipelineOptions) throws IOExcept * Gets the state of the direct runner job. Direct runner jobs only have 2 states: RUNNING and * ABORTED. * - * @param job JobInfo of the desired job. + * @param job Job of the desired job. * @return JobStatus of the job. */ @Override - public JobStatus getJobStatus(JobInfo job) { + public JobStatus getJobStatus(Job job) { DirectJob directJob = jobs.get(job.getId()); if (directJob == null) { return JobStatus.ABORTED; diff --git a/core/src/main/java/feast/core/model/JobInfo.java b/core/src/main/java/feast/core/model/Job.java similarity index 94% rename from core/src/main/java/feast/core/model/JobInfo.java rename to core/src/main/java/feast/core/model/Job.java index 74d3402af5..851e68367b 100644 --- a/core/src/main/java/feast/core/model/JobInfo.java +++ b/core/src/main/java/feast/core/model/Job.java @@ -39,7 +39,7 @@ @Setter @Entity @Table(name = "jobs") -public class JobInfo extends AbstractTimestampEntity { +public class Job extends AbstractTimestampEntity { // Internal job name. Generated by feast ingestion upon invocation. @Id private String id; @@ -71,18 +71,18 @@ public class JobInfo extends AbstractTimestampEntity { private List featureSets; // Job Metrics - @OneToMany(mappedBy = "jobInfo", cascade = CascadeType.ALL) + @OneToMany(mappedBy = "job", cascade = CascadeType.ALL) private List metrics; @Enumerated(EnumType.STRING) @Column(name = "status", length = 16) private JobStatus status; - public JobInfo() { + public Job() { super(); } - public JobInfo( + public Job( String id, String extId, String runner, diff --git a/core/src/main/java/feast/core/model/Metrics.java b/core/src/main/java/feast/core/model/Metrics.java index 1e25222baf..0b7514816f 100644 --- a/core/src/main/java/feast/core/model/Metrics.java +++ b/core/src/main/java/feast/core/model/Metrics.java @@ -41,7 +41,7 @@ public class Metrics extends AbstractTimestampEntity { @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "job_id") - private JobInfo jobInfo; + private Job job; /** Metrics name */ private String name; @@ -56,8 +56,8 @@ public class Metrics extends AbstractTimestampEntity { * @param metricsName metrics name. * @param value metrics value. 
*/ - public Metrics(JobInfo job, String metricsName, double value) { - this.jobInfo = job; + public Metrics(Job job, String metricsName, double value) { + this.job = job; this.name = metricsName; this.value = value; } diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index c386f86a2e..5e90be26ac 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -31,7 +31,7 @@ import feast.core.job.JobManager; import feast.core.job.JobUpdateTask; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; import feast.core.model.Store; @@ -115,11 +115,16 @@ public void Poll() { .stream() .forEach( kv -> { - Optional originalJob = + Optional originalJob = getJob(Source.fromProto(kv.getKey()), Store.fromProto(store)); jobUpdateTasks.add( new JobUpdateTask( - kv.getValue(), kv.getKey(), store, originalJob, jobManager, jobUpdatesProperties.getTimeoutSeconds())); + kv.getValue(), + kv.getKey(), + store, + originalJob, + jobManager, + jobUpdatesProperties.getTimeoutSeconds())); }); } } catch (InvalidProtocolBufferException e) { @@ -133,15 +138,15 @@ public void Poll() { log.info("Creating/Updating {} jobs...", jobUpdateTasks.size()); ExecutorService executorService = Executors.newFixedThreadPool(jobUpdateTasks.size()); - ExecutorCompletionService ecs = new ExecutorCompletionService<>(executorService); + ExecutorCompletionService ecs = new ExecutorCompletionService<>(executorService); jobUpdateTasks.forEach(ecs::submit); int completedTasks = 0; while (completedTasks < jobUpdateTasks.size()) { try { - JobInfo jobInfo = ecs.take().get(); - if (jobInfo != null) { - jobInfoRepository.saveAndFlush(jobInfo); + Job job = ecs.take().get(); + if (job != null) { + jobInfoRepository.saveAndFlush(job); } } catch (ExecutionException | InterruptedException e) { log.warn("Unable to start or update job: {}", e.getMessage()); @@ -158,7 +163,7 @@ private void updateFeatureSetStatuses(List jobUpdateTasks) { Set ready = new HashSet<>(); Set pending = new HashSet<>(); for (JobUpdateTask jobUpdateTask : jobUpdateTasks) { - Optional job = + Optional job = getJob( Source.fromProto(jobUpdateTask.getSourceSpec()), Store.fromProto(jobUpdateTask.getStore())); @@ -185,8 +190,8 @@ private void updateFeatureSetStatuses(List jobUpdateTasks) { } @Transactional - public Optional getJob(Source source, Store store) { - List jobs = + public Optional getJob(Source source, Store store) { + List jobs = jobInfoRepository.findBySourceIdAndStoreNameOrderByLastUpdatedDesc( source.getId(), store.getName()); jobs = diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java index 4d0a821068..5441cca0b3 100644 --- a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -33,7 +33,7 @@ import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; import feast.core.model.Store; @@ -79,8 +79,8 @@ public void shouldUpdateJobIfPresent() { FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); 
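// A second feature set on the same source; the running job only populates featureSet1, so the task should take the update path rather than starting a new job.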
FeatureSetSpec featureSet2 = FeatureSetSpec.newBuilder().setName("featureSet2").setVersion(1).setSource(source).build(); - JobInfo originalJob = - new JobInfo( + Job originalJob = + new Job( "job", "old_ext", Runner.DATAFLOW.getName(), @@ -94,9 +94,10 @@ public void shouldUpdateJobIfPresent() { source, store, Optional.of(originalJob), - jobManager, 100L); - JobInfo submittedJob = - new JobInfo( + jobManager, + 100L); + Job submittedJob = + new Job( "job", "old_ext", Runner.DATAFLOW.getName(), @@ -105,18 +106,18 @@ public void shouldUpdateJobIfPresent() { Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), JobStatus.RUNNING); - when(jobManager.updateJob(submittedJob)).thenReturn("new_ext"); - when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); - JobInfo expected = - new JobInfo( + Job expected = + new Job( "job", "new_ext", Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), - JobStatus.RUNNING); - JobInfo actual = jobUpdateTask.call(); + JobStatus.PENDING); + when(jobManager.updateJob(submittedJob)).thenReturn(expected); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + Job actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); } @@ -130,11 +131,9 @@ public void shouldCreateJobIfNotPresent() { new JobUpdateTask( Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); - when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); - when(jobManager.startJob("job", Arrays.asList(featureSet1), store)).thenReturn("new_ext"); - JobInfo expected = - new JobInfo( + Job expected = + new Job( "job", "ext", Runner.DATAFLOW.getName(), @@ -142,7 +141,12 @@ public void shouldCreateJobIfNotPresent() { feast.core.model.Store.fromProto(store), Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.RUNNING); - JobInfo actual = jobUpdateTask.call(); + + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + when(jobManager.startJob("job", Arrays.asList(featureSet1), source, store)) + .thenReturn(expected); + + Job actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); } @@ -150,8 +154,8 @@ public void shouldCreateJobIfNotPresent() { public void shouldUpdateJobStatusIfNotCreateOrUpdate() { FeatureSetSpec featureSet1 = FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); - JobInfo originalJob = - new JobInfo( + Job originalJob = + new Job( "job", "ext", Runner.DATAFLOW.getName(), @@ -164,8 +168,8 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { Arrays.asList(featureSet1), source, store, Optional.of(originalJob), jobManager, 100L); when(jobManager.getJobStatus(originalJob)).thenReturn(JobStatus.ABORTING); - JobInfo expected = - new JobInfo( + Job expected = + new Job( "job", "ext", Runner.DATAFLOW.getName(), @@ -173,7 +177,7 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { Store.fromProto(store), Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.ABORTING); - JobInfo actual = jobUpdateTask.call(); + Job actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); } @@ -187,12 +191,9 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { new JobUpdateTask( Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", 
"test"); - when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); - when(jobManager.startJob("job", Arrays.asList(featureSet1), store)) - .thenThrow(new RuntimeException("Something went wrong")); - JobInfo expected = - new JobInfo( + Job expected = + new Job( "job", "", Runner.DATAFLOW.getName(), @@ -200,7 +201,12 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { feast.core.model.Store.fromProto(store), Arrays.asList(FeatureSet.fromSpec(featureSet1)), JobStatus.ERROR); - JobInfo actual = jobUpdateTask.call(); + + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + when(jobManager.startJob("job", Arrays.asList(featureSet1), source, store)) + .thenThrow(new RuntimeException("Something went wrong")); + + Job actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); } @@ -212,7 +218,7 @@ public void shouldTimeout() { spy( new JobUpdateTask( Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 0L)); - JobInfo actual = jobUpdateTask.call(); + Job actual = jobUpdateTask.call(); assertThat(actual, is(IsNull.nullValue())); } } diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java index c2c47a8d03..8c34dfe186 100644 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java +++ b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java @@ -31,11 +31,15 @@ import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; +import feast.core.model.Job; import feast.ingestion.options.ImportOptions; import java.io.IOException; import java.util.HashMap; @@ -82,6 +86,16 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) .build(); + SourceProto.Source source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) + .build(); + FeatureSetSpec featureSetSpec = FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); @@ -108,7 +122,7 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { when(mockPipelineResult.getJobId()).thenReturn(expectedExtJobId); doReturn(mockPipelineResult).when(dfJobManager).runPipeline(any()); - String jobId = dfJobManager.startJob(jobName, Lists.newArrayList(featureSetSpec), store); + Job job = dfJobManager.startJob(jobName, Lists.newArrayList(featureSetSpec), source, store); verify(dfJobManager, times(1)).runPipeline(captor.capture()); ImportOptions actualPipelineOptions = captor.getValue(); @@ -129,7 +143,7 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { expectedPipelineOptions.setFilesToStage(actualPipelineOptions.getFilesToStage()); assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); - assertThat(jobId, equalTo(expectedExtJobId)); + assertThat(job.getExtId(), equalTo(expectedExtJobId)); } @Test 
@@ -141,6 +155,16 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) .build(); + SourceProto.Source source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) + .build(); + FeatureSetSpec featureSetSpec = FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); @@ -152,6 +176,6 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { doReturn(mockPipelineResult).when(dfJobManager).runPipeline(any()); expectedException.expect(JobExecutionException.class); - dfJobManager.startJob("job", Lists.newArrayList(featureSetSpec), store); + dfJobManager.startJob("job", Lists.newArrayList(featureSetSpec), source, store); } } diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index f78060269c..d493bf330a 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -29,10 +29,15 @@ import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.Source; +import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.config.FeastProperties.MetricsProperties; +import feast.core.model.Job; import feast.ingestion.options.ImportOptions; import java.io.IOException; import java.util.HashMap; @@ -76,6 +81,16 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) .build(); + SourceProto.Source source = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) + .build(); + FeatureSetSpec featureSetSpec = FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); @@ -100,7 +115,8 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { PipelineResult mockPipelineResult = Mockito.mock(PipelineResult.class); doReturn(mockPipelineResult).when(drJobManager).runPipeline(any()); - String jobId = drJobManager.startJob(expectedJobId, Lists.newArrayList(featureSetSpec), store); + Job job = + drJobManager.startJob(expectedJobId, Lists.newArrayList(featureSetSpec), source, store); verify(drJobManager, times(1)).runPipeline(pipelineOptionsCaptor.capture()); verify(directJobRegistry, times(1)).add(directJobCaptor.capture()); @@ -112,7 +128,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); assertThat(jobStarted.getPipelineResult(), equalTo(mockPipelineResult)); assertThat(jobStarted.getJobId(), equalTo(expectedJobId)); - assertThat(jobId, equalTo(expectedJobId)); + assertThat(job.getExtId(), equalTo(expectedJobId)); } @Test diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java 
b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index ec53db8660..12af759704 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -44,7 +44,7 @@ import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import java.util.Arrays; import java.util.List; @@ -76,7 +76,8 @@ public void setUp() { public void shouldDoNothingIfNoStoresFound() { when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build()); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + new JobCoordinatorService( + jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @@ -96,7 +97,8 @@ public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocol Filter.newBuilder().setFeatureSetName("*").setFeatureSetVersion(">0").build())) .thenReturn(ListFeatureSetsResponse.newBuilder().build()); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + new JobCoordinatorService( + jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(0)).saveAndFlush(any()); } @@ -126,7 +128,17 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep FeatureSetSpec featureSet2 = FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source).build(); String extId = "ext"; - ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(JobInfo.class); + ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(Job.class); + + Job expected = + new Job( + "some_id", + extId, + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), + JobStatus.RUNNING); when(specService.listFeatureSets( Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) @@ -138,24 +150,17 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1, featureSet2)), eq(store))) - .thenReturn(extId); + when(jobManager.startJob( + any(), eq(Arrays.asList(featureSet1, featureSet2)), eq(source), eq(store))) + .thenReturn(expected); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + new JobCoordinatorService( + jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(1)).saveAndFlush(jobInfoArgCaptor.capture()); - JobInfo actual = jobInfoArgCaptor.getValue(); - JobInfo expected = - new JobInfo( - actual.getId(), - extId, - Runner.DATAFLOW.getName(), - feast.core.model.Source.fromProto(source), - feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1), 
FeatureSet.fromSpec(featureSet2)), - JobStatus.RUNNING); + Job actual = jobInfoArgCaptor.getValue(); assertThat(actual, equalTo(expected)); } @@ -192,9 +197,26 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source1).build(); FeatureSetSpec featureSet2 = FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source2).build(); - String extId1 = "ext1"; - String extId2 = "ext2"; - ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(JobInfo.class); + Job expected1 = + new Job( + "name1", + "extId1", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source1), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromSpec(featureSet1)), + JobStatus.RUNNING); + + Job expected2 = + new Job( + "name2", + "extId2", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source2), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromSpec(featureSet2)), + JobStatus.RUNNING); + ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(Job.class); when(specService.listFeatureSets( Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) @@ -206,36 +228,21 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1)), eq(store))).thenReturn(extId1); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet2)), eq(store))).thenReturn(extId2); + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1)), eq(source1), eq(store))) + .thenReturn(expected1); + when(jobManager.startJob(any(), eq(Arrays.asList(featureSet2)), eq(source2), eq(store))) + .thenReturn(expected2); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = - new JobCoordinatorService(jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + new JobCoordinatorService( + jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); verify(jobInfoRepository, times(2)).saveAndFlush(jobInfoArgCaptor.capture()); - List actual = jobInfoArgCaptor.getAllValues(); - JobInfo expected1 = - new JobInfo( - actual.get(0).getId(), - extId1, - Runner.DATAFLOW.getName(), - feast.core.model.Source.fromProto(source1), - feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), - JobStatus.RUNNING); - assertThat(actual.get(0), equalTo(expected1)); + List actual = jobInfoArgCaptor.getAllValues(); - JobInfo expected2 = - new JobInfo( - actual.get(1).getId(), - extId2, - Runner.DATAFLOW.getName(), - feast.core.model.Source.fromProto(source2), - feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet2)), - JobStatus.RUNNING); + assertThat(actual.get(0), equalTo(expected1)); assertThat(actual.get(1), equalTo(expected2)); } } From ff7657a820df9df23c18a84aa32674215cdbfac2 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 12:28:43 +0800 Subject: [PATCH 06/12] Increase kafka wait time --- .prow/scripts/test-end-to-end-batch.sh | 2 +- .prow/scripts/test-end-to-end.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index 235a8da199..bafb28506c 100755 --- 
a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -76,7 +76,7 @@ nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.prope sleep 5 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & -sleep 10 +sleep 15 tail -n10 /var/log/kafka.log echo " diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index c012ef1ae7..9133d31682 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -59,7 +59,7 @@ nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.prope sleep 5 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & -sleep 10 +sleep 15 tail -n10 /var/log/kafka.log echo " From 29b7eb5496813bfccc770fdc3b476756a8fa6a87 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 12:35:55 +0800 Subject: [PATCH 07/12] Remove Info from method --- core/src/main/java/feast/core/dao/MetricsRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/feast/core/dao/MetricsRepository.java b/core/src/main/java/feast/core/dao/MetricsRepository.java index c7bc483697..7146e1e3ec 100644 --- a/core/src/main/java/feast/core/dao/MetricsRepository.java +++ b/core/src/main/java/feast/core/dao/MetricsRepository.java @@ -23,5 +23,5 @@ @Repository public interface MetricsRepository extends JpaRepository { - List findByJobInfo_Id(String id); + List findByJob_Id(String id); } From 7109130b1d5177fdb756e9a34b40d96de0000ee2 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 13:46:09 +0800 Subject: [PATCH 08/12] Update feature set yamls to follow new format --- sdk/python/feast/feature_set.py | 4 +- .../all_types_parquet/all_types_parquet.yaml | 61 ++++++++++--------- tests/e2e/basic/cust_trans_fs.yaml | 21 ++++--- .../e2e/large_volume/cust_trans_large_fs.yaml | 23 +++---- 4 files changed, 56 insertions(+), 53 deletions(-) diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index cea36c3274..02c9e90cf2 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -425,7 +425,7 @@ def update_from_feature_set(self, feature_set): self.entities = feature_set.entities self.source = feature_set.source self.status = feature_set.status - self._created_timestamp = feature_set.created_timestamp + self.created_timestamp = feature_set.created_timestamp def get_kafka_source_brokers(self) -> str: """ @@ -481,7 +481,7 @@ def from_dict(cls, fs_dict): if ("kind" not in fs_dict) and (fs_dict["kind"].strip() != "feature_set"): raise Exception(f"Resource kind is not a feature set {str(fs_dict)}") feature_set_proto = json_format.ParseDict( - fs_dict, FeatureSetSpecProto(), ignore_unknown_fields=True + fs_dict, FeatureSetProto(), ignore_unknown_fields=True ) return cls.from_proto(feature_set_proto) diff --git a/tests/e2e/all_types_parquet/all_types_parquet.yaml b/tests/e2e/all_types_parquet/all_types_parquet.yaml index 85dd8c2c22..cf5ea70235 100644 --- a/tests/e2e/all_types_parquet/all_types_parquet.yaml +++ b/tests/e2e/all_types_parquet/all_types_parquet.yaml @@ -1,31 +1,32 @@ -name: all_types_parquet kind: feature_set -entities: - - name: customer_id - valueType: INT64 -features: - - name: int32_feature - valueType: INT64 - - name: int64_feature - valueType: INT64 - - name: float_feature - valueType: DOUBLE - - name: 
double_feature - valueType: DOUBLE - - name: string_feature - valueType: STRING - - name: bytes_feature - valueType: BYTES - - name: int32_list_feature - valueType: INT64_LIST - - name: int64_list_feature - valueType: INT64_LIST - - name: float_list_feature - valueType: DOUBLE_LIST - - name: double_list_feature - valueType: DOUBLE_LIST - - name: string_list_feature - valueType: STRING_LIST - - name: bytes_list_feature - valueType: BYTES_LIST -maxAge: 0s +spec: + name: all_types_parquet + entities: + - name: customer_id + valueType: INT64 + features: + - name: int32_feature + valueType: INT64 + - name: int64_feature + valueType: INT64 + - name: float_feature + valueType: DOUBLE + - name: double_feature + valueType: DOUBLE + - name: string_feature + valueType: STRING + - name: bytes_feature + valueType: BYTES + - name: int32_list_feature + valueType: INT64_LIST + - name: int64_list_feature + valueType: INT64_LIST + - name: float_list_feature + valueType: DOUBLE_LIST + - name: double_list_feature + valueType: DOUBLE_LIST + - name: string_list_feature + valueType: STRING_LIST + - name: bytes_list_feature + valueType: BYTES_LIST + maxAge: 0s diff --git a/tests/e2e/basic/cust_trans_fs.yaml b/tests/e2e/basic/cust_trans_fs.yaml index e72ee616eb..14d46794a6 100644 --- a/tests/e2e/basic/cust_trans_fs.yaml +++ b/tests/e2e/basic/cust_trans_fs.yaml @@ -1,11 +1,12 @@ -name: customer_transactions kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -maxAge: 3600s +spec: + name: customer_transactions + entities: + - name: customer_id + valueType: INT64 + features: + - name: daily_transactions + valueType: FLOAT + - name: total_transactions + valueType: FLOAT + maxAge: 3600s diff --git a/tests/e2e/large_volume/cust_trans_large_fs.yaml b/tests/e2e/large_volume/cust_trans_large_fs.yaml index 04707412aa..0ea0a12427 100644 --- a/tests/e2e/large_volume/cust_trans_large_fs.yaml +++ b/tests/e2e/large_volume/cust_trans_large_fs.yaml @@ -1,11 +1,12 @@ -name: customer_transactions_large -kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -maxAge: 3600s +spec: + name: customer_transactions_large + kind: feature_set + entities: + - name: customer_id + valueType: INT64 + features: + - name: daily_transactions + valueType: FLOAT + - name: total_transactions + valueType: FLOAT + maxAge: 3600s From ed4129485ca4a54451f5d05f0e10c534874386d8 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 15:26:26 +0800 Subject: [PATCH 09/12] Change toSpec to toProto, refactor job start signature, change Apply to take full FeatureSet object --- .../main/java/feast/core/job/JobManager.java | 11 +-- .../java/feast/core/job/JobUpdateTask.java | 48 ++++----- .../core/job/dataflow/DataflowJobManager.java | 26 +++-- .../job/direct/DirectRunnerJobManager.java | 46 +++------ .../java/feast/core/model/FeatureSet.java | 11 ++- .../core/service/JobCoordinatorService.java | 2 +- .../java/feast/core/service/SpecService.java | 11 ++- .../test/java/feast/core/job/JobMatcher.java | 38 +++++++ .../feast/core/job/JobUpdateTaskTest.java | 98 ++++++++++++++----- .../job/dataflow/DataflowJobManagerTest.java | 47 ++++++++- .../direct/DirectRunnerJobManagerTest.java | 33 ++++++- .../service/JobCoordinatorServiceTest.java | 86 +++++++++++----- .../feast/core/service/SpecServiceTest.java | 57 +++++++---- 
protos/feast/core/CoreService.proto | 2 +- sdk/python/feast/client.py | 2 +- sdk/python/feast/core/CoreService_pb2.py | 38 +++---- sdk/python/feast/core/CoreService_pb2.pyi | 5 +- sdk/python/tests/feast_core_server.py | 22 ++--- 18 files changed, 391 insertions(+), 192 deletions(-) create mode 100644 core/src/test/java/feast/core/job/JobMatcher.java diff --git a/core/src/main/java/feast/core/job/JobManager.java b/core/src/main/java/feast/core/job/JobManager.java index 21298d20fc..99880cdb76 100644 --- a/core/src/main/java/feast/core/job/JobManager.java +++ b/core/src/main/java/feast/core/job/JobManager.java @@ -16,12 +16,8 @@ */ package feast.core.job; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto.Source; -import feast.core.StoreProto.Store; import feast.core.model.Job; import feast.core.model.JobStatus; -import java.util.List; public interface JobManager { @@ -35,13 +31,10 @@ public interface JobManager { /** * Start an import job. * - * @param name of job to run - * @param featureSets list of featureSets to be populated by the job - * @param source Source to retrieve features from - * @param sink Store to sink features to + * @param job job to start * @return Job */ - Job startJob(String name, List featureSets, Source source, Store sink); + Job startJob(Job job); /** * Update already running job with new set of features to ingest. diff --git a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java index a5b45e621c..373a4a113d 100644 --- a/core/src/main/java/feast/core/job/JobUpdateTask.java +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -16,6 +16,7 @@ */ package feast.core.job; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.StoreProto; @@ -26,6 +27,7 @@ import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; +import feast.core.model.Store; import java.time.Instant; import java.util.List; import java.util.Optional; @@ -130,7 +132,22 @@ private Job startJob( SourceProto.Source source, StoreProto.Store sinkSpec) { - String extId = ""; + List featureSets = + featureSetSpecs.stream() + .map( + spec -> + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(spec).build())) + .collect(Collectors.toList()); + Job job = + new Job( + jobId, + "", + jobManager.getRunnerType().toString(), + Source.fromProto(source), + Store.fromProto(sinkSpec), + featureSets, + JobStatus.PENDING); try { AuditLogger.log( Resource.JOB, @@ -139,7 +156,7 @@ private Job startJob( "Building graph and submitting to %s", jobManager.getRunnerType().getName()); - Job job = jobManager.startJob(jobId, featureSetSpecs, sourceSpec, sinkSpec); + job = jobManager.startJob(job); if (job.getExtId().isEmpty()) { throw new RuntimeException( String.format("Could not submit job: \n%s", "unable to retrieve job external id")); @@ -151,7 +168,7 @@ private Job startJob( Action.STATUS_CHANGE, "Job submitted to runner %s with ext id %s.", jobManager.getRunnerType().getName(), - extId); + job.getExtId()); return job; } catch (Exception e) { @@ -162,24 +179,8 @@ private Job startJob( "Job failed to be submitted to runner %s. 
Job status changed to ERROR.", jobManager.getRunnerType().getName()); - List featureSets = - featureSetSpecs.stream() - .map( - spec -> { - FeatureSet featureSet = new FeatureSet(); - featureSet.setId(spec.getName() + ":" + spec.getVersion()); - return featureSet; - }) - .collect(Collectors.toList()); - - return new Job( - jobId, - extId, - jobManager.getRunnerType().getName(), - feast.core.model.Source.fromProto(source), - feast.core.model.Store.fromProto(sinkSpec), - featureSets, - JobStatus.ERROR); + job.setStatus(JobStatus.ERROR); + return job; } } @@ -187,7 +188,10 @@ private Job startJob( private Job updateJob(Job job, List featureSetSpecs, StoreProto.Store store) { job.setFeatureSets( featureSetSpecs.stream() - .map(spec -> FeatureSet.fromSpec(spec)) + .map( + spec -> + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(spec).build())) .collect(Collectors.toList())); job.setStore(feast.core.model.Store.fromProto(store)); AuditLogger.log( diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 7bc8f0c5e6..91761ef18a 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -76,12 +76,17 @@ public Runner getRunnerType() { } @Override - public Job startJob( - String name, - List featureSets, - SourceProto.Source source, - StoreProto.Store sink) { - return submitDataflowJob(name, featureSets, source, sink, false); + public Job startJob(Job job) { + List featureSetSpecs = + job.getFeatureSets().stream() + .map(fs -> fs.toProto().getSpec()) + .collect(Collectors.toList()); + try { + return submitDataflowJob( + job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), false); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(String.format("Unable to start job %s", job.getId()), e); + } } /** @@ -93,10 +98,11 @@ public Job startJob( @Override public Job updateJob(Job job) { try { - List featureSetSpecs = new ArrayList<>(); - for (FeatureSet featureSet : job.getFeatureSets()) { - featureSetSpecs.add(featureSet.toProto().getSpec()); - } + List featureSetSpecs = + job.getFeatureSets().stream() + .map(fs -> fs.toProto().getSpec()) + .collect(Collectors.toList()); + return submitDataflowJob( job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), true); diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index 57d832c98e..89c6dc3848 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -21,7 +21,6 @@ import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; @@ -30,8 +29,6 @@ import feast.core.model.FeatureSet; import feast.core.model.Job; import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import feast.ingestion.options.ImportOptions; @@ -72,39 +69,22 @@ public Runner getRunnerType() { /** * Start a direct runner job. 
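* <p>With this change the Job entity carries its own source, store, and feature sets, so they are no longer passed to the manager as separate arguments.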
* - * @param jobName of job to run - * @param featureSetSpecs list of specs for featureSets to be populated by the job - * @param source Source to retrieve features from - * @param sink Store to sink features to + * @param job Job to start */ @Override - public Job startJob( - String jobName, - List featureSetSpecs, - SourceProto.Source source, - StoreProto.Store sink) { - List featureSets = - featureSetSpecs.stream() - .map( - spec -> { - FeatureSet featureSet = new FeatureSet(); - featureSet.setId(spec.getName() + ":" + spec.getVersion()); - return featureSet; - }) - .collect(Collectors.toList()); + public Job startJob(Job job) { try { - ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, sink); + List featureSetSpecs = + job.getFeatureSets().stream() + .map(fs -> fs.toProto().getSpec()) + .collect(Collectors.toList()); + ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, job.getStore().toProto()); PipelineResult pipelineResult = runPipeline(pipelineOptions); - DirectJob directJob = new DirectJob(jobName, pipelineResult); + DirectJob directJob = new DirectJob(job.getId(), pipelineResult); jobs.add(directJob); - return new Job( - jobName, - jobName, - getRunnerType().getName(), - Source.fromProto(source), - Store.fromProto(sink), - featureSets, - JobStatus.RUNNING); + job.setExtId(job.getId()); + job.setStatus(JobStatus.RUNNING); + return job; } catch (Exception e) { log.error("Error submitting job", e); throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); @@ -155,8 +135,8 @@ public Job updateJob(Job job) { for (FeatureSet featureSet : job.getFeatureSets()) { featureSetSpecs.add(featureSet.toProto().getSpec()); } - return startJob(jobId, featureSetSpecs, job.getSource().toProto(), job.getStore().toProto()); - } catch (JobExecutionException | InvalidProtocolBufferException e) { + return startJob(job); + } catch (JobExecutionException e) { throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); } } diff --git a/core/src/main/java/feast/core/model/FeatureSet.java b/core/src/main/java/feast/core/model/FeatureSet.java index d6729469a7..388b27cb04 100644 --- a/core/src/main/java/feast/core/model/FeatureSet.java +++ b/core/src/main/java/feast/core/model/FeatureSet.java @@ -96,7 +96,8 @@ public FeatureSet( long maxAgeSeconds, List entities, List features, - Source source) { + Source source, + FeatureSetStatus status) { this.id = String.format("%s:%s", name, version); this.name = name; this.version = version; @@ -104,10 +105,11 @@ public FeatureSet( this.entities = entities; this.features = features; this.source = source; - this.status = FeatureSetStatus.STATUS_PENDING.toString(); + this.status = status.toString(); } - public static FeatureSet fromSpec(FeatureSetSpec featureSetSpec) { + public static FeatureSet fromProto(FeatureSetProto.FeatureSet featureSetProto) { + FeatureSetSpec featureSetSpec = featureSetProto.getSpec(); Source source = Source.fromProto(featureSetSpec.getSource()); String id = String.format("%s:%d", featureSetSpec.getName(), featureSetSpec.getVersion()); List features = new ArrayList<>(); @@ -125,7 +127,8 @@ public static FeatureSet fromSpec(FeatureSetSpec featureSetSpec) { featureSetSpec.getMaxAge().getSeconds(), entities, features, - source); + source, + featureSetProto.getMeta().getStatus()); } public FeatureSetProto.FeatureSet toProto() { diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java 
b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 5e90be26ac..2a2bd327e7 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -83,7 +83,7 @@ public JobCoordinatorService( * *

<p>2) Does a diff with the current set of jobs, starts/updates job(s) if necessary * - *
<p>3) Updates job object in DB with status, feature sets\ + *
<p>3) Updates job object in DB with status, feature sets * *
<p>
4) Updates Feature set statuses */ diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index 79cbbdc99c..937fc29717 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -32,6 +32,7 @@ import feast.core.CoreServiceProto.ListStoresResponse.Builder; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.StoreProto; @@ -214,10 +215,11 @@ public ListStoresResponse listStores(ListStoresRequest.Filter filter) { * this method will update the incoming featureSet spec with the latest version stored in the * repository, and return that. * - * @param newFeatureSetSpec featureSet to add. + * @param newFeatureSet featureSet to add. */ - public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) + public ApplyFeatureSetResponse applyFeatureSet(FeatureSetProto.FeatureSet newFeatureSet) throws InvalidProtocolBufferException { + FeatureSetSpec newFeatureSetSpec = newFeatureSet.getSpec(); FeatureSetValidator.validateSpec(newFeatureSetSpec); List existingFeatureSets = featureSetRepository.findByName(newFeatureSetSpec.getName()); @@ -227,7 +229,7 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) } else { existingFeatureSets = Ordering.natural().reverse().sortedCopy(existingFeatureSets); FeatureSet latest = existingFeatureSets.get(0); - FeatureSet featureSet = FeatureSet.fromSpec(newFeatureSetSpec); + FeatureSet featureSet = FeatureSet.fromProto(newFeatureSet); // If the featureSet remains unchanged, we do nothing. if (featureSet.equalTo(latest)) { @@ -238,7 +240,8 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) } newFeatureSetSpec = newFeatureSetSpec.toBuilder().setVersion(latest.getVersion() + 1).build(); } - FeatureSet featureSet = FeatureSet.fromSpec(newFeatureSetSpec); + newFeatureSet = newFeatureSet.toBuilder().setSpec(newFeatureSetSpec).build(); + FeatureSet featureSet = FeatureSet.fromProto(newFeatureSet); if (newFeatureSetSpec.getSource() == SourceProto.Source.getDefaultInstance()) { featureSet.setSource(defaultSource); } diff --git a/core/src/test/java/feast/core/job/JobMatcher.java b/core/src/test/java/feast/core/job/JobMatcher.java new file mode 100644 index 0000000000..87be05668f --- /dev/null +++ b/core/src/test/java/feast/core/job/JobMatcher.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.job; + +import feast.core.model.Job; +import org.mockito.ArgumentMatcher; + +public class JobMatcher implements ArgumentMatcher { + + private Job left; + + public JobMatcher(Job job) { + this.left = job; + } + + @Override + public boolean matches(Job right) { + if (right == null) { + return false; + } + left.setId(right.getId()); + return left.equals(right); + } +} diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java index 5441cca0b3..a1b4cdbab2 100644 --- a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -24,6 +24,7 @@ import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; @@ -75,10 +76,16 @@ public void setUp() { @Test public void shouldUpdateJobIfPresent() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); - FeatureSetSpec featureSet2 = - FeatureSetSpec.newBuilder().setName("featureSet2").setVersion(1).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); + FeatureSetProto.FeatureSet featureSet2 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet2").setVersion(1).setSource(source)) + .build(); Job originalJob = new Job( "job", @@ -86,11 +93,11 @@ public void shouldUpdateJobIfPresent() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( - Arrays.asList(featureSet1, featureSet2), + Arrays.asList(featureSet1.getSpec(), featureSet2.getSpec()), source, store, Optional.of(originalJob), @@ -103,7 +110,7 @@ public void shouldUpdateJobIfPresent() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); Job expected = @@ -113,7 +120,7 @@ public void shouldUpdateJobIfPresent() { Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), JobStatus.PENDING); when(jobManager.updateJob(submittedJob)).thenReturn(expected); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); @@ -124,14 +131,32 @@ public void shouldUpdateJobIfPresent() { @Test public void shouldCreateJobIfNotPresent() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - 
Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); + Arrays.asList(featureSet1.getSpec()), + source, + store, + Optional.empty(), + jobManager, + 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); + Job expectedInput = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.PENDING); + Job expected = new Job( "job", @@ -139,12 +164,11 @@ public void shouldCreateJobIfNotPresent() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.RUNNING); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); - when(jobManager.startJob("job", Arrays.asList(featureSet1), source, store)) - .thenReturn(expected); + when(jobManager.startJob(expectedInput)).thenReturn(expected); Job actual = jobUpdateTask.call(); assertThat(actual, equalTo(expected)); @@ -152,8 +176,11 @@ public void shouldCreateJobIfNotPresent() { @Test public void shouldUpdateJobStatusIfNotCreateOrUpdate() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); Job originalJob = new Job( "job", @@ -161,11 +188,16 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( - Arrays.asList(featureSet1), source, store, Optional.of(originalJob), jobManager, 100L); + Arrays.asList(featureSet1.getSpec()), + source, + store, + Optional.of(originalJob), + jobManager, + 100L); when(jobManager.getJobStatus(originalJob)).thenReturn(JobStatus.ABORTING); Job expected = @@ -175,7 +207,7 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.ABORTING); Job actual = jobUpdateTask.call(); @@ -184,14 +216,32 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { @Test public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); + Arrays.asList(featureSet1.getSpec()), + source, + store, + Optional.empty(), + jobManager, + 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); + Job expectedInput = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + 
Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.PENDING); + Job expected = new Job( "job", @@ -199,11 +249,11 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.ERROR); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); - when(jobManager.startJob("job", Arrays.asList(featureSet1), source, store)) + when(jobManager.startJob(expectedInput)) .thenThrow(new RuntimeException("Something went wrong")); Job actual = jobUpdateTask.call(); diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java index 8c34dfe186..5f72f0dd7a 100644 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java +++ b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java @@ -28,8 +28,10 @@ import com.google.api.services.dataflow.Dataflow; import com.google.common.collect.Lists; +import com.google.protobuf.Duration; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; @@ -37,9 +39,15 @@ import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; +import feast.core.job.Runner; +import feast.core.model.FeatureSet; import feast.core.model.Job; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.ingestion.options.ImportOptions; import java.io.IOException; import java.util.HashMap; @@ -84,6 +92,7 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) .build(); SourceProto.Source source = @@ -97,7 +106,12 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { .build(); FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); + FeatureSetSpec.newBuilder() + .setName("featureSet") + .setVersion(1) + .setSource(source) + .setMaxAge(Duration.newBuilder().build()) + .build(); Printer printer = JsonFormat.printer(); String expectedExtJobId = "feast-job-0"; @@ -122,7 +136,18 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { when(mockPipelineResult.getJobId()).thenReturn(expectedExtJobId); doReturn(mockPipelineResult).when(dfJobManager).runPipeline(any()); - Job job = dfJobManager.startJob(jobName, Lists.newArrayList(featureSetSpec), source, store); + Job job = + new Job( + jobName, + "", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Lists.newArrayList( + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + JobStatus.PENDING); + Job actual = dfJobManager.startJob(job); verify(dfJobManager, times(1)).runPipeline(captor.capture()); 
ImportOptions actualPipelineOptions = captor.getValue(); @@ -143,7 +168,7 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { expectedPipelineOptions.setFilesToStage(actualPipelineOptions.getFilesToStage()); assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); - assertThat(job.getExtId(), equalTo(expectedExtJobId)); + assertThat(actual.getExtId(), equalTo(expectedExtJobId)); } @Test @@ -166,7 +191,7 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { .build(); FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); + FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).setSource(source).build(); dfJobManager = Mockito.spy(dfJobManager); @@ -175,7 +200,19 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { doReturn(mockPipelineResult).when(dfJobManager).runPipeline(any()); + Job job = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Lists.newArrayList( + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + JobStatus.PENDING); + expectedException.expect(JobExecutionException.class); - dfJobManager.startJob("job", Lists.newArrayList(featureSetSpec), source, store); + dfJobManager.startJob(job); } } diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index d493bf330a..73cbd9030f 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -26,18 +26,25 @@ import static org.mockito.MockitoAnnotations.initMocks; import com.google.common.collect.Lists; +import com.google.protobuf.Duration; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.Source; import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.MetricsProperties; +import feast.core.job.Runner; +import feast.core.model.FeatureSet; import feast.core.model.Job; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.ingestion.options.ImportOptions; import java.io.IOException; import java.util.HashMap; @@ -79,10 +86,11 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) .build(); SourceProto.Source source = - Source.newBuilder() + SourceProto.Source.newBuilder() .setType(SourceType.KAFKA) .setKafkaSourceConfig( KafkaSourceConfig.newBuilder() @@ -92,7 +100,12 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { .build(); FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); + FeatureSetSpec.newBuilder() + .setName("featureSet") + 
.setVersion(1) + .setMaxAge(Duration.newBuilder()) + .setSource(source) + .build(); Printer printer = JsonFormat.printer(); @@ -116,7 +129,17 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { doReturn(mockPipelineResult).when(drJobManager).runPipeline(any()); Job job = - drJobManager.startJob(expectedJobId, Lists.newArrayList(featureSetSpec), source, store); + new Job( + expectedJobId, + "", + Runner.DIRECT.getName(), + Source.fromProto(source), + Store.fromProto(store), + Lists.newArrayList( + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + JobStatus.PENDING); + Job actual = drJobManager.startJob(job); verify(drJobManager, times(1)).runPipeline(pipelineOptionsCaptor.capture()); verify(directJobRegistry, times(1)).add(directJobCaptor.capture()); @@ -128,7 +151,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); assertThat(jobStarted.getPipelineResult(), equalTo(mockPipelineResult)); assertThat(jobStarted.getJobId(), equalTo(expectedJobId)); - assertThat(job.getExtId(), equalTo(expectedJobId)); + assertThat(actual.getExtId(), equalTo(expectedJobId)); } @Test diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 12af759704..033a657ead 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -19,7 +19,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -42,6 +42,7 @@ import feast.core.dao.FeatureSetRepository; import feast.core.dao.JobInfoRepository; import feast.core.job.JobManager; +import feast.core.job.JobMatcher; import feast.core.job.Runner; import feast.core.model.FeatureSet; import feast.core.model.Job; @@ -123,13 +124,29 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep .build()) .build(); - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source).build(); - FeatureSetSpec featureSet2 = - FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source)) + .build(); + FeatureSetProto.FeatureSet featureSet2 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source)) + .build(); String extId = "ext"; ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(Job.class); + Job expectedInput = + new Job( + "", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + JobStatus.PENDING); + Job expected = new Job( "some_id", @@ -137,22 +154,20 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep 
Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1), FeatureSet.fromSpec(featureSet2)), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); when(specService.listFeatureSets( Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) .thenReturn( ListFeatureSetsResponse.newBuilder() - .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet1)) - .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet2)) + .addFeatureSets(featureSet1) + .addFeatureSets(featureSet2) .build()); when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(jobManager.startJob( - any(), eq(Arrays.asList(featureSet1, featureSet2)), eq(source), eq(store))) - .thenReturn(expected); + when(jobManager.startJob(argThat(new JobMatcher(expectedInput)))).thenReturn(expected); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = @@ -193,10 +208,27 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { .build()) .build(); - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source1).build(); - FeatureSetSpec featureSet2 = - FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source2).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source1)) + .build(); + FeatureSetProto.FeatureSet featureSet2 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source2)) + .build(); + + Job expectedInput1 = + new Job( + "name1", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source1), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.PENDING); + Job expected1 = new Job( "name1", @@ -204,9 +236,19 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source1), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet1)), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.RUNNING); + Job expectedInput2 = + new Job( + "", + "extId2", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source2), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet2)), + JobStatus.PENDING); + Job expected2 = new Job( "name2", @@ -214,7 +256,7 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { Runner.DATAFLOW.getName(), feast.core.model.Source.fromProto(source2), feast.core.model.Store.fromProto(store), - Arrays.asList(FeatureSet.fromSpec(featureSet2)), + Arrays.asList(FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(Job.class); @@ -222,16 +264,14 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) .thenReturn( ListFeatureSetsResponse.newBuilder() - .addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet1)) - 
.addFeatureSets(FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSet2)) + .addFeatureSets(featureSet1) + .addFeatureSets(featureSet2) .build()); when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet1)), eq(source1), eq(store))) - .thenReturn(expected1); - when(jobManager.startJob(any(), eq(Arrays.asList(featureSet2)), eq(source2), eq(store))) - .thenReturn(expected2); + when(jobManager.startJob(argThat(new JobMatcher(expectedInput1)))).thenReturn(expected1); + when(jobManager.startJob(argThat(new JobMatcher(expectedInput2)))).thenReturn(expected2); when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); JobCoordinatorService jcs = diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index d08880afc1..dbf1290fb6 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -37,6 +37,7 @@ import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.FeatureSetProto.FeatureSpec; import feast.core.SourceProto.KafkaSourceConfig; import feast.core.SourceProto.SourceType; @@ -103,7 +104,13 @@ public void setUp() { Field f3e1 = new Field("f3", "f3e1", Enum.STRING); FeatureSet featureSet3v1 = new FeatureSet( - "f3", 1, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource); + "f3", + 1, + 100L, + Arrays.asList(f3e1), + Arrays.asList(f3f2, f3f1), + defaultSource, + FeatureSetStatus.STATUS_READY); featureSets = Arrays.asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1, featureSet3v1); @@ -302,10 +309,11 @@ public void shouldThrowRetrievalExceptionIfNoStoresFoundWithName() { @Test public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHasNotChanged() throws InvalidProtocolBufferException { - FeatureSetSpec incomingFeatureSet = + FeatureSetSpec incomingFeatureSetSpec = featureSets.get(2).toProto().getSpec().toBuilder().clearVersion().build(); ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet(incomingFeatureSet); + specService.applyFeatureSet( + FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSetSpec).build()); verify(featureSetRepository, times(0)).save(ArgumentMatchers.any(FeatureSet.class)); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); @@ -316,13 +324,15 @@ public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHa public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() throws InvalidProtocolBufferException { when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); - FeatureSetSpec incomingFeatureSet = + FeatureSetSpec incomingFeatureSetSpec = newDummyFeatureSet("f2", 1).toProto().getSpec().toBuilder().clearVersion().build(); + ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet(incomingFeatureSet); + specService.applyFeatureSet( + FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSetSpec).build()); verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); FeatureSetSpec expected = - incomingFeatureSet.toBuilder().setVersion(1).setSource(defaultSource.toProto()).build(); + 
incomingFeatureSetSpec.toBuilder().setVersion(1).setSource(defaultSource.toProto()).build(); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); } @@ -342,7 +352,8 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() FeatureSetSpec expected = incomingFeatureSet.toBuilder().setVersion(4).setSource(defaultSource.toProto()).build(); ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet(incomingFeatureSet); + specService.applyFeatureSet( + FeatureSetProto.FeatureSet.newBuilder().setSpec(expected).build()); verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); @@ -355,24 +366,30 @@ public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() Field f3f1 = new Field("f3", "f3f1", Enum.INT64); Field f3f2 = new Field("f3", "f3f2", Enum.INT64); Field f3e1 = new Field("f3", "f3e1", Enum.STRING); - FeatureSetProto.FeatureSetSpec incomingFeatureSet = + FeatureSetProto.FeatureSet incomingFeatureSet = (new FeatureSet( - "f3", 5, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource)) - .toProto() - .getSpec(); - - FeatureSetSpec expected = incomingFeatureSet; + "f3", + 5, + 100L, + Arrays.asList(f3e1), + Arrays.asList(f3f2, f3f1), + defaultSource, + FeatureSetStatus.STATUS_READY)) + .toProto(); + + FeatureSetProto.FeatureSet expected = incomingFeatureSet; ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); assertThat( applyFeatureSetResponse.getFeatureSet().getSpec().getMaxAge(), - equalTo(expected.getMaxAge())); + equalTo(expected.getSpec().getMaxAge())); assertThat( applyFeatureSetResponse.getFeatureSet().getSpec().getEntities(0), - equalTo(expected.getEntities(0))); + equalTo(expected.getSpec().getEntities(0))); assertThat( - applyFeatureSetResponse.getFeatureSet().getSpec().getName(), equalTo(expected.getName())); + applyFeatureSetResponse.getFeatureSet().getSpec().getName(), + equalTo(expected.getSpec().getName())); } @Test @@ -418,7 +435,13 @@ private FeatureSet newDummyFeatureSet(String name, int version) { Field entity = new Field(name, "entity", Enum.STRING); FeatureSet fs = new FeatureSet( - name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), defaultSource); + name, + version, + 100L, + Arrays.asList(entity), + Arrays.asList(feature), + defaultSource, + FeatureSetStatus.STATUS_READY); fs.setCreated(Date.from(Instant.ofEpochSecond(10L))); return fs; } diff --git a/protos/feast/core/CoreService.proto b/protos/feast/core/CoreService.proto index 1704623db9..9a9eaa64fd 100644 --- a/protos/feast/core/CoreService.proto +++ b/protos/feast/core/CoreService.proto @@ -113,7 +113,7 @@ message ListStoresResponse { message ApplyFeatureSetRequest { // Feature set version and source will be ignored - feast.core.FeatureSetSpec feature_set = 1; + feast.core.FeatureSet feature_set = 1; } message ApplyFeatureSetResponse { diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 3a78c51535..20df828a0e 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -241,7 +241,7 @@ def _apply_feature_set(self, feature_set: FeatureSet): # Convert the feature set to a request and send to 
Feast Core apply_fs_response = self._core_service_stub.ApplyFeatureSet( - ApplyFeatureSetRequest(feature_set=feature_set.to_proto().spec), + ApplyFeatureSetRequest(feature_set=feature_set.to_proto()), timeout=GRPC_CONNECTION_TIMEOUT_APPLY, ) # type: ApplyFeatureSetResponse diff --git a/sdk/python/feast/core/CoreService_pb2.py b/sdk/python/feast/core/CoreService_pb2.py index b54dc89221..3185bece28 100644 --- a/sdk/python/feast/core/CoreService_pb2.py +++ b/sdk/python/feast/core/CoreService_pb2.py @@ -22,7 +22,7 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\020CoreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"D\n\x15GetFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"G\n\x17ListFeatureSetsResponse\x12,\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x16.feast.core.FeatureSet\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"I\n\x16\x41pplyFeatureSetRequest\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\"\xb3\x01\n\x17\x41pplyFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 \x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 
\x01(\x05\"D\n\x15GetFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"G\n\x17ListFeatureSetsResponse\x12,\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x16.feast.core.FeatureSet\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"E\n\x16\x41pplyFeatureSetRequest\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\xb3\x01\n\x17\x41pplyFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 \x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , dependencies=[feast_dot_core_dot_FeatureSet__pb2.DESCRIPTOR,feast_dot_core_dot_Store__pb2.DESCRIPTOR,]) @@ -49,8 +49,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=809, - serialized_end=856, + serialized_start=805, + serialized_end=852, ) _sym_db.RegisterEnumDescriptor(_APPLYFEATURESETRESPONSE_STATUS) @@ -71,8 +71,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1106, - serialized_end=1142, + serialized_start=1102, + serialized_end=1138, ) _sym_db.RegisterEnumDescriptor(_UPDATESTORERESPONSE_STATUS) @@ -364,7 +364,7 @@ oneofs=[ ], serialized_start=601, - serialized_end=674, + serialized_end=670, ) @@ -402,8 +402,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=677, - serialized_end=856, + serialized_start=673, + serialized_end=852, ) @@ -426,8 +426,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=858, - serialized_end=886, + serialized_start=854, + serialized_end=882, ) @@ -457,8 +457,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=888, - serialized_end=934, + serialized_start=884, + serialized_end=930, ) @@ -488,8 +488,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=936, - 
serialized_end=990, + serialized_start=932, + serialized_end=986, ) @@ -527,8 +527,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=993, - serialized_end=1142, + serialized_start=989, + serialized_end=1138, ) _GETFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET @@ -538,7 +538,7 @@ _LISTSTORESREQUEST_FILTER.containing_type = _LISTSTORESREQUEST _LISTSTORESREQUEST.fields_by_name['filter'].message_type = _LISTSTORESREQUEST_FILTER _LISTSTORESRESPONSE.fields_by_name['store'].message_type = feast_dot_core_dot_Store__pb2._STORE -_APPLYFEATURESETREQUEST.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC +_APPLYFEATURESETREQUEST.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _APPLYFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _APPLYFEATURESETRESPONSE.fields_by_name['status'].enum_type = _APPLYFEATURESETRESPONSE_STATUS _APPLYFEATURESETRESPONSE_STATUS.containing_type = _APPLYFEATURESETRESPONSE @@ -669,8 +669,8 @@ file=DESCRIPTOR, index=0, serialized_options=None, - serialized_start=1145, - serialized_end=1689, + serialized_start=1141, + serialized_end=1685, methods=[ _descriptor.MethodDescriptor( name='GetFeastCoreVersion', diff --git a/sdk/python/feast/core/CoreService_pb2.pyi b/sdk/python/feast/core/CoreService_pb2.pyi index 6b83807aba..5cd6eaf671 100644 --- a/sdk/python/feast/core/CoreService_pb2.pyi +++ b/sdk/python/feast/core/CoreService_pb2.pyi @@ -2,7 +2,6 @@ import sys from feast.core.FeatureSet_pb2 import ( FeatureSet as feast___core___FeatureSet_pb2___FeatureSet, - FeatureSetSpec as feast___core___FeatureSet_pb2___FeatureSetSpec, ) from feast.core.Store_pb2 import ( @@ -196,11 +195,11 @@ class ApplyFeatureSetRequest(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSetSpec: ... + def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSet: ... def __init__(self, *, - feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSetSpec] = None, + feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSet] = None, ) -> None: ... @classmethod def FromString(cls, s: bytes) -> ApplyFeatureSetRequest: ... 
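Taken together, the client.py, CoreService.proto, and stub changes above mean SDK callers now wrap the spec in a full FeatureSet message before calling ApplyFeatureSet. A minimal sketch of the new call shape (hypothetical names; assumes the generated feast.core modules are importable, and recall the proto comment that Core ignores version and source on the incoming spec):

from feast.core.CoreService_pb2 import ApplyFeatureSetRequest
from feast.core.FeatureSet_pb2 import FeatureSet, FeatureSetSpec

# Build the spec as before, then wrap it in the new top-level FeatureSet message.
spec = FeatureSetSpec(name="customer_transactions")
request = ApplyFeatureSetRequest(feature_set=FeatureSet(spec=spec))
# stub.ApplyFeatureSet(request, timeout=10)  # stub: a CoreServiceStub, not shown here
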
diff --git a/sdk/python/tests/feast_core_server.py b/sdk/python/tests/feast_core_server.py index fe1ef36967..61688f6504 100644 --- a/sdk/python/tests/feast_core_server.py +++ b/sdk/python/tests/feast_core_server.py @@ -55,31 +55,31 @@ def ListFeatureSets(self, request: ListFeatureSetsRequest, context): def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): feature_set = request.feature_set - if feature_set.version is None: - feature_set.version = 1 + if feature_set.spec.version is None: + feature_set.spec.version = 1 else: - feature_set.version = feature_set.version + 1 + feature_set.spec.version = feature_set.spec.version + 1 - if feature_set.source.type == SourceTypeProto.INVALID: - feature_set.source.kafka_source_config.CopyFrom( + if feature_set.spec.source.type == SourceTypeProto.INVALID: + feature_set.spec.source.kafka_source_config.CopyFrom( KafkaSourceConfigProto(bootstrap_servers="server.com", topic="topic1") ) - feature_set.source.type = SourceTypeProto.KAFKA + feature_set.spec.source.type = SourceTypeProto.KAFKA feature_set_meta = FeatureSetMeta( status=FeatureSetStatus.STATUS_READY, created_timestamp=Timestamp(seconds=10), ) - applied_feature_set = FeatureSetProto(spec=feature_set, meta=feature_set_meta) - self._feature_sets[feature_set.name] = applied_feature_set + applied_feature_set = FeatureSetProto(spec=feature_set.spec, meta=feature_set_meta) + self._feature_sets[feature_set.spec.name] = applied_feature_set _logger.info( "registered feature set " - + feature_set.name + + feature_set.spec.name + " with " - + str(len(feature_set.entities)) + + str(len(feature_set.spec.entities)) + " entities and " - + str(len(feature_set.features)) + + str(len(feature_set.spec.features)) + " features" ) From 438e4ac5e96d0c77104c33114b3b5ba8d44c7c11 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 15:37:51 +0800 Subject: [PATCH 10/12] Erase all traces of jobInfo --- ...InfoRepository.java => JobRepository.java} | 2 +- .../core/job/dataflow/DataflowJobManager.java | 16 +++++----- .../core/service/JobCoordinatorService.java | 12 ++++---- .../service/JobCoordinatorServiceTest.java | 29 ++++++++++--------- 4 files changed, 30 insertions(+), 29 deletions(-) rename core/src/main/java/feast/core/dao/{JobInfoRepository.java => JobRepository.java} (94%) diff --git a/core/src/main/java/feast/core/dao/JobInfoRepository.java b/core/src/main/java/feast/core/dao/JobRepository.java similarity index 94% rename from core/src/main/java/feast/core/dao/JobInfoRepository.java rename to core/src/main/java/feast/core/dao/JobRepository.java index b670243d0e..98da76912e 100644 --- a/core/src/main/java/feast/core/dao/JobInfoRepository.java +++ b/core/src/main/java/feast/core/dao/JobRepository.java @@ -25,7 +25,7 @@ /** JPA repository supplying Job objects keyed by ID. 
*/ @Repository -public interface JobInfoRepository extends JpaRepository { +public interface JobRepository extends JpaRepository { List findByStatusNotIn(Collection statuses); List findBySourceIdAndStoreNameOrderByLastUpdatedDesc(String sourceId, String storeName); diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 91761ef18a..92763e7971 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -144,28 +144,28 @@ public void abortJob(String dataflowJobId) { /** * Get status of a dataflow job with given id and try to map it into Feast's JobStatus. * - * @param jobInfo Job containing dataflow job id + * @param job Job containing dataflow job id * @return status of the job, or return {@link JobStatus#UNKNOWN} if error happens. */ @Override - public JobStatus getJobStatus(Job jobInfo) { - if (!Runner.DATAFLOW.getName().equals(jobInfo.getRunner())) { - return jobInfo.getStatus(); + public JobStatus getJobStatus(Job job) { + if (!Runner.DATAFLOW.getName().equals(job.getRunner())) { + return job.getStatus(); } try { - com.google.api.services.dataflow.model.Job job = + com.google.api.services.dataflow.model.Job dataflowJob = dataflow .projects() .locations() .jobs() - .get(projectId, location, jobInfo.getExtId()) + .get(projectId, location, job.getExtId()) .execute(); - return DataflowJobStateMapper.map(job.getCurrentState()); + return DataflowJobStateMapper.map(dataflowJob.getCurrentState()); } catch (Exception e) { log.error( "Unable to retrieve status of a dataflow job with id : {}\ncause: {}", - jobInfo.getExtId(), + job.getExtId(), e.getMessage()); } return JobStatus.UNKNOWN; diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 2a2bd327e7..76a1cc27dd 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -27,7 +27,7 @@ import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.JobUpdatesProperties; import feast.core.dao.FeatureSetRepository; -import feast.core.dao.JobInfoRepository; +import feast.core.dao.JobRepository; import feast.core.job.JobManager; import feast.core.job.JobUpdateTask; import feast.core.model.FeatureSet; @@ -56,7 +56,7 @@ public class JobCoordinatorService { private final long POLLING_INTERVAL_MILLISECONDS = 60000; // 1 min - private JobInfoRepository jobInfoRepository; + private JobRepository jobRepository; private FeatureSetRepository featureSetRepository; private SpecService specService; private JobManager jobManager; @@ -64,12 +64,12 @@ public class JobCoordinatorService { @Autowired public JobCoordinatorService( - JobInfoRepository jobInfoRepository, + JobRepository jobRepository, FeatureSetRepository featureSetRepository, SpecService specService, JobManager jobManager, JobUpdatesProperties jobUpdatesProperties) { - this.jobInfoRepository = jobInfoRepository; + this.jobRepository = jobRepository; this.featureSetRepository = featureSetRepository; this.specService = specService; this.jobManager = jobManager; @@ -146,7 +146,7 @@ public void Poll() { try { Job job = ecs.take().get(); if (job != null) { - jobInfoRepository.saveAndFlush(job); + jobRepository.saveAndFlush(job); } } catch (ExecutionException | InterruptedException e) { log.warn("Unable to 
start or update job: {}", e.getMessage()); @@ -192,7 +192,7 @@ private void updateFeatureSetStatuses(List jobUpdateTasks) { @Transactional public Optional getJob(Source source, Store store) { List jobs = - jobInfoRepository.findBySourceIdAndStoreNameOrderByLastUpdatedDesc( + jobRepository.findBySourceIdAndStoreNameOrderByLastUpdatedDesc( source.getId(), store.getName()); jobs = jobs.stream() diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 033a657ead..5b892d30aa 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -40,7 +40,7 @@ import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.JobUpdatesProperties; import feast.core.dao.FeatureSetRepository; -import feast.core.dao.JobInfoRepository; +import feast.core.dao.JobRepository; import feast.core.job.JobManager; import feast.core.job.JobMatcher; import feast.core.job.Runner; @@ -59,7 +59,8 @@ public class JobCoordinatorServiceTest { @Rule public final ExpectedException exception = ExpectedException.none(); - @Mock JobInfoRepository jobInfoRepository; + @Mock + JobRepository jobRepository; @Mock JobManager jobManager; @Mock SpecService specService; @Mock FeatureSetRepository featureSetRepository; @@ -78,9 +79,9 @@ public void shouldDoNothingIfNoStoresFound() { when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build()); JobCoordinatorService jcs = new JobCoordinatorService( - jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); - verify(jobInfoRepository, times(0)).saveAndFlush(any()); + verify(jobRepository, times(0)).saveAndFlush(any()); } @Test @@ -99,9 +100,9 @@ public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocol .thenReturn(ListFeatureSetsResponse.newBuilder().build()); JobCoordinatorService jcs = new JobCoordinatorService( - jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); - verify(jobInfoRepository, times(0)).saveAndFlush(any()); + verify(jobRepository, times(0)).saveAndFlush(any()); } @Test @@ -135,7 +136,7 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source)) .build(); String extId = "ext"; - ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(Job.class); + ArgumentCaptor jobArgCaptor = ArgumentCaptor.forClass(Job.class); Job expectedInput = new Job( @@ -172,10 +173,10 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep JobCoordinatorService jcs = new JobCoordinatorService( - jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); - verify(jobInfoRepository, times(1)).saveAndFlush(jobInfoArgCaptor.capture()); - Job actual = jobInfoArgCaptor.getValue(); + verify(jobRepository, times(1)).saveAndFlush(jobArgCaptor.capture()); + Job actual = jobArgCaptor.getValue(); assertThat(actual, equalTo(expected)); } @@ -258,7 +259,7 @@ public void shouldGroupJobsBySource() throws 
InvalidProtocolBufferException { feast.core.model.Store.fromProto(store), Arrays.asList(FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); - ArgumentCaptor jobInfoArgCaptor = ArgumentCaptor.forClass(Job.class); + ArgumentCaptor jobArgCaptor = ArgumentCaptor.forClass(Job.class); when(specService.listFeatureSets( Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) @@ -276,11 +277,11 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { JobCoordinatorService jcs = new JobCoordinatorService( - jobInfoRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); jcs.Poll(); - verify(jobInfoRepository, times(2)).saveAndFlush(jobInfoArgCaptor.capture()); - List actual = jobInfoArgCaptor.getAllValues(); + verify(jobRepository, times(2)).saveAndFlush(jobArgCaptor.capture()); + List actual = jobArgCaptor.getAllValues(); assertThat(actual.get(0), equalTo(expected1)); assertThat(actual.get(1), equalTo(expected2)); From 05bb8388878df5c2e0505b647948c225c41ca79e Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 17 Dec 2019 15:48:55 +0800 Subject: [PATCH 11/12] Remove status and created timestamp from constructor --- .prow/scripts/test-end-to-end-batch.sh | 4 ++-- .prow/scripts/test-end-to-end.sh | 4 ++-- sdk/python/feast/feature_set.py | 18 ++++++------------ tests/e2e/basic-ingest-redis-serving.py | 10 +++++----- tests/e2e/bq-batch-retrieval.py | 2 -- .../e2e/large_volume/cust_trans_large_fs.yaml | 2 +- 6 files changed, 16 insertions(+), 24 deletions(-) diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index bafb28506c..f25c0720ed 100755 --- a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -76,7 +76,7 @@ nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.prope sleep 5 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & -sleep 15 +sleep 20 tail -n10 /var/log/kafka.log echo " @@ -143,7 +143,7 @@ EOF nohup java -jar core/target/feast-core-0.3.2-SNAPSHOT.jar \ --spring.config.location=file:///tmp/core.application.yml \ &> /var/log/feast-core.log & -sleep 30 +sleep 35 tail -n10 /var/log/feast-core.log echo " ============================================================ diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index 9133d31682..f6ebd8c6ee 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -59,7 +59,7 @@ nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.prope sleep 5 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & -sleep 15 +sleep 20 tail -n10 /var/log/kafka.log echo " @@ -126,7 +126,7 @@ EOF nohup java -jar core/target/feast-core-0.3.2-SNAPSHOT.jar \ --spring.config.location=file:///tmp/core.application.yml \ &> /var/log/feast-core.log & -sleep 30 +sleep 35 tail -n10 /var/log/feast-core.log echo " diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index 02c9e90cf2..85d8e13753 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -45,9 +45,7 @@ def __init__( features: List[Feature] = None, entities: List[Entity] = None, source: Source = None, - max_age: Optional[Duration] = None, - status: 
-        created_timestamp: Optional[Timestamp] = None,
+        max_age: Optional[Duration] = None
     ):
         self._name = name
         self._fields = OrderedDict()  # type: Dict[str, Field]
@@ -62,8 +60,8 @@ def __init__(
         self._max_age = max_age
         self._version = None
         self._client = None
-        self._status = status
-        self._created_timestamp = created_timestamp
+        self._status = None
+        self._created_timestamp = None

     def __eq__(self, other):
         if not isinstance(other, FeatureSet):
@@ -511,15 +509,11 @@ def from_proto(cls, feature_set_proto: FeatureSetProto):
                 None
                 if feature_set_proto.spec.source.type == 0
                 else Source.from_proto(feature_set_proto.spec.source)
-            ),
-            status=(
-                None
-                if feature_set_proto.meta.status == 0
-                else feature_set_proto.meta.status
-            ),
-            created_timestamp=feature_set_proto.meta.created_timestamp,
+            )
         )
         feature_set._version = feature_set_proto.spec.version
+        feature_set._status = feature_set_proto.meta.status
+        feature_set._created_timestamp = feature_set_proto.meta.created_timestamp
         return feature_set

     def to_proto(self) -> FeatureSetProto:
diff --git a/tests/e2e/basic-ingest-redis-serving.py b/tests/e2e/basic-ingest-redis-serving.py
index 902b0985c1..f674363f36 100644
--- a/tests/e2e/basic-ingest-redis-serving.py
+++ b/tests/e2e/basic-ingest-redis-serving.py
@@ -70,7 +70,7 @@ def basic_dataframe():
     )


-@pytest.mark.timeout(300)
+@pytest.mark.timeout(45)
 @pytest.mark.run(order=10)
 def test_basic_register_feature_set_success(client):
     # Load feature set from file
@@ -96,7 +96,7 @@ def test_basic_register_feature_set_success(client):
     )


-@pytest.mark.timeout(45)
+@pytest.mark.timeout(300)
 @pytest.mark.run(order=11)
 def test_basic_ingest_success(client, basic_dataframe):
     cust_trans_fs = client.get_feature_set(name="customer_transactions")
@@ -202,7 +202,7 @@ def all_types_dataframe():
     )


-@pytest.mark.timeout(300)
+@pytest.mark.timeout(45)
 @pytest.mark.run(order=20)
 def test_all_types_register_feature_set_success(client):
     all_types_fs_expected = FeatureSet(
@@ -246,7 +246,7 @@ def test_all_types_register_feature_set_success(client):
     )


-@pytest.mark.timeout(45)
+@pytest.mark.timeout(300)
 @pytest.mark.run(order=21)
 def test_all_types_ingest_success(client, all_types_dataframe):
     # Get all_types feature set
@@ -322,7 +322,7 @@ def large_volume_dataframe():
     return customer_data


-@pytest.mark.timeout(300)
+@pytest.mark.timeout(45)
 @pytest.mark.run(order=30)
 def test_large_volume_register_feature_set_success(client):
     cust_trans_fs_expected = FeatureSet.from_yaml(
diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq-batch-retrieval.py
index 067dd14a2f..639ca9f559 100644
--- a/tests/e2e/bq-batch-retrieval.py
+++ b/tests/e2e/bq-batch-retrieval.py
@@ -174,11 +174,9 @@ def test_multiple_featureset_joins(client):
     )

     client.apply(fs1)
-    time.sleep(10)
     fs1 = client.get_feature_set(name="feature_set_1", version=1)

     client.apply(fs2)
-    time.sleep(10)
     fs2 = client.get_feature_set(name="feature_set_2", version=1)

     N_ROWS = 10
diff --git a/tests/e2e/large_volume/cust_trans_large_fs.yaml b/tests/e2e/large_volume/cust_trans_large_fs.yaml
index 0ea0a12427..54bf4cac28 100644
--- a/tests/e2e/large_volume/cust_trans_large_fs.yaml
+++ b/tests/e2e/large_volume/cust_trans_large_fs.yaml
@@ -1,6 +1,6 @@
+kind: feature_set
 spec:
   name: customer_transactions_large
-  kind: feature_set
   entities:
   - name: customer_id
     valueType: INT64

From 38f652139f65ba712bc2853476bc328e096b7cc1 Mon Sep 17 00:00:00 2001
From: zhilingc
Date: Tue, 17 Dec 2019 17:01:51 +0800
Subject: [PATCH 12/12] Remove queue buffer limit

---
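Note: dropping the qsize()/sleep throttle below means the row queue can grow without bound if the Kafka producer falls behind, trading memory for ingest throughput. The usual no-polling alternative is to bound the queue itself, since put() on a full bounded queue blocks until the consumer drains it. A minimal illustrative sketch of that pattern follows; it is not Feast code — the function names and MAX_BUFFERED_ROWS bound are made up, and it uses a thread where the real loader hands rows to a separate production process:

import queue
import threading

MAX_BUFFERED_ROWS = 100  # illustrative bound, not a Feast setting


def produce(rows, row_queue):
    # A bounded queue gives backpressure for free: put() blocks once
    # maxsize items are waiting, so no qsize()/sleep polling is needed.
    for row in rows:
        row_queue.put(row)
    row_queue.put(None)  # sentinel: no more rows


def consume(row_queue):
    while True:
        row = row_queue.get()
        if row is None:  # sentinel reached
            break
        # ... hand `row` to the Kafka producer here ...


q = queue.Queue(maxsize=MAX_BUFFERED_ROWS)
consumer = threading.Thread(target=consume, args=(q,))
consumer.start()
produce(range(1000), q)
consumer.join()

A bounded queue would, of course, reintroduce exactly the throttling this patch removes; leaving the queue unbounded is the deliberate choice when throughput matters more than a memory ceiling.
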
 sdk/python/feast/loaders/ingest.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py
index 23ba2ecb3b..a59c7c66b1 100644
--- a/sdk/python/feast/loaders/ingest.py
+++ b/sdk/python/feast/loaders/ingest.py
@@ -200,8 +200,6 @@ def ingest_table_to_kafka(
         ):
             # Push rows onto a queue for the production process to pick up
             row_queue.put(row)
-            while row_queue.qsize() > chunk_size:
-                time.sleep(0.1)
         row_queue.put(None)
     except Exception as ex:
         _logger.error(f"Exception occurred: {ex}")
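
One usage note on [PATCH 11/12] above, since it changes the SDK's public constructor: status and created_timestamp can no longer be passed when building a FeatureSet; they start out unset and are only populated when a feature set is rebuilt from its proto. A hypothetical sketch (not from the Feast test suite; it reads the private fields shown in the diff purely for illustration):

from feast.feature_set import FeatureSet

# Only spec-level fields are constructor arguments now.
fs = FeatureSet(name="customer_transactions")

# Metadata starts out unset and is filled in by FeatureSet.from_proto()
# once the feature set has been registered with and read back from Core.
assert fs._status is None
assert fs._created_timestamp is None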